bandripper.bandripper

  1import argparse
  2import json
  3import re
  4import string
  5from dataclasses import dataclass
  6from pathlib import Path
  7from urllib.parse import urlparse
  8
  9import requests
 10import whosyouragent
 11from bs4 import BeautifulSoup
 12from noiftimer import Timer
 13from printbuddies import ProgBar
 14
# Directory that contains this module file.
root = Path(__file__).parent
 16
 17
 18def clean_string(text: str) -> str:
 19    """Remove punctuation and trailing spaces from text."""
 20    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()
 21
 22
 23@dataclass
 24class Track:
 25    title: str
 26    number: int
 27    url: str
 28
 29    def __post_init__(self):
 30        self.title = clean_string(self.title)
 31
 32    @property
 33    def numbered_title(self):
 34        num = str(self.number)
 35        if len(num) == 1:
 36            num = "0" + num
 37        return f"{num} - {self.title}"
 38
 39
 40@dataclass
 41class Album:
 42    url: str
 43    artist: str = None
 44    title: str = None
 45    tracks: list[Track] = None
 46    art_url: str = None
 47
 48    def __repr__(self):
 49        return f"{self.title} by {self.artist}"
 50
 51    def __post_init__(self):
 52        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
 53        if response.status_code != 200:
 54            raise RuntimeError(
 55                f"Getting album info failed with code {response.status_code}"
 56            )
 57        soup = BeautifulSoup(response.text, "html.parser")
 58        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
 59        for script in soup.find_all("script"):
 60            if script.get("data-cart"):
 61                data = script
 62                break
 63        data = json.loads(data.attrs["data-tralbum"])
 64        self.artist = clean_string(data["artist"])
 65        self.title = clean_string(data["current"]["title"])
 66        self.tracks = [
 67            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
 68            for track in data["trackinfo"]
 69            if track.get("file")
 70        ]
 71
 72
 73class AlbumRipper:
 74    def __init__(
 75        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 76    ):
 77        """
 78        :param no_track_number: If True, don't add the track
 79        number to the front of the track title."""
 80        self.album = Album(album_url)
 81        self.no_track_number = no_track_number
 82        self.overwrite = overwrite
 83
 84    def make_save_path(self):
 85        self.save_path = Path.cwd() / self.album.artist / self.album.title
 86        self.save_path.mkdir(parents=True, exist_ok=True)
 87
 88    @property
 89    def headers(self) -> dict:
 90        """Get a headers dict with a random useragent."""
 91        return whosyouragent.get_agent(as_dict=True)
 92
 93    def save_track(self, track_title: str, content: bytes) -> Path:
 94        """Save track to self.save_path/{track_title}.mp3.
 95        Returns the Path object for the save location.
 96
 97        :param content: The binary data of the track."""
 98        file_path = self.save_path / f"{track_title}.mp3"
 99        file_path.write_bytes(content)
100        return file_path
101
102    def get_track_content(self, track_url: str) -> bytes:
103        """Make a request to track_url and return the content.
104        Raises a RunTimeError exception if response.status_code != 200."""
105        response = requests.get(track_url, headers=self.headers)
106        if response.status_code != 200:
107            raise RuntimeError(
108                f"Downloading track failed with status code {response.status_code}."
109            )
110        return response.content
111
112    def download_album_art(self):
113        """Download the album art and save as a .jpg."""
114        file_path = self.save_path / f"{self.album.title}.jpg"
115        try:
116            response = requests.get(self.album.art_url, headers=self.headers)
117            file_path.write_bytes(response.content)
118        except Exception as e:
119            print(f"Failed to download art for {self.album}.")
120            print(e)
121
122    def track_exists(self, track: Track) -> bool:
123        """Return if a track already exists in self.save_path."""
124        path = self.save_path / (
125            track.title if self.no_track_number else track.numbered_title
126        )
127        return path.with_suffix(".mp3").exists()
128
129    def rip(self):
130        """Download and save the album tracks and album art."""
131        if len(self.album.tracks) == 0:
132            print(f"No public tracks available for {self.album}.")
133            return None
134        self.make_save_path()
135        self.download_album_art()
136        num_tracks = len(self.album.tracks)
137        bar = ProgBar(num_tracks, width_ratio=0.5)
138        fails = []
139        if not self.overwrite:
140            self.album.tracks = [
141                track for track in self.album.tracks if not self.track_exists(track)
142            ]
143        for i, track in enumerate(self.album.tracks, 1):
144            bar.display(
145                suffix=f"Downloading track {i}/{num_tracks}: {track.title}",
146                counter_override=1 if len(self.album.tracks) == 1 else None,
147            )
148            try:
149                content = self.get_track_content(track.url)
150                self.save_track(
151                    track.title if self.no_track_number else track.numbered_title,
152                    content,
153                )
154            except Exception as e:
155                fails.append((track, str(e)))
156        print(
157            f"Finished downloading {num_tracks - len(fails)} tracks from {self.album} in {bar.timer.elapsed_str}."
158        )
159        if fails:
160            print("The following tracks failed to download:")
161            for fail in fails:
162                print(f"{fail[0].title}: {fail[1]}")
163
164
class BandRipper:
    """Download every album listed on a band's discography page."""

    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param band_url: Url of the page listing the band's albums.

        :param no_track_number: Passed on to each AlbumRipper.

        :param overwrite: Passed on to each AlbumRipper.

        Albums whose AlbumRipper can't be created are printed and skipped."""
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url.

        Raises a RuntimeError if the response status code isn't 200
        or the page has no music grid."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        if grid is None:
            raise RuntimeError(f"No discography found at {band_url}.")
        base_url = f"https://{urlparse(band_url).netloc}"
        urls = []
        for item in grid.find_all("li"):
            # Skip grid items that carry no link at all.
            href = item.a.get("href") if item.a is not None else None
            if not href:
                continue
            # Only relative links need the site prefix; a link that already
            # names a host is used unchanged.
            urls.append(href if urlparse(href).netloc else base_url + href)
        return urls

    def rip(self):
        """Download all loaded albums and print a summary.

        An album whose rip raises is recorded and reported at the end
        so the remaining albums are still processed."""
        if not self.albums:
            print(f"No albums available to download from {self.band_url}.")
            return None
        artist = self.albums[0].album.artist
        print(f"Downloading {len(self.albums)} albums by {artist}.")
        timer = Timer(subsecond_resolution=True)
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        # Only count the albums whose rip didn't raise.
        print(
            f"Finished downloading {len(self.albums) - len(fails)} albums by {artist} in {timer.elapsed_str}."
        )
        if fails:
            print("The following downloads failed:")
            for fail in fails:
                print(f"{fail[0]}: {fail[1]}")
212
213
def page_is_discography(url: str) -> bool:
    """Request url and return whether the page contains a music grid,
    i.e. whether it is a discography page.

    Raises a RuntimeError if the response status code isn't 200."""
    page = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if page.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {page.status_code}."
        )
    parsed_page = BeautifulSoup(page.text, "html.parser")
    # find() gives None when the page has no <ol id="music-grid"> element.
    music_grid = parsed_page.find("ol", attrs={"id": "music-grid"})
    return bool(music_grid)
227
228
def get_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    The namespace has `urls` (each with surrounding "/" characters
    stripped), `no_track_number` and `overwrite`."""
    arg_parser = argparse.ArgumentParser()

    # Positional: zero or more album/artist urls.
    arg_parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
            If the url is to an artists main page,
            all albums will be downloaded.
            The tracks will be saved to a subdirectory of
            your current directory.
            If a track can't be streamed (i.e. private) it
            won't be downloaded. Multiple urls can be passed.""",
    )

    # Switch: leave the track number off the saved file names.
    arg_parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    # Switch: download tracks again even if they exist locally.
    arg_parser.add_argument(
        "-o",
        "--overwrite",
        action="store_true",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
    )

    namespace = arg_parser.parse_args()
    stripped_urls = []
    for raw_url in namespace.urls:
        stripped_urls.append(raw_url.strip("/"))
    namespace.urls = stripped_urls

    return namespace
266
267
def main(args: argparse.Namespace = None):
    """Rip every url in args.urls, using a BandRipper for
    discography pages and an AlbumRipper for everything else.

    :param args: The parsed arguments. If not given,
    they are obtained from get_args()."""
    if not args:
        args = get_args()
    for url in args.urls:
        ripper_type = BandRipper if page_is_discography(url) else AlbumRipper
        ripper_type(url, args.no_track_number, args.overwrite).rip()
277
278
# When run as a script, parse the command line arguments and pass them to main().
if __name__ == "__main__":
    main(get_args())
def clean_string(text: str) -> str:
19def clean_string(text: str) -> str:
20    """Remove punctuation and trailing spaces from text."""
21    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()

Remove punctuation and leading/trailing whitespace from text.

@dataclass
class Track:
24@dataclass
25class Track:
26    title: str
27    number: int
28    url: str
29
30    def __post_init__(self):
31        self.title = clean_string(self.title)
32
33    @property
34    def numbered_title(self):
35        num = str(self.number)
36        if len(num) == 1:
37            num = "0" + num
38        return f"{num} - {self.title}"
Track(title: str, number: int, url: str)
@dataclass
class Album:
41@dataclass
42class Album:
43    url: str
44    artist: str = None
45    title: str = None
46    tracks: list[Track] = None
47    art_url: str = None
48
49    def __repr__(self):
50        return f"{self.title} by {self.artist}"
51
52    def __post_init__(self):
53        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
54        if response.status_code != 200:
55            raise RuntimeError(
56                f"Getting album info failed with code {response.status_code}"
57            )
58        soup = BeautifulSoup(response.text, "html.parser")
59        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
60        for script in soup.find_all("script"):
61            if script.get("data-cart"):
62                data = script
63                break
64        data = json.loads(data.attrs["data-tralbum"])
65        self.artist = clean_string(data["artist"])
66        self.title = clean_string(data["current"]["title"])
67        self.tracks = [
68            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
69            for track in data["trackinfo"]
70            if track.get("file")
71        ]
Album( url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
class AlbumRipper:
 74class AlbumRipper:
 75    def __init__(
 76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 77    ):
 78        """
 79        :param no_track_number: If True, don't add the track
 80        number to the front of the track title."""
 81        self.album = Album(album_url)
 82        self.no_track_number = no_track_number
 83        self.overwrite = overwrite
 84
 85    def make_save_path(self):
 86        self.save_path = Path.cwd() / self.album.artist / self.album.title
 87        self.save_path.mkdir(parents=True, exist_ok=True)
 88
 89    @property
 90    def headers(self) -> dict:
 91        """Get a headers dict with a random useragent."""
 92        return whosyouragent.get_agent(as_dict=True)
 93
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path
102
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content
112
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)
122
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()
129
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        num_tracks = len(self.album.tracks)
138        bar = ProgBar(num_tracks, width_ratio=0.5)
139        fails = []
140        if not self.overwrite:
141            self.album.tracks = [
142                track for track in self.album.tracks if not self.track_exists(track)
143            ]
144        for i, track in enumerate(self.album.tracks, 1):
145            bar.display(
146                suffix=f"Downloading track {i}/{num_tracks}: {track.title}",
147                counter_override=1 if len(self.album.tracks) == 1 else None,
148            )
149            try:
150                content = self.get_track_content(track.url)
151                self.save_track(
152                    track.title if self.no_track_number else track.numbered_title,
153                    content,
154                )
155            except Exception as e:
156                fails.append((track, str(e)))
157        print(
158            f"Finished downloading {num_tracks - len(fails)} tracks from {self.album} in {bar.timer.elapsed_str}."
159        )
160        if fails:
161            print("The following tracks failed to download:")
162            for fail in fails:
163                print(f"{fail[0].title}: {fail[1]}")
AlbumRipper( album_url: str, no_track_number: bool = False, overwrite: bool = False)
75    def __init__(
76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
77    ):
78        """
79        :param no_track_number: If True, don't add the track
80        number to the front of the track title."""
81        self.album = Album(album_url)
82        self.no_track_number = no_track_number
83        self.overwrite = overwrite
Parameters
  • no_track_number: If True, don't add the track number to the front of the track title.
def make_save_path(self):
85    def make_save_path(self):
86        self.save_path = Path.cwd() / self.album.artist / self.album.title
87        self.save_path.mkdir(parents=True, exist_ok=True)
headers: dict

Get a headers dict with a random useragent.

def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path

Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.

Parameters
  • content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content

Make a request to track_url and return the content. Raises a RuntimeError exception if response.status_code != 200.

def download_album_art(self):
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)

Download the album art and save as a .jpg.

def track_exists(self, track: bandripper.bandripper.Track) -> bool:
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()

Return whether a track already exists in self.save_path.

def rip(self):
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        num_tracks = len(self.album.tracks)
138        bar = ProgBar(num_tracks, width_ratio=0.5)
139        fails = []
140        if not self.overwrite:
141            self.album.tracks = [
142                track for track in self.album.tracks if not self.track_exists(track)
143            ]
144        for i, track in enumerate(self.album.tracks, 1):
145            bar.display(
146                suffix=f"Downloading track {i}/{num_tracks}: {track.title}",
147                counter_override=1 if len(self.album.tracks) == 1 else None,
148            )
149            try:
150                content = self.get_track_content(track.url)
151                self.save_track(
152                    track.title if self.no_track_number else track.numbered_title,
153                    content,
154                )
155            except Exception as e:
156                fails.append((track, str(e)))
157        print(
158            f"Finished downloading {num_tracks - len(fails)} tracks from {self.album} in {bar.timer.elapsed_str}."
159        )
160        if fails:
161            print("The following tracks failed to download:")
162            for fail in fails:
163                print(f"{fail[0].title}: {fail[1]}")

Download and save the album tracks and album art.

class BandRipper:
166class BandRipper:
167    def __init__(
168        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
169    ):
170        self.band_url = band_url
171        self.albums = []
172        for url in self.get_album_urls(band_url):
173            try:
174                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
175            except Exception as e:
176                print(e)
177
178    def get_album_urls(self, band_url: str) -> list[str]:
179        """Get album urls from the main bandcamp url."""
180        print(f"Fetching discography from {band_url}...")
181        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
182        if response.status_code != 200:
183            raise RuntimeError(
184                f"Getting {band_url} failed with status code {response.status_code}."
185            )
186        soup = BeautifulSoup(response.text, "html.parser")
187        grid = soup.find("ol", attrs={"id": "music-grid"})
188        parsed_url = urlparse(band_url)
189        base_url = f"https://{parsed_url.netloc}"
190        return [base_url + album.a.get("href") for album in grid.find_all("li")]
191
192    def rip(self):
193        print(
194            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
195        )
196        timer = Timer(subsecond_resolution=True)
197        timer.start()
198        fails = []
199        for album in self.albums:
200            try:
201                album.rip()
202            except Exception as e:
203                fails.append((album, e))
204        timer.stop()
205        artist = self.albums[0].album.artist
206        print(
207            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
208        )
209        if fails:
210            print(f"The following downloads failed:")
211            for fail in fails:
212                print(f"{fail[0]}: {fail[1]}")
BandRipper( band_url: str, no_track_number: bool = False, overwrite: bool = False)
167    def __init__(
168        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
169    ):
170        self.band_url = band_url
171        self.albums = []
172        for url in self.get_album_urls(band_url):
173            try:
174                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
175            except Exception as e:
176                print(e)
def get_album_urls(self, band_url: str) -> list[str]:
178    def get_album_urls(self, band_url: str) -> list[str]:
179        """Get album urls from the main bandcamp url."""
180        print(f"Fetching discography from {band_url}...")
181        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
182        if response.status_code != 200:
183            raise RuntimeError(
184                f"Getting {band_url} failed with status code {response.status_code}."
185            )
186        soup = BeautifulSoup(response.text, "html.parser")
187        grid = soup.find("ol", attrs={"id": "music-grid"})
188        parsed_url = urlparse(band_url)
189        base_url = f"https://{parsed_url.netloc}"
190        return [base_url + album.a.get("href") for album in grid.find_all("li")]

Get album urls from the main bandcamp url.

def rip(self):
192    def rip(self):
193        print(
194            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
195        )
196        timer = Timer(subsecond_resolution=True)
197        timer.start()
198        fails = []
199        for album in self.albums:
200            try:
201                album.rip()
202            except Exception as e:
203                fails.append((album, e))
204        timer.stop()
205        artist = self.albums[0].album.artist
206        print(
207            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
208        )
209        if fails:
210            print(f"The following downloads failed:")
211            for fail in fails:
212                print(f"{fail[0]}: {fail[1]}")
def page_is_discography(url: str) -> bool:
215def page_is_discography(url: str) -> bool:
216    """Returns whether the url is to a discography page or not."""
217    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
218    if response.status_code != 200:
219        raise RuntimeError(
220            f"Getting {url} failed with status code {response.status_code}."
221        )
222    soup = BeautifulSoup(response.text, "html.parser")
223    # Returns None if it doesn't exist.
224    grid = soup.find("ol", attrs={"id": "music-grid"})
225    if grid:
226        return True
227    return False

Returns whether the url is to a discography page or not.

def get_args() -> argparse.Namespace:
230def get_args() -> argparse.Namespace:
231    parser = argparse.ArgumentParser()
232
233    parser.add_argument(
234        "urls",
235        type=str,
236        nargs="*",
237        help=""" The bandcamp url(s) for the album or artist.
238            If the url is to an artists main page,
239            all albums will be downloaded.
240            The tracks will be saved to a subdirectory of
241            your current directory.
242            If a track can't be streamed (i.e. private) it
243            won't be downloaded. Multiple urls can be passed.""",
244    )
245
246    parser.add_argument(
247        "-n",
248        "--no_track_number",
249        action="store_true",
250        help=""" By default the track number will be added
251        to the front of the track title. Pass this switch
252        to disable the behavior.""",
253    )
254
255    parser.add_argument(
256        "-o",
257        "--overwrite",
258        action="store_true",
259        help=""" Pass this flag to overwrite existing files.
260        Otherwise don't download tracks that already exist locally.""",
261    )
262
263    args = parser.parse_args()
264    args.urls = [url.strip("/") for url in args.urls]
265
266    return args
def main(args: argparse.Namespace = None):
269def main(args: argparse.Namespace = None):
270    if not args:
271        args = get_args()
272    for url in args.urls:
273        if page_is_discography(url):
274            ripper = BandRipper(url, args.no_track_number, args.overwrite)
275        else:
276            ripper = AlbumRipper(url, args.no_track_number, args.overwrite)
277        ripper.rip()