bandripper.bandripper
import argparse
import json
import re
import string
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse

import requests
import whosyouragent
from bs4 import BeautifulSoup
from noiftimer import Timer
from printbuddies import ProgBar

root = Path(__file__).parent


def clean_string(text: str) -> str:
    """Remove punctuation and leading/trailing whitespace from text."""
    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()


@dataclass
class Track:
    title: str
    number: int
    url: str

    def __post_init__(self):
        self.title = clean_string(self.title)

    @property
    def numbered_title(self):
        num = str(self.number)
        if len(num) == 1:
            num = "0" + num
        return f"{num} - {self.title}"


@dataclass
class Album:
    url: str
    artist: str = None
    title: str = None
    tracks: list[Track] = None
    art_url: str = None

    def __repr__(self):
        return f"{self.title} by {self.artist}"

    def __post_init__(self):
        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting album info failed with code {response.status_code}"
            )
        soup = BeautifulSoup(response.text, "html.parser")
        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
        data = None
        for script in soup.find_all("script"):
            if script.get("data-tralbum"):
                data = script
                break
        if data is None:
            raise RuntimeError(f"Could not find track data for {self.url}.")
        data = json.loads(data.attrs["data-tralbum"])
        self.artist = clean_string(data["artist"])
        self.title = clean_string(data["current"]["title"])
        self.tracks = [
            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
            for track in data["trackinfo"]
            if track.get("file")
        ]


class AlbumRipper:
    def __init__(
        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param no_track_number: If True, don't add the track
        number to the front of the track title."""
        self.album = Album(album_url)
        self.no_track_number = no_track_number
        self.overwrite = overwrite

    def make_save_path(self):
        self.save_path = Path.cwd() / self.album.artist / self.album.title
        self.save_path.mkdir(parents=True, exist_ok=True)

    @property
    def headers(self) -> dict:
        """Get a headers dict with a random useragent."""
        return whosyouragent.get_agent(as_dict=True)

    def save_track(self, track_title: str, content: bytes) -> Path:
        """Save track to self.save_path/{track_title}.mp3.
        Returns the Path object for the save location.

        :param content: The binary data of the track."""
        file_path = self.save_path / f"{track_title}.mp3"
        file_path.write_bytes(content)
        return file_path

    def get_track_content(self, track_url: str) -> bytes:
        """Make a request to track_url and return the content.
        Raises a RuntimeError if response.status_code != 200."""
        response = requests.get(track_url, headers=self.headers)
        if response.status_code != 200:
            raise RuntimeError(
                f"Downloading track failed with status code {response.status_code}."
            )
        return response.content

    def download_album_art(self):
        """Download the album art and save as a .jpg."""
        file_path = self.save_path / f"{self.album.title}.jpg"
        try:
            response = requests.get(self.album.art_url, headers=self.headers)
            file_path.write_bytes(response.content)
        except Exception as e:
            print(f"Failed to download art for {self.album}.")
            print(e)

    def track_exists(self, track: Track) -> bool:
        """Return if a track already exists in self.save_path."""
        path = self.save_path / (
            track.title if self.no_track_number else track.numbered_title
        )
        return path.with_suffix(".mp3").exists()

    def rip(self):
        """Download and save the album tracks and album art."""
        if len(self.album.tracks) == 0:
            print(f"No public tracks available for {self.album}.")
            return None
        self.make_save_path()
        self.download_album_art()
        # Filter out already-downloaded tracks before sizing the progress bar
        # so the counts reflect what will actually be downloaded.
        if not self.overwrite:
            self.album.tracks = [
                track for track in self.album.tracks if not self.track_exists(track)
            ]
        num_tracks = len(self.album.tracks)
        bar = ProgBar(num_tracks, width_ratio=0.5)
        fails = []
        for i, track in enumerate(self.album.tracks, 1):
            bar.display(
                suffix=f"Downloading track {i}/{num_tracks}: {track.title}",
                counter_override=1 if len(self.album.tracks) == 1 else None,
            )
            try:
                content = self.get_track_content(track.url)
                self.save_track(
                    track.title if self.no_track_number else track.numbered_title,
                    content,
                )
            except Exception as e:
                fails.append((track, str(e)))
        print(
            f"Finished downloading {num_tracks - len(fails)} tracks from {self.album} in {bar.timer.elapsed_str}."
        )
        if fails:
            print("The following tracks failed to download:")
            for fail in fails:
                print(f"{fail[0].title}: {fail[1]}")


class BandRipper:
    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        parsed_url = urlparse(band_url)
        base_url = f"https://{parsed_url.netloc}"
        return [base_url + album.a.get("href") for album in grid.find_all("li")]

    def rip(self):
        if not self.albums:
            print(f"No albums to download from {self.band_url}.")
            return None
        print(
            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
        )
        timer = Timer(subsecond_resolution=True)
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        artist = self.albums[0].album.artist
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
        )
        if fails:
            print("The following downloads failed:")
            for fail in fails:
                print(f"{fail[0]}: {fail[1]}")


def page_is_discography(url: str) -> bool:
    """Returns whether the url is to a discography page or not."""
    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    soup = BeautifulSoup(response.text, "html.parser")
    # soup.find returns None if the element doesn't exist.
    grid = soup.find("ol", attrs={"id": "music-grid"})
    return grid is not None


def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
        If the url is to an artist's main page,
        all albums will be downloaded.
        The tracks will be saved to a subdirectory of
        your current directory.
        If a track can't be streamed (i.e. private) it
        won't be downloaded. Multiple urls can be passed.""",
    )

    parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    parser.add_argument(
        "-o",
        "--overwrite",
        action="store_true",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
    )

    args = parser.parse_args()
    args.urls = [url.strip("/") for url in args.urls]

    return args


def main(args: argparse.Namespace = None):
    if not args:
        args = get_args()
    for url in args.urls:
        if page_is_discography(url):
            ripper = BandRipper(url, args.no_track_number, args.overwrite)
        else:
            ripper = AlbumRipper(url, args.no_track_number, args.overwrite)
        ripper.rip()


if __name__ == "__main__":
    main(get_args())

def clean_string(text: str) -> str:
Remove punctuation and leading/trailing whitespace from text.
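A minimal usage sketch; the input string is arbitrary and only illustrates the punctuation and whitespace removal.

    from bandripper.bandripper import clean_string

    print(clean_string("  St. Anger!?  "))  # prints "St Anger"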
@dataclass
class Track:
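A short sketch of how a Track cleans its title and zero-pads single-digit track numbers; the URL below is a dummy placeholder.

    from bandripper.bandripper import Track

    track = Track("Intro!", 3, "https://example.com/intro.mp3")
    print(track.title)           # "Intro" - punctuation stripped in __post_init__
    print(track.numbered_title)  # "03 - Intro" - single-digit numbers are zero-padded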
@dataclass
class Album:
Album(url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
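Constructing an Album fetches and parses the page immediately, so this sketch needs network access; the URL is a placeholder for any public Bandcamp album page.

    from bandripper.bandripper import Album

    album = Album("https://someartist.bandcamp.com/album/some-album")
    print(album)                                   # "<album title> by <artist>"
    print(len(album.tracks), "streamable tracks")  # only tracks with a public stream URL
    print(album.art_url)                           # URL of the cover art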
class AlbumRipper:
AlbumRipper(album_url: str, no_track_number: bool = False, overwrite: bool = False)
Parameters
- no_track_number: If True, don't add the track number to the front of the track title.
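A usage sketch for ripping a single album; the URL is a placeholder, and tracks are saved under ./<artist>/<album title>/ in the current working directory.

    from bandripper.bandripper import AlbumRipper

    ripper = AlbumRipper(
        "https://someartist.bandcamp.com/album/some-album",
        no_track_number=True,  # save "<title>.mp3" instead of "NN - <title>.mp3"
        overwrite=False,       # skip tracks that already exist locally
    )
    ripper.rip()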
def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.
Parameters
- content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
Make a request to track_url and return the content. Raises a RuntimeError if response.status_code != 200.
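A sketch of downloading one track by hand with get_track_content and save_track; the album URL is a placeholder and at least one streamable track is assumed.

    from bandripper.bandripper import AlbumRipper

    ripper = AlbumRipper("https://someartist.bandcamp.com/album/some-album")
    ripper.make_save_path()                        # creates ./<artist>/<album title>/
    track = ripper.album.tracks[0]
    content = ripper.get_track_content(track.url)  # raises RuntimeError on a non-200 response
    saved = ripper.save_track(track.numbered_title, content)
    print(f"Saved to {saved}")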
def download_album_art(self):
Download the album art and save as a .jpg.
def track_exists(self, track: Track) -> bool:
Return if a track already exists in self.save_path.
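A sketch of the skip-existing check that rip() applies when overwrite is False; the album URL is a placeholder.

    from bandripper.bandripper import AlbumRipper

    ripper = AlbumRipper("https://someartist.bandcamp.com/album/some-album")
    ripper.make_save_path()  # track_exists needs save_path to be set
    remaining = [t for t in ripper.album.tracks if not ripper.track_exists(t)]
    print(f"{len(remaining)} of {len(ripper.album.tracks)} tracks not downloaded yet")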
def rip(self):
Download and save the album tracks and album art.
class BandRipper:
BandRipper(band_url: str, no_track_number: bool = False, overwrite: bool = False)
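A usage sketch for ripping a whole discography; the URL is a placeholder for an artist's root Bandcamp page, and every album found in the discography grid is saved to its own ./<artist>/<album title>/ folder.

    from bandripper.bandripper import BandRipper

    ripper = BandRipper("https://someartist.bandcamp.com")
    ripper.rip()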
def get_album_urls(self, band_url: str) -> list[str]:
Get album urls from the main bandcamp url.
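A standalone sketch of the same scrape using requests and BeautifulSoup directly; the band URL is a placeholder and a plain User-Agent header stands in for whosyouragent.get_agent(as_dict=True).

    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    band_url = "https://someartist.bandcamp.com"
    response = requests.get(band_url, headers={"User-Agent": "Mozilla/5.0"})
    soup = BeautifulSoup(response.text, "html.parser")
    grid = soup.find("ol", attrs={"id": "music-grid"})  # the discography grid
    base_url = f"https://{urlparse(band_url).netloc}"
    album_urls = [base_url + item.a.get("href") for item in grid.find_all("li")]
    print(album_urls)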
def rip(self):
def page_is_discography(url: str) -> bool:
Returns whether the url is to a discography page or not.
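A quick sketch; the URLs are placeholders, and the results assume an artist's root page contains the "music-grid" element while an individual album page does not.

    from bandripper.bandripper import page_is_discography

    print(page_is_discography("https://someartist.bandcamp.com"))             # True
    print(page_is_discography("https://someartist.bandcamp.com/album/demo"))  # False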
def get_args() -> argparse.Namespace:
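A sketch of driving the parser programmatically by faking the command line; the URL is a placeholder.

    import sys
    from bandripper.bandripper import get_args

    sys.argv = ["bandripper", "https://someartist.bandcamp.com/album/some-album/", "-n"]
    args = get_args()
    print(args.urls)             # trailing "/" stripped from each URL
    print(args.no_track_number)  # True
    print(args.overwrite)        # False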
def main(args: argparse.Namespace = None):
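Calling main with a prebuilt Namespace is equivalent to running the CLI; both URLs are placeholders, and at runtime page_is_discography decides whether each one gets a BandRipper or an AlbumRipper.

    import argparse
    from bandripper.bandripper import main

    main(
        argparse.Namespace(
            urls=[
                "https://someartist.bandcamp.com",                  # whole discography
                "https://someartist.bandcamp.com/album/some-album", # single album
            ],
            no_track_number=False,
            overwrite=False,
        )
    )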