|
| 1 | +# This file is part of Headphones. |
| 2 | +# |
| 3 | +# Headphones is free software: you can redistribute it and/or modify |
| 4 | +# it under the terms of the GNU General Public License as published by |
| 5 | +# the Free Software Foundation, either version 3 of the License, or |
| 6 | +# (at your option) any later version. |
| 7 | +# |
| 8 | +# Headphones is distributed in the hope that it will be useful, |
| 9 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | +# GNU General Public License for more details. |
| 12 | +# |
| 13 | +# You should have received a copy of the GNU General Public License |
| 14 | +# along with Headphones. If not, see <http://www.gnu.org/licenses/> |
| 15 | + |
| 16 | +import headphones |
| 17 | +import json |
| 18 | +import os |
| 19 | +import re |
| 20 | + |
| 21 | +from headphones import logger, helpers, metadata, request |
| 22 | +from headphones.common import USER_AGENT |
| 23 | +from headphones.types import Result |
| 24 | + |
| 25 | +from mediafile import MediaFile, UnreadableFileError |
| 26 | +from bs4 import BeautifulSoup |
| 27 | +from bs4 import FeatureNotFound |
| 28 | + |
| 29 | + |
def search(album, albumlength=None, page=1, resultlist=None):
    """Search Bandcamp for an album and collect exact artist/album matches.

    The Bandcamp search is queried with the cleaned album title only; every
    hit of type "album" is parsed with ``parse_album`` and kept when both
    its artist and its album name equal the requested ones
    (case-insensitively, after ``helpers.latinToAscii``). Result pages are
    followed for as long as the page contains a "next" link.

    Args:
        album: Mapping providing ``'AlbumTitle'`` and ``'ArtistName'``.
        albumlength: Unused by this provider; kept for interface
            compatibility.
        page: Number of the first result page to request.
        resultlist: Optional list the matches are appended to. A new list
            is created when omitted.

    Returns:
        The list of ``Result`` objects (the same object as ``resultlist``
        when one was passed in).
    """
    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', '.': '', ':': ''}
    if resultlist is None:
        resultlist = []

    cleanalbum = helpers.latinToAscii(
        helpers.replace_all(album['AlbumTitle'], dic)
    ).strip()
    cleanartist = helpers.latinToAscii(
        helpers.replace_all(album['ArtistName'], dic)
    ).strip()

    headers = {'User-Agent': USER_AGENT}

    # Walk the result pages in a loop rather than recursing once per page,
    # so a long result set cannot exhaust the recursion limit.
    while True:
        params = {
            "page": page,
            "q": cleanalbum,
        }
        logger.info("Looking up https://bandcamp.com/search with {}".format(
            params))
        content = request.request_content(
            url='https://bandcamp.com/search',
            params=params,
            headers=headers
        )
        # NOTE(review): request_content is assumed to hand back no body
        # when the request fails - confirm against headphones.request.
        if not content:
            logger.warn("No response from Bandcamp search (page {})".format(
                page))
            break
        content = content.decode('utf8')

        try:
            soup = BeautifulSoup(content, "html5lib")
        except FeatureNotFound:
            # html5lib is not installed; use the parser bundled with Python.
            soup = BeautifulSoup(content, "html.parser")

        for item in soup.find_all("li", class_="searchresult"):
            itemtype = item.find('div', class_='itemtype')
            if itemtype is None or itemtype.text.strip().lower() != "album":
                continue

            try:
                data = parse_album(item)
            except (AttributeError, KeyError, TypeError, ValueError) as e:
                # One malformed hit must not abort the whole search.
                logger.debug("Skipping unparsable search result: {}".format(
                    e))
                continue

            cleanartist_found = helpers.latinToAscii(data['artist'])
            cleanalbum_found = helpers.latinToAscii(data['album'])

            logger.debug(u"{} - {}".format(data['album'], cleanalbum_found))

            logger.debug("Comparing {} to {}".format(
                cleanalbum, cleanalbum_found))
            if (cleanartist.lower() == cleanartist_found.lower() and
                    cleanalbum.lower() == cleanalbum_found.lower()):
                resultlist.append(Result(
                    data['title'], data['size'], data['url'],
                    'bandcamp', 'bandcamp', True))

        if not soup.find('a', class_='next'):
            break
        page += 1
        logger.debug("Calling next page ({})".format(page))

    return resultlist
| 87 | + |
| 88 | + |
def download(album, bestqual):
    """Download the streamable tracks of a Bandcamp album as MP3 files.

    The album page at ``bestqual.url`` is fetched and its embedded track
    list decoded. Every track that offers an ``mp3-128`` stream is saved as
    ``NN - <title>.mp3`` in ``<BANDCAMP_DIR>/<artist> - <album>`` and then
    tagged with artist, album, title, track number, track total and year.

    Args:
        album: Mapping providing ``'ArtistName'`` and ``'AlbumTitle'``; it
            is also passed to ``metadata._date_year``.
        bestqual: Search result whose ``url`` attribute is the album page.

    Returns:
        The target directory path. It is returned even when no track could
        be downloaded.
    """
    page_html = request.request_content(url=bestqual.url).decode('utf-8')

    trackinfo = []
    # The page carries the track list as HTML-escaped JSON, so every quote
    # shows up as a &quot; entity that must be turned back into '"'.
    # NOTE(review): the &quot; spelling in the two literals below was
    # reconstructed from an entity-decoded copy of this file - confirm.
    found = re.search(r"trackinfo&quot;:(\[.*?\]),", page_html)
    if found is None:
        logger.warn("Couldn't find trackinfo on {}".format(bestqual.url))
    else:
        try:
            trackinfo = json.loads(found.group(1).replace('&quot;', '"'))
        except ValueError as e:
            logger.warn("Couldn't load json: {}".format(e))

    directory = os.path.join(
        headphones.CONFIG.BANDCAMP_DIR,
        u'{} - {}'.format(
            album['ArtistName'].replace('/', '_'),
            album['AlbumTitle'].replace('/', '_')))
    directory = helpers.latinToAscii(directory)

    if not os.path.exists(directory):
        try:
            os.makedirs(directory)
        except Exception as e:
            logger.warn("Could not create directory ({})".format(e))

    # Numbering follows the position in the track list, so tracks that are
    # skipped still consume their number.
    for index, track in enumerate(trackinfo, start=1):
        filename = helpers.replace_illegal_chars(
            u'{:02d} - {}.mp3'.format(index, track['title']))
        fullname = os.path.join(directory.encode('utf-8'),
                                filename.encode('utf-8'))
        logger.debug("Downloading to {}".format(fullname))

        streams = track.get('file')
        if streams is None or 'mp3-128' not in streams:
            continue

        content = request.request_content(streams['mp3-128'])
        if not content:
            # Nothing usable came back; do not leave an empty file behind.
            logger.warn(u"Couldn't download track {}".format(track['title']))
            continue

        # The context manager closes the file before MediaFile reopens it.
        with open(fullname, 'wb') as handle:
            handle.write(content)

        try:
            f = MediaFile(fullname)
            _date, year = metadata._date_year(album)
            # NOTE(review): tag values are passed as UTF-8 bytes, as
            # before - confirm MediaFile accepts bytes here.
            f.update({
                'artist': album['ArtistName'].encode('utf-8'),
                'album': album['AlbumTitle'].encode('utf-8'),
                'title': track['title'].encode('utf-8'),
                'track': track['track_num'],
                'tracktotal': len(trackinfo),
                'year': year,
            })
            f.save()
        except UnreadableFileError as ex:
            logger.warn("MediaFile couldn't parse: %s (%s)",
                        fullname,
                        str(ex))

    return directory
| 144 | + |
| 145 | + |
def parse_album(item):
    """Extract album details from one Bandcamp search-result element.

    Args:
        item: Parsed search-result element exposing ``find`` (as returned
            by BeautifulSoup). It must contain ``div`` children with the
            classes ``heading`` (holding a link), ``subhead``, ``released``
            and ``length``.

    Returns:
        A dict with the keys ``title`` ("<artist> - <album> [<year>]"),
        ``artist``, ``album``, ``url`` (album link without its query
        string) and ``size`` (estimated download size in bytes).

    Raises:
        AttributeError: If an expected element or the release year is
            missing.
        ValueError: If the running time cannot be read from the length
            text.
    """
    heading = item.find('div', class_='heading')
    album = heading.text.strip()

    artist = item.find('div', class_='subhead').text.strip()
    # Drop only the leading "by " label. Replacing every "by " would also
    # mangle artist names that contain it, e.g. "Abby Road".
    if artist.startswith("by "):
        artist = artist[len("by "):]

    # The first four-digit number in the "released ..." text is the year.
    released = item.find('div', class_='released').text.strip()
    year = re.search(r"(\d{4})", released).group(1)

    url = heading.find('a')['href'].split("?")[0]

    # The length text looks like "12 tracks, 45 minutes"; the unit is
    # singular for a one-minute release ("1 track, 1 minute").
    length = item.find('div', class_='length').text.strip()
    minutes = re.search(r"(\d+)\s*minutes?", length)
    if minutes is None:
        raise ValueError(
            "Unrecognised album length: {!r}".format(length))

    # bandcamp offers mp3 at 128 kbit/s, which should be 960KB/minute
    size = int(minutes.group(1)) * 983040

    data = {"title": u'{} - {} [{}]'.format(artist, album, year),
            "artist": artist, "album": album,
            "url": url, "size": size}

    return data
0 commit comments