Skip to content

Commit

Permalink
Add Last.fm authentication flow to get fresh cookies
Browse files Browse the repository at this point in the history
  • Loading branch information
joinemm committed Nov 21, 2023
1 parent 3a6d4ed commit ca5a9b3
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 26 deletions.
3 changes: 2 additions & 1 deletion modules/keychain.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def __init__(self):
self.PROXY_USER: str = ""
self.PROXY_PASS: str = ""
self.IG_COOKIE: str = ""
self.LASTFM_LOGIN_COOKIE: str = ""
self.SHLINK_API_KEY: str = ""
self.GIPHY_API_KEY: str = ""
self.LASTFM_USERNAME: str = ""
self.LASTFM_PASSWORD: str = ""

for name in self.__dict__:
value = os.environ.get(name)
Expand Down
72 changes: 47 additions & 25 deletions modules/lastfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,44 @@ def __str__(self):
class LastFmApi:
LASTFM_RED = "b90000"
API_BASE_URL = "http://ws.audioscrobbler.com/2.0/"
USER_AGENT = (
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0"
)

def __init__(self, bot: MisoBot):
self.bot = bot

async def login(self, username: str, password: str) -> bool:
"""Login to lastfm for authenticated web scraping requests"""
login_url = "https://www.last.fm/login"
async with self.bot.session.get(login_url) as response:
soup = BeautifulSoup(await response.text(), "lxml")
el = soup.find("input", {"type": "hidden", "name": "csrfmiddlewaretoken"})
csrf = el.attrs.get("value")

async with self.bot.session.post(
login_url,
headers={
"User-Agent": self.USER_AGENT,
"referer": login_url,
},
data={
"csrfmiddlewaretoken": csrf,
"next": "/user/_",
"username_or_email": username,
"password": password,
"submit": "",
},
) as response:
success = (
username.lower()
== response.headers.get("X-PJAX-URL").split("/")[-1].lower()
)
if success:
logger.info("Logged into Last.fm successfully")
else:
logger.warning("Problem logging into Last.fm")

async def api_request(self, method: str, params: dict) -> dict:
"""Make a request to the lastfm api, returns json."""
# add auth params, remove null values and combine to single dict
Expand Down Expand Up @@ -209,6 +243,10 @@ async def user_get_recent_tracks(
# for some reason it appears even if it's not in the requested timeframe.
data["track"] = data["track"][1:]

# actually limit the data to the given limit...
if limit:
data["track"] = data["track"][:limit]

return non_empty(data)

async def user_get_top_albums(
Expand Down Expand Up @@ -351,29 +389,14 @@ async def user_get_now_playing(self, username: str) -> dict:
# WEB SCRAPING #
################

    async def scrape_page(self, page_url: str, params: dict | None = None):
"""Scrapes the given url returning a Soup."""
headers = {
"Host": "www.last.fm",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "fi,en;q=0.7,en-US;q=0.3",
"Accept-Encoding": "gzip, deflate, br",
"DNT": "1",
"Connection": "keep-alive",
"Cookie": self.bot.keychain.LASTFM_LOGIN_COOKIE,
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Pragma": "no-cache",
"Cache-Control": "no-cache",
}
        async with self.bot.session.get(
            page_url,
            params=params,
            headers={
                "User-Agent": self.USER_AGENT,
            },
) as response:
response.raise_for_status()
content = await response.text()
Expand Down Expand Up @@ -407,7 +430,7 @@ async def get_additional_library_pages(self, soup: BeautifulSoup, url: str) -> l

async def get_additional_page(n):
new_url = url + f"&page={n}"
            soup = await self.scrape_page(new_url)
return self.get_library_playcounts(soup)

tasks = []
Expand Down Expand Up @@ -493,15 +516,15 @@ async def scrape_album_metadata(self, artist: str, album: str) -> dict | None:
async def library_artist_images(
self,
username: str,
        amount: int,
period: Period,
) -> list[LastFmImage]:
"""Get image hashes for user's top n artists"""
url: str = f"https://www.last.fm/user/{username}/library/artists?date_preset={period.web_format()}"
tasks = []
for i in range(1, math.ceil(amount / 50) + 1):
params = {"page": str(i)} if i > 1 else None
            tasks.append(self.scrape_page(url, params))

images = []
soup: BeautifulSoup
Expand All @@ -513,4 +536,3 @@ async def library_artist_images(
images += [LastFmImage.from_url(div.attrs["src"]) for div in imagedivs]

        return images

0 comments on commit ca5a9b3

Please sign in to comment.