Skip to content

Commit

Permalink
Add Last.fm authentication flow to get fresh cookies
Browse files Browse the repository at this point in the history
  • Loading branch information
joinemm committed Nov 21, 2023
1 parent 3a6d4ed commit ca5a9b3
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 26 deletions.
3 changes: 2 additions & 1 deletion modules/keychain.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def __init__(self):
self.PROXY_USER: str = ""
self.PROXY_PASS: str = ""
self.IG_COOKIE: str = ""
self.LASTFM_LOGIN_COOKIE: str = ""
self.SHLINK_API_KEY: str = ""
self.GIPHY_API_KEY: str = ""
self.LASTFM_USERNAME: str = ""
self.LASTFM_PASSWORD: str = ""

for name in self.__dict__:
value = os.environ.get(name)
Expand Down
72 changes: 47 additions & 25 deletions modules/lastfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,44 @@ def __str__(self):
class LastFmApi:
LASTFM_RED = "b90000"
API_BASE_URL = "http://ws.audioscrobbler.com/2.0/"
USER_AGENT = (
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0"
)

def __init__(self, bot: MisoBot):
self.bot = bot

async def login(self, username: str, password: str) -> bool:
"""Login to lastfm for authenticated web scraping requests"""
login_url = "https://www.last.fm/login"
async with self.bot.session.get(login_url) as response:
soup = BeautifulSoup(await response.text(), "lxml")
el = soup.find("input", {"type": "hidden", "name": "csrfmiddlewaretoken"})
csrf = el.attrs.get("value")

async with self.bot.session.post(
login_url,
headers={
"User-Agent": self.USER_AGENT,
"referer": login_url,
},
data={
"csrfmiddlewaretoken": csrf,
"next": "/user/_",
"username_or_email": username,
"password": password,
"submit": "",
},
) as response:
success = (
username.lower()
== response.headers.get("X-PJAX-URL").split("/")[-1].lower()
)
if success:
logger.info("Logged into Last.fm successfully")
else:
logger.warning("Problem logging into Last.fm")

async def api_request(self, method: str, params: dict) -> dict:
"""Make a request to the lastfm api, returns json."""
# add auth params, remove null values and combine to single dict
Expand Down Expand Up @@ -209,6 +243,10 @@ async def user_get_recent_tracks(
# for some reason it appears even if it's not in the requested timeframe.
data["track"] = data["track"][1:]

# actually limit the data to the given limit...
if limit:
data["track"] = data["track"][:limit]

return non_empty(data)

async def user_get_top_albums(
Expand Down Expand Up @@ -351,29 +389,14 @@ async def user_get_now_playing(self, username: str) -> dict:
# WEB SCRAPING #
################

    async def scrape_page(self, page_url: str, params: dict | None = None):
"""Scrapes the given url returning a Soup."""
headers = {
"Host": "www.last.fm",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "fi,en;q=0.7,en-US;q=0.3",
"Accept-Encoding": "gzip, deflate, br",
"DNT": "1",
"Connection": "keep-alive",
"Cookie": self.bot.keychain.LASTFM_LOGIN_COOKIE,
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Pragma": "no-cache",
"Cache-Control": "no-cache",
}
        async with self.bot.session.get(
            page_url,
            params=params,
            headers={
                "User-Agent": self.USER_AGENT,
            },
) as response:
response.raise_for_status()
content = await response.text()
Expand Down Expand Up @@ -407,7 +430,7 @@ async def get_additional_library_pages(self, soup: BeautifulSoup, url: str) -> l

async def get_additional_page(n):
new_url = url + f"&page={n}"
            soup = await self.scrape_page(new_url)
return self.get_library_playcounts(soup)

tasks = []
Expand Down Expand Up @@ -493,15 +516,15 @@ async def scrape_album_metadata(self, artist: str, album: str) -> dict | None:
async def library_artist_images(
self,
username: str,
        amount: int,
period: Period,
) -> list[LastFmImage]:
"""Get image hashes for user's top n artists"""
url: str = f"https://www.last.fm/user/{username}/library/artists?date_preset={period.web_format()}"
tasks = []
for i in range(1, math.ceil(amount / 50) + 1):
params = {"page": str(i)} if i > 1 else None
            tasks.append(self.scrape_page(url, params))

images = []
soup: BeautifulSoup
Expand All @@ -513,4 +536,3 @@ async def library_artist_images(
images += [LastFmImage.from_url(div.attrs["src"]) for div in imagedivs]

        return images

0 comments on commit ca5a9b3

Please sign in to comment.