diff --git a/service/xhs/logic/detail.py b/service/xhs/logic/detail.py index c3505dc..bb58c71 100644 --- a/service/xhs/logic/detail.py +++ b/service/xhs/logic/detail.py @@ -1,7 +1,9 @@ from .common import common_request, COMMON_HEADERS from lib import requests from lib import logger +from bs4 import BeautifulSoup import re +import json @@ -10,25 +12,20 @@ async def request_detail(id: str, cookie: str) -> tuple[dict, bool]: 请求小红书获取视频信息 """ # 获取xsec_token - url = 'https://www.xiaohongshu.com/explore' + url = f'https://www.xiaohongshu.com/explore/{id}' headers = {"cookie": cookie} headers.update(COMMON_HEADERS) resp = await requests.get(url, headers=headers) if resp.status_code != 200 or resp.text == '': return {}, False - pattern = r'xsec_token=(.*?)&' - match = re.search(pattern, resp.text) - if not match: - logger.error("No xsec_token match found.") + try: + soup = BeautifulSoup(resp.text, 'html.parser') + pattern = re.compile('window\\.__INITIAL_STATE__={.*}') + text = soup.body.find( + 'script', text=pattern).text.replace('window.__INITIAL_STATE__=', '').replace('undefined', '""') + target = json.loads(text) + detail_data = target.get('note', {}).get('noteDetailMap', {}).get(id, {}) + except Exception as e: + logger.error(f"failed to get detail: {id}, err: {e}") return {}, False - xsec_token = match.group(1) - - # 获取详情 - params = {"source_note_id": id, "image_formats": [ - "jpg", "webp", "avif"], "extra": {"need_body_topic": "1"}, "xsec_source": "pc_feed", "xsec_token": xsec_token} - headers = {"cookie": cookie} - resp, succ = await common_request('/api/sns/web/v1/feed', params, headers) - if not succ: - return {}, succ - ret = resp.get('data', {}) - return ret, succ + return detail_data, True