From ea620ed06c3ef70504f2ab787c3fe54cc0dbb836 Mon Sep 17 00:00:00 2001
From: Alan
Date: Thu, 5 Dec 2024 18:58:57 -0500
Subject: [PATCH] automated cookie grabbing for config.json (must be logged in on chrome)

---
Review notes (this section is commentary and is not part of the commit):
 * browser_cookie3 is now imported inside get_cookie(), so config_util
   stays importable when the package is not installed.
 * get_cookie() raises ValueError instead of returning an empty string, so
   an empty cookie is never written into config.json.
 * _get_config() treats the automatic cookie update as best effort and
   still prints the configuration instructions when it fails.
 * The commit id on the first line and the post-image blob ids on the
   "index" lines of config_util.py and spider.py are carried over from the
   original patch and do not describe this revision.
 * requirements.txt still installs browser_cookie3 from a pull-request ref;
   consider pinning a commit or a release before merging.

 requirements.txt            |  3 +-
 weibo_spider/config_util.py | 57 +++++++++++++++++++++++++++++++++++++
 weibo_spider/spider.py      |  6 ++++
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 23b93002..8530e875 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 lxml==4.9.1
 requests==2.32.0
 tqdm==4.66.3
-absl-py==0.12.0
\ No newline at end of file
+absl-py==0.12.0
+git+https://github.com/borisbabic/browser_cookie3.git@refs/pull/215/head
\ No newline at end of file
diff --git a/weibo_spider/config_util.py b/weibo_spider/config_util.py
index ba4676b3..abfdc073 100644
--- a/weibo_spider/config_util.py
+++ b/weibo_spider/config_util.py
@@ -2,7 +2,8 @@
 import logging
 import os
 import sys
 from datetime import datetime
+import json
 
 logger = logging.getLogger('spider.config_util')
 
@@ -172,3 +173,59 @@ def add_user_uri_list(user_config_file_path, user_uri_list):
     user_uri_list[0] = '\n' + user_uri_list[0]
     with codecs.open(user_config_file_path, 'a', encoding='utf-8') as f:
         f.write('\n'.join(user_uri_list))
+
+
+def get_cookie():
+    """Return Chrome's weibo.cn cookies as a Cookie header string.
+
+    Needs the browser_cookie3 package and a Chrome profile that is logged
+    in to weibo.cn.
+
+    Returns:
+        The cookies joined as 'name=value; name=value'.
+
+    Raises:
+        ValueError: Chrome holds no cookie for weibo.cn.
+        Exception: any other failure (browser_cookie3 missing, cookie store
+            unreadable, ...) is logged and re-raised unchanged.
+    """
+    try:
+        # Imported lazily so config_util still imports, and a hand-written
+        # cookie still works, when browser_cookie3 is not installed.
+        import browser_cookie3
+        chrome_cookies = browser_cookie3.chrome(domain_name='weibo.cn')
+        # Keyed by name so a repeated cookie name keeps only its last value.
+        cookies_dict = {cookie.name: cookie.value for cookie in chrome_cookies}
+        if not cookies_dict:
+            # Do not let an empty string overwrite the configured cookie.
+            raise ValueError('no weibo.cn cookie found in Chrome')
+        return '; '.join(
+            f'{name}={value}' for name, value in cookies_dict.items())
+    except Exception as e:
+        logger.error(u'Failed to obtain weibo.cn cookie from Chrome browser: %s', str(e))
+        raise
+
+
+def update_cookie_config(user_config_file_path):
+    """Store Chrome's weibo.cn cookie under the 'cookie' key of config.json.
+
+    Args:
+        user_config_file_path: path of the JSON config file; when empty,
+            config.json in the current working directory is used.
+
+    Raises:
+        Exception: any failure to get the cookie or to read/write the file
+            is logged and re-raised unchanged.
+    """
+    if not user_config_file_path:
+        user_config_file_path = os.path.join(os.getcwd(), 'config.json')
+    try:
+        cookie = get_cookie()
+        with codecs.open(user_config_file_path, 'r', encoding='utf-8') as f:
+            config = json.load(f)
+        config['cookie'] = cookie
+        with codecs.open(user_config_file_path, 'w', encoding='utf-8') as f:
+            json.dump(config, f, indent=4, ensure_ascii=False)
+    except Exception as e:
+        logger.error(u'Failed to update cookie in config file: %s', str(e))
+        raise
diff --git a/weibo_spider/spider.py b/weibo_spider/spider.py
index e0d2e41e..f08dc74d 100644
--- a/weibo_spider/spider.py
+++ b/weibo_spider/spider.py
@@ -366,6 +366,12 @@ def _get_config():
         config_path = FLAGS.config_path
     elif not os.path.isfile(config_path):
         shutil.copy(src, config_path)
+        try:
+            config_util.update_cookie_config(config_path)
+        except Exception as e:
+            # Best effort: without Chrome or a weibo.cn login the user can
+            # still fill in the cookie by hand, so keep going.
+            logger.warning(u'Automatic cookie setup failed: %s', e)
         logger.info(u'请先配置当前目录(%s)下的config.json文件,'
                     u'如果想了解config.json参数的具体意义及配置方法,请访问\n'
                     u'https://github.com/dataabc/weiboSpider#2程序设置' %