Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update: improved retrieval of cookies workflow #616

Merged
merged 1 commit into from
Dec 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions weibo_spider/config_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,29 +174,44 @@ def add_user_uri_list(user_config_file_path, user_uri_list):
user_uri_list[0] = '\n' + user_uri_list[0]
with codecs.open(user_config_file_path, 'a', encoding='utf-8') as f:
f.write('\n'.join(user_uri_list))

def get_cookie():
    """Read the weibo.cn cookies stored by the local Chrome browser.

    Returns:
        dict: cookie name -> cookie value for every cookie that
        ``browser_cookie3.chrome`` yields for the ``weibo.cn`` domain.
        A mapping (not a pre-joined ``name=value; ...`` string) is returned
        because callers index it by cookie name and iterate its items.

    Raises:
        Exception: any error raised while reading the browser cookies is
        logged and then re-raised unchanged.
    """
    try:
        chrome_cookies = browser_cookie3.chrome(domain_name='weibo.cn')
        # Single return: the previous text had a string return followed by
        # an unreachable dict return.
        return {cookie.name: cookie.value for cookie in chrome_cookies}
    except Exception as e:
        logger.error(u'Failed to obtain weibo.cn cookie from Chrome browser: %s', str(e))
        raise

def update_cookie_config(cookie, user_config_file_path):
    """Store the given cookies in the ``cookie`` field of config.json.

    Args:
        cookie: mapping of cookie name -> cookie value. It is serialized as
            ``name=value`` pairs joined by ``'; '``.
        user_config_file_path: path of the JSON config file. When falsy,
            ``config.json`` in the current working directory is used.

    The config file is rewritten only when the serialized cookie differs
    from the value currently stored, so an up-to-date file is left untouched.

    Raises:
        Exception: any error while reading, parsing or writing the config
        file is logged and then re-raised unchanged.
    """
    if not user_config_file_path:
        user_config_file_path = os.path.join(os.getcwd(), 'config.json')
    try:
        with codecs.open(user_config_file_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        cookie_string = '; '.join(
            f'{name}={value}' for name, value in cookie.items())

        # .get() so that a config file without a "cookie" key is treated as
        # "needs updating" instead of failing with KeyError.
        if config.get('cookie') != cookie_string:
            config['cookie'] = cookie_string
            with codecs.open(user_config_file_path, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=4, ensure_ascii=False)
    except Exception as e:
        logger.error(u'Failed to update cookie in config file: %s', str(e))
        raise

def check_cookie(user_config_file_path):
    """Check the browser cookies for a logged-in session and sync the config.

    Reads the cookies via ``get_cookie()``. If the ``MLOGIN`` cookie is
    ``'0'`` or absent, a warning with the sign-in URL is logged and the
    process exits. Otherwise the cookies are passed to
    ``update_cookie_config`` together with ``user_config_file_path``.

    Args:
        user_config_file_path: path of the JSON config file, forwarded
            unchanged to ``update_cookie_config``.

    Raises:
        SystemExit: when the user is considered not logged in
            (``SystemExit`` is not an ``Exception`` subclass, so the
            handler below does not intercept it).
        Exception: any other failure is logged and re-raised unchanged.
    """
    try:
        cookie = get_cookie()
        # A missing MLOGIN cookie (e.g. no weibo.cn cookies in Chrome at all)
        # is treated like MLOGIN == '0' so the user gets the login prompt
        # rather than a KeyError.
        if cookie.get("MLOGIN", '0') == '0':
            logger.warning("使用 Chrome 在此登录 %s", "https://passport.weibo.com/sso/signin?entry=wapsso&source=wapssowb&url=https://m.weibo.cn/")
            sys.exit()
        else:
            update_cookie_config(cookie, user_config_file_path)
    except Exception as e:
        logger.error(u'Check for cookie failed: %s', str(e))
        raise
2 changes: 1 addition & 1 deletion weibo_spider/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,14 +366,14 @@ def _get_config():
config_path = FLAGS.config_path
elif not os.path.isfile(config_path):
shutil.copy(src, config_path)
config_util.update_cookie_config(config_path)
logger.info(u'请先配置当前目录(%s)下的config.json文件,'
u'如果想了解config.json参数的具体意义及配置方法,请访问\n'
u'https://github.com/dataabc/weiboSpider#2程序设置' %
os.getcwd())
sys.exit()
try:
with open(config_path) as f:
config_util.check_cookie(config_path)
config = json.loads(f.read())
return config
except ValueError:
Expand Down
Loading