-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqsbk.py
executable file
·68 lines (54 loc) · 2.22 KB
/
qsbk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# _*_coding:utf-8 -*-
import urllib
import urllib.request
import urllib.parse
import re
import urllib.error
import http.cookiejar
__author__ = "muzp"
# Scrape one page of the "hot" listing and print every text-only post
# (author, content, like count). Posts that contain an image are skipped.
page = 2
url = 'https://www.qiushibaike.com/hot/page/' + str(page)
# Header *value* only: the field name comes from the dict key below, so the
# value must not repeat "User-Agent:" (that would send a malformed UA string).
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
headers = {'User-Agent': user_agent}

# Matches one entry on the listing page. Capture groups, as used below:
#   0 - text printed as the author
#   1 - href appended to the site root to reach the post's own page
#   2 - text printed as the content
#   3 - markup after the content; searched for the "查看全文" marker
#   4 - markup before the stats block; searched for "img"
#   5 - text printed as the like count
pattern = re.compile(
    '<div class="author clearfix">.*?<h2>(.*?)</h2>'
    '.*?<a href="(.*?)"'
    '.*?<span>(.*?)</span>'
    '(.*?)</div>'
    '.*?<!-- 图片或gif -->(.*?)<div class="stats">'
    '.*?<i.*?number">(.*?)</i>',
    re.S,
)
# Matches the content block on a post's own page: group 0 is the text,
# group 1 is the markup that follows it (searched for "img").
# Compiled once here instead of once per truncated post.
pattern1 = re.compile('<div class="content">(.*?)</div>(.*?)</div>', re.S)


def _fetch_html(target_url):
    """Return the body of *target_url* decoded as UTF-8.

    The request is sent with the module-level ``headers``; the response is
    closed as soon as it has been read.
    """
    request = urllib.request.Request(target_url, headers=headers)
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")


try:
    content = _fetch_html(url)
    for item in pattern.findall(content):
        has_image = "img" in item[4]
        # The listing shows a "查看全文" link when it does not carry the
        # whole text; the full text is then read from the post's own page.
        is_truncated = "查看全文" in item[3]
        full_text = ""
        if is_truncated:
            detail_url = "https://www.qiushibaike.com" + item[1]
            print(detail_url)
            # The last match on the detail page decides both the image flag
            # and the text; with no match the listing's values are kept.
            for detail in pattern1.findall(_fetch_html(detail_url)):
                if "img" in detail[1]:
                    has_image = True
                else:
                    has_image = False
                    full_text = detail[0]
        if not has_image:
            print("作者:" + item[0].strip())
            if not is_truncated:
                print("内容:" + item[2].strip())
            else:
                print("内容:" + full_text.strip())
            print("点赞数:" + item[5].strip() + "\n")
except urllib.error.URLError as e:
    # Print whichever of the HTTP status code and the failure reason the
    # exception carries.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)