Skip to content

Commit 5c51740

Browse files
committed
add 163 code
1 parent 4f7fee4 commit 5c51740

File tree

4 files changed

+134
-1
lines changed

4 files changed

+134
-1
lines changed

.idea/workspace.xml

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

boss_spider/analyse.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@
1414
from PIL import Image
1515
import numpy as np
1616

17+
import pymysql
18+
19+
db = pymysql.connect()
20+
cursor = db.cursor()
21+
cursor.execute()
22+
23+
1724

1825
job_conn = MongoClient("mongodb://%s:%[email protected]:51612/boss" % ('boss', 'boss123'))
1926
job_db = job_conn.boss

you163_spider/analyse.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# coding = utf-8
2+
"""
3+
@author: zhou
4+
@time:2019/10/15 10:37
5+
@File: analyse.py
6+
"""
7+
8+
from pymongo import MongoClient
9+
import pandas as pd
10+
from pyecharts.charts import Bar, Pie
11+
from pyecharts import options as opts
12+
import re
13+
from wordcloud import WordCloud
14+
import jieba
15+
from PIL import Image
16+
import numpy as np
17+
18+
# Connect to MongoDB and fetch the scraped data.
# NOTE(review): the host part of the URI below reads "[email protected]", which looks like
# e-mail obfuscation applied when this page was captured; as written, "%[" is not a valid
# %-format conversion. Confirm the real connection string against the repository.
# NOTE(review): the credentials are hard-coded in the source.
19+
conn = MongoClient("mongodb://%s:%[email protected]:49974/you163" % ('you163', 'you163'))
20+
db = conn.you163
21+
mongo_collection = db.you163
22+
23+
# Load every document of the collection into a DataFrame.
24+
data = pd.DataFrame(list(mongo_collection.find()))
25+
26+
# Extract the colour and the cup size from each record's SKU info.
skuinfo = data['skuInfo']

color = []
cup_size = []
for sku in skuinfo.values.tolist():
    # Presumably sku[1] looks like "杯码:<size>" and sku[0] like "<label>:<colour>"
    # -- verify against the scraped data.
    cup_parts = sku[1].split(':')
    if cup_parts[0] == '杯码':
        # Keep only the text before any '('; without a '(' the whole
        # string is returned unchanged.
        cup_size.append(cup_parts[1].split('(')[0])
    # The colour is recorded for every record, whether or not it has a cup size.
    color.append(sku[0].split(':')[1])
42+
43+
# Count how often each colour occurs.
df = pd.DataFrame({'color': color})
analyse_color = df['color'].value_counts()

# Bar chart of the colour distribution.
bar = (
    Bar()
    .add_xaxis(analyse_color.index.values.tolist())
    .add_yaxis("", analyse_color.values.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="颜色分布"),
        # Rotate the x-axis labels so they do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    )
)
bar.render_notebook()
56+
57+
# Rose-type pie chart of each colour's share.
pie = (
    Pie()
    .add(
        "",
        [
            [name, count]
            for name, count in zip(
                analyse_color.index.values.tolist(),
                analyse_color.values.tolist(),
            )
        ],
        radius=["30%", "75%"],
        center=["50%", "50%"],
        rosetype="area",
    )
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="各颜色占比"),
        legend_opts=opts.LegendOpts(
            type_="scroll",
            pos_left="80%",
            orient="vertical",
        ),
    )
)
pie.render_notebook()
70+
71+
# Size visualisation: normalise the cup sizes before counting them.
# A size that starts with a digit is kept as is; otherwise its first
# character is moved to the end.
rege = r'\d'
cup_size_new = [
    raw if re.match(rege, raw) else raw[1:] + raw[0]
    for raw in cup_size
]
df2 = pd.DataFrame({'size': cup_size_new})
analyse_size = df2['size'].value_counts()
85+
86+
# Bar chart of the size distribution.
bar = (
    Bar()
    .add_xaxis(analyse_size.index.values.tolist())
    .add_yaxis("", analyse_size.values.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="尺寸分布"),
        # Rotate the x-axis labels so they do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    )
)
bar.render_notebook()
95+
96+
# Star-rating distribution.
star = data['star'].value_counts()
print(star)

# Review text fed to the word cloud.
# NOTE(review): the variable is named "low_star" but the filter keeps the
# rows where star == 5 -- confirm which one is intended.
low_star_content = data.loc[data['star'] == 5, 'content']
stopworld = ('这', '那', '你', '我', '他', '她', '它')

# Font file handed to WordCloud (a Windows path).
font = r'C:\Windows\Fonts\FZSTK.TTF'
105+
106+
107+
def gen_wordcloud(data, pic, world_pic):
    """Render a word-cloud image from a list of review texts.

    Args:
        data: Sequence of review strings. Entries that are not strings
            (for example missing values) are skipped.
        pic: Path of the image that is opened and used as the cloud's mask.
        world_pic: Path the generated word-cloud image is written to.
    """
    # Concatenate every review, including the last one.
    text = ''.join(item for item in data if isinstance(item, str))

    # Join the segmented tokens with spaces so that WordCloud counts the
    # words jieba produced instead of re-tokenising one unbroken run of
    # characters; whitespace-only tokens and stop words are dropped.
    cut_word = ' '.join(
        token
        for token in jieba.lcut(text)
        if token.strip() and token not in stopworld
    )

    # Read the mask into an array, then release the image file.
    with Image.open(pic) as img:
        img_array = np.array(img)

    wc = WordCloud(width=1800, height=1500, background_color='white', font_path=font, mask=img_array)
    wc.generate(cut_word)
    wc.to_file(world_pic)
121+
122+
# Generate the word cloud from the selected reviews.
gen_wordcloud(
    data=low_star_content.values.tolist(),
    pic='money.jpg',
    world_pic='data_wc.png',
)

you163_spider/money.jpg

30.4 KB
Loading

0 commit comments

Comments
 (0)