1
+ # coding = utf-8
2
+ """
3
+ @author: zhou
4
+ @time:2019/10/15 10:37
5
+ @File: analyse.py
6
+ """
7
+
8
+ from pymongo import MongoClient
9
+ import pandas as pd
10
+ from pyecharts .charts import Bar , Pie
11
+ from pyecharts import options as opts
12
+ import re
13
+ from wordcloud import WordCloud
14
+ import jieba
15
+ from PIL import Image
16
+ import numpy as np
17
+
18
# Connect to MongoDB and fetch the data.
# NOTE(review): the URI literal below looks mangled by an e-mail obfuscation
# filter ("[email protected]"); as written its second placeholder is not a valid
# %-format specifier. Restore the original "%s@<host>" part before running.
credentials = ('you163', 'you163')
conn = MongoClient("mongodb://%s:%[email protected] :49974/you163" % credentials)
db = conn.you163
mongo_collection = db.you163

# Load every document of the collection into a DataFrame.
documents = list(mongo_collection.find())
data = pd.DataFrame(documents)
25
+
26
# Extract the colour and the cup size from each row's SKU info.
# NOTE(review): assumes every skuInfo value is a list whose first item is
# "<label>:<colour>" and whose second item is "<label>:<size>" - confirm
# against the collection.
# NOTE(review): the loop nesting was reconstructed from a listing without
# indentation; the colour is recorded for every row, whatever the size label
# is - confirm.
skuinfo = data['skuInfo']

color = []
cup_size = []
for sku in skuinfo.values.tolist():
    size_field = sku[1].split(':')
    if size_field[0] == '杯码':  # label text for "cup size"
        # Keep only the text in front of any '(' annotation.
        cup_size.append(size_field[1].split('(')[0])
    color.append(sku[0].split(':')[1])

# Count how many times each colour occurs.
df = pd.DataFrame(color, columns=['color'])
analyse_color = df['color'].value_counts()
45
+
46
# Colour visualisation: bar chart of the colour counts.
color_names = analyse_color.index.values.tolist()
color_counts = analyse_color.values.tolist()
bar = (
    Bar()
    .add_xaxis(color_names)
    .add_yaxis("", color_counts)
    .set_global_opts(
        # Tilt the axis labels so long colour names do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
        title_opts=opts.TitleOpts(title="颜色分布"),
    )
)
bar.render_notebook()
56
+
57
# Rose-type pie chart showing the share of each colour.
color_share = [
    [name, count]
    for name, count in zip(analyse_color.index.values.tolist(),
                           analyse_color.values.tolist())
]
pie = (
    Pie()
    .add(
        "",
        color_share,
        radius=["30%", "75%"],
        center=["50%", "50%"],
        rosetype="area",
    )
    # Label every slice as "<name>: <percentage>%".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="各颜色占比"),
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_left="80%", orient="vertical"
        ),
    )
)
pie.render_notebook()
70
+
71
# Size visualisation.
# Normalise the size labels: a label that already starts with a digit is kept
# as it is; otherwise its first character is moved to the end
# (e.g. 'B75' -> '75B').
rege = r'\d'
cup_size_new = [
    label if re.match(rege, label) else label[1:] + label[0]
    for label in cup_size
]

# Count how many times each normalised size occurs.
df2 = pd.DataFrame(cup_size_new, columns=['size'])
analyse_size = df2['size'].value_counts()
85
+
86
# Bar chart of the size counts.
size_names = analyse_size.index.values.tolist()
size_counts = analyse_size.values.tolist()
bar = (
    Bar()
    .add_xaxis(size_names)
    .add_yaxis("", size_counts)
    .set_global_opts(
        # Tilt the axis labels so neighbouring sizes do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
        title_opts=opts.TitleOpts(title="尺寸分布"),
    )
)
bar.render_notebook()
95
+
96
# Star-rating distribution.
star = data['star'].value_counts()
print(star)

# Review texts used for the word cloud.
# NOTE(review): the variable is called "low star", yet the filter keeps the
# rows whose star value equals 5 - confirm which rating was intended.
low_star_content = data.loc[data['star'] == 5, 'content']

# Tokens that are dropped from the segmented text before drawing.
stopworld = ('这', '那', '你', '我', '他', '她', '它')

# Hard-coded Windows path of the font file passed to WordCloud.
font = r'C:\Windows\Fonts\FZSTK.TTF'
105
+
106
+
107
def gen_wordcloud(data, pic, world_pic):
    """Render a word cloud of the given review texts and save it to a file.

    The texts are concatenated, segmented with ``jieba``, filtered against the
    module-level ``stopworld`` tuple and drawn with ``WordCloud`` using the
    module-level ``font`` and the pixels of *pic* as the mask.

    Args:
        data: Sequence of review texts. Entries that are not ``str`` (for
            example placeholders for missing comments) are skipped.
        pic: Path of the image whose pixel array is passed as ``mask``.
        world_pic: Path the rendered word cloud image is written to.
    """
    # Use every comment; the former ``range(len(data) - 1)`` loop silently
    # dropped the last one.
    text = ''.join(comment for comment in data if isinstance(comment, str))

    # Keep the jieba tokens separated by spaces. WordCloud splits its input
    # with a word regexp, so gluing the tokens back together without a
    # separator would throw the segmentation away.
    tokens = [word for word in jieba.lcut(text) if word not in stopworld]
    cut_word = ' '.join(tokens)

    # Close the mask file as soon as its pixels have been copied.
    with Image.open(pic) as img:
        img_array = np.array(img)

    wc = WordCloud(width=1800, height=1500, background_color='white',
                   font_path=font, mask=img_array)
    wc.generate(cut_word)
    wc.to_file(world_pic)
121
+
122
# Generate the word cloud for the selected review texts.
gen_wordcloud(
    data=low_star_content.values.tolist(),
    pic='money.jpg',
    world_pic='data_wc.png',
)
0 commit comments