Skip to content

Commit 4f7fee4

Browse files
committed
add job code
1 parent 29ce7ee commit 4f7fee4

File tree

6 files changed

+108
-2
lines changed

6 files changed

+108
-2
lines changed

.idea/workspace.xml

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

boss_spider/analyse.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
@author: zhou
4+
@time:2019/10/11 15:40
5+
@File: analyse.py
6+
"""
7+
8+
from pymongo import MongoClient
9+
import pandas as pd
10+
from pyecharts.charts import Bar
11+
from pyecharts import options as opts
12+
from wordcloud import WordCloud
13+
import jieba
14+
from PIL import Image
15+
import numpy as np
16+
17+
18+
# Open the MongoDB connection used by every analysis step below.
# NOTE(review): the URI looks garbled -- "%[email protected]" appears to be an
# e-mail-obfuscation artifact that replaced the second "%s" placeholder and
# the host name. As written, the %-formatting cannot consume both tuple
# values; restore the original "%s@<host>" segment before running.
# NOTE(review): the user name and password are hard-coded in source; consider
# loading them from the environment or a config file instead.
job_conn = MongoClient("mongodb://%s:%[email protected]:51612/boss" % ('boss', 'boss123'))
19+
# Database "boss" on that connection.
job_db = job_conn.boss
20+
# Collection "boss": the job postings analysed below.
job_collection = job_db.boss
21+
# Collection "job_details": per-job records with a 'details' text field.
details_collection = job_db.job_details
22+
23+
# Load every posting into a DataFrame and snapshot it to job.csv.
job = pd.DataFrame(list(job_collection.find()))
24+
job.to_csv("job.csv", encoding='utf-8')
25+
26+
# Same for the job-detail documents, snapshotted to job_detail.csv.
job_detail = pd.DataFrame(list(details_collection.find()))
27+
job_detail.to_csv('job_detail.csv', encoding='utf-8')
28+
29+
# Salary levels: bar chart of the ten most frequent salary ranges.
salary_distribute = job['salary'].value_counts()
top_salary = salary_distribute.head(10)  # value_counts() is already sorted descending
bar = (
    Bar()
    .add_xaxis(top_salary.index.tolist())
    .add_yaxis("", top_salary.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="薪资分布", subtitle="薪资分布前10"),
        # Tilt the category labels so long salary ranges do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    )
)
bar.render_notebook()
39+
40+
# High-paying employers: postings whose salary field is exactly '30-60K'.
height_salary = job[job['salary'] == '30-60K']
height_salary_year_edu = height_salary[['name', 'salary', 'year', 'edu']]

# Job requirements: collect the description text of each high-paying posting.
# NOTE(review): `job` and `job_detail` are matched purely by DataFrame row
# label, which presumably relies on both collections being stored in the same
# order -- confirm, or join on a shared job identifier instead.
height_salary_index = height_salary.index.values.tolist()
# Look each label up directly instead of building a boolean mask over the
# whole of job_detail per posting, and skip labels that job_detail does not
# contain (the previous `.tolist()[0]` raised IndexError in that case).
details = [
    job_detail.at[label, 'details']
    for label in height_salary_index
    if label in job_detail.index
]
print(details)
51+
52+
53+
# Word cloud
# Tokens dropped before rendering (generic headings such as "position",
# "description", "responsibilities", "requirements").
stopworld = ('职位', '描述', '岗位职责', '岗位', '任职', '要求', '分项')
# NOTE(review): absolute Windows font path; it will not resolve on other
# platforms.
font = r'C:\Windows\Fonts\FZSTK.TTF'


def gen_wordcloud(data, pic, world_pic):
    """Render a word cloud from job-description texts and save it to disk.

    Args:
        data: sequence of description strings; every element is used.
        pic: path of the image whose pixel array is passed as the cloud mask.
        world_pic: output path for the rendered word-cloud image.
    """
    # Use every description: the previous `range(len(data) - 1)` loop
    # silently dropped the last one. Newlines keep the last word of one
    # description from fusing with the first word of the next.
    text = '\n'.join(data)
    tokens = jieba.lcut(text)

    # Re-join the surviving tokens with spaces so WordCloud's own tokenizer
    # sees jieba's word boundaries; gluing them back together with no
    # separator discarded the segmentation entirely.
    cut_word = ' '.join(
        token for token in tokens
        if token not in stopworld and token.strip()
    )

    # Read the mask inside a context manager so the image file is closed.
    with Image.open(pic) as img:
        img_array = np.array(img)

    # NOTE(review): per the wordcloud docs, width/height are ignored when a
    # mask is supplied; they are kept here unchanged.
    wc = WordCloud(width=1800, height=1500, background_color='white', font_path=font, mask=img_array)
    wc.generate(cut_word)
    wc.to_file(world_pic)
70+
71+
72+
# Word cloud of the collected `details` texts, using money.jpg as the mask
# image and writing the result to money_wc.png.
gen_wordcloud(details, 'money.jpg', 'money_wc.png')
73+
74+
# Years of experience: bar chart of the ten most frequent requirements.
year_distribute = job['year'].value_counts()
top_year = year_distribute.head(10)  # value_counts() is already sorted descending
bar = (
    Bar()
    .add_xaxis(top_year.index.tolist())
    .add_yaxis("", top_year.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="工龄分布"),
        # Tilt the category labels so they do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    )
)
bar.render_notebook()
85+
86+
# Education requirements: bar chart of the ten most frequent levels.
edu_distribute = job['edu'].value_counts()
top_edu = edu_distribute.head(10)  # value_counts() is already sorted descending
bar = (
    Bar()
    .add_xaxis(top_edu.index.tolist())
    .add_yaxis("", top_edu.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="学历分布"),
        # Tilt the category labels so they do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    )
)
bar.render_notebook()
97+
98+
99+
# Employers recruiting candidates with a master's degree.
masters_only = job['edu'] == '硕士'
height_edu = job.loc[masters_only, ['name', 'salary', 'year', 'edu']]

# Word cloud over every job description.
all_job_detail = job_detail['details'].tolist()
gen_wordcloud(all_job_detail, 'job.jpg', 'fulljob_wc.png')

boss_spider/fulljob_wc.png

81.6 KB
Loading

boss_spider/job.jpg

23.5 KB
Loading

boss_spider/money.jpg

30.4 KB
Loading

boss_spider/money_wc.png

251 KB
Loading

0 commit comments

Comments
 (0)