Skip to content

Commit 2b0f33a

Browse files
committed
init
0 parents  commit 2b0f33a

23 files changed

+654
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*pyc
2+
*__pycache__
3+
.vscode

LICENSE

Whitespace-only changes.

MANIFEST.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
include requirements.txt
2+
include nsfc/version/version.json
3+
include nsfc/data/*.db

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# [LetPub](http://www.letpub.com.cn/index.php?page=grant)
2+
> 查询快,但频率会有限制
3+
> 目前数据只更新到了2019年,2020年数据和官网一样受限
4+
5+
6+
# [MedSci](https://www.medsci.cn/sci/nsfc.do)
7+
> 查询不好(单个查询限制500条,资助类别分类不准确)
8+
> 有2020年数据(不知道全不全)
9+
10+
11+
# [NSFC](http://output.nsfc.gov.cn/)
12+
> 资助项目查询已失效,只能查询结题项目

build.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Clean up Python bytecode caches and packaging artifacts.

# -type d: only match directories named __pycache__.
# -prune:  do not descend into a directory that is about to be deleted,
#          which avoids "No such file or directory" errors from find.
# {} +:    batch all matches into as few rm invocations as possible.
find . -type d -name __pycache__ -prune -exec rm -rf {} +

# Remove setuptools build outputs from the current directory.
rm -rf *.egg-info build dist
4+

nsfc/__init__.py

Whitespace-only changes.

nsfc/bin/__init__.py

Whitespace-only changes.

nsfc/bin/build.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from pprint import pprint
2+
3+
import click
4+
5+
from nsfc.db.model import Project
6+
from nsfc.db.manager import Manager
7+
from nsfc.src.official import Official
8+
from nsfc.util.parse_data import parse
9+
10+
11+
@click.command()
@click.option('-i', '--infile', help='the input filename', required=True)
@click.option('-d', '--dbfile', help='the path of database file', default='project.db')
@click.option('--echo', help='turn echo on for sqlalchemy', is_flag=True)
@click.option('--drop', help='drop table before creating', is_flag=True)
def build(**kwargs):
    """Load parsed project records from ``infile`` into a SQLite database.

    Each record yielded by ``parse`` is looked up by ``project_id``; records
    already present are skipped. New records are turned into ``Project`` rows
    and, when ``Official.get_conclusion_data`` returns data for the project,
    enriched with the conclusion fields before being inserted.

    Keyword arguments (supplied by click):
        infile: path of the input file passed to ``parse``.
        dbfile: path of the SQLite database file.
        echo: forwarded to ``Manager`` as ``echo``.
        drop: forwarded to ``Manager`` as ``drop``.
    """
    print(kwargs)
    uri = 'sqlite:///{dbfile}'.format(dbfile=kwargs['dbfile'])

    # (Project attribute, key in the conclusion payload), applied in order.
    conclusion_fields = (
        ('project_type_code', 'projectType'),
        ('abstract', 'projectAbstractC'),
        ('abstract_en', 'projectAbstractE'),
        ('abstract_conc', 'conclusionAbstract'),
        ('keyword', 'projectKeywordC'),
        ('keyword_en', 'projectKeywordE'),
        ('result_stat', 'result_stat'),
    )

    with Manager(uri=uri, echo=kwargs['echo'], drop=kwargs['drop']) as m:
        for data in parse(kwargs['infile']):
            project_id = data['project_id']

            existing = m.query(Project, 'project_id', project_id).first()
            if existing:
                print('*** skip ***', existing)
                continue

            project = Project(**data)

            conc_data = Official.get_conclusion_data(project_id)
            if conc_data:
                project.finished = True
                for attr, key in conclusion_fields:
                    setattr(project, attr, conc_data.get(key))

            pprint(project.as_dict)
            m.insert(Project, 'project_id', project)
            # NOTE(review): the loop stops after the first newly inserted
            # project -- this looks like a debugging leftover; confirm before
            # relying on this command to import a whole file.
            break
43+
44+
45+
if __name__ == '__main__':
46+
build()

nsfc/bin/main.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import os
2+
import json
3+
4+
import click
5+
6+
from nsfc.src.letpub import LetPub
7+
from nsfc.src.official import Official
8+
9+
from nsfc.db.model import Project
10+
from nsfc.db.manager import Manager
11+
from nsfc.util.parse_data import parse
12+
13+
14+
@click.command()
@click.option('-y', '--year', help='the year of searching', required=True)
@click.option('-c', '--code', help='the code of subject', required=True)
@click.option('-O', '--outdir', help='the output directory')
def main(**kwargs):
    """Download LetPub search results for every code listed under a subject.

    For each code in ``letpub.list_codes[code]`` the results of
    ``letpub.search`` for the given year are written, one JSON object per
    line, to ``<outdir>/<code>.<year>.jl``. Codes whose output file already
    exists in ``outdir`` are skipped, so an interrupted run can be resumed.

    Keyword arguments (supplied by click):
        year: the year used as both ``start_year`` and ``end_year``.
        code: the subject code whose listed codes are searched.
        outdir: output directory; defaults to ``done/<code[0]>/<year>``.

    Raises:
        Exception: any error raised while searching or writing is re-raised
            after the partially written output file has been removed.
    """
    year = kwargs['year']
    subject = kwargs['code']
    letpub = LetPub()
    outdir = kwargs['outdir'] or os.path.join('done', subject[0], str(year))
    os.makedirs(outdir, exist_ok=True)

    # File names look like "<code>.<year>.jl"; strip the extension so the
    # entries compare directly against f'{code}.{year}'.
    done = {name.rsplit('.', 1)[0] for name in os.listdir(outdir)}

    def discard_partial(path):
        # A leftover partial file would be mistaken for a finished download
        # on the next run, so remove it whenever a download does not complete.
        if os.path.exists(path):
            os.remove(path)

    for code in sorted(letpub.list_codes[subject]):
        if f'{code}.{year}' in done:
            continue
        outfile = f'{outdir}/{code}.{year}.jl'
        try:
            # Explicit UTF-8: the JSON is written with ensure_ascii=False, so
            # the platform default encoding must not be relied upon.
            with open(outfile, 'w', encoding='utf-8') as out:
                for context in letpub.search(code, start_year=year, end_year=year):
                    line = json.dumps(context, ensure_ascii=False) + '\n'
                    out.write(line)
            click.secho(f'save file: {outfile}')
        except KeyboardInterrupt:
            discard_partial(outfile)
            break
        except Exception:
            discard_partial(outfile)
            raise
40+
41+
42+
if __name__ == '__main__':
43+
main()
44+
45+
# root_codes = Official.list_root_codes()
46+
# child_codes = Official.list_child_codes('A')
47+
48+
49+
50+
51+
# done = {each.split('.')[0]: 1 for each in os.listdir('A')}
52+
53+
# year = 2019
54+
# for child in child_codes:
55+
# if child in done:
56+
# # print(f'{child} is done')
57+
# continue
58+
# with open(f'{child}.{year}.jl', 'w') as out:
59+
# for context in letpub.search(child, start_year=year, end_year=year):
60+
# line = json.dumps(context, ensure_ascii=False) + '\n'
61+
# out.write(line)

nsfc/db/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)