Skip to content

Commit 1d266ae

Browse files
committed
add xlsx export
1 parent 051640e commit 1d266ae

File tree

5 files changed

+121
-30
lines changed

5 files changed

+121
-30
lines changed

README.md

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,59 @@
1-
# [LetPub](http://www.letpub.com.cn/index.php?page=grant)
2-
> 查询快,但频率会有限制
3-
> 目前数据只更新到了2019年,2020年数据和官网一样受限
1+
# 国家自然科学基金数据查询系统
42

3+
### 安装
4+
```bash
5+
pip3 install nsfc
6+
```
57

6-
# [MedSci](https://www.medsci.cn/sci/nsfc.do)
7-
> 查询不好(单个查询限制500条,资助类别分类不准确)
8-
> 有2020年数据(但不是很全)
8+
### 数据下载
9+
> 数据库文件较大,可通过百度网盘进行下载
10+
> ([下载链接](https://pan.baidu.com/s/1eadrfUg1ovBF1EAXWSTV-w) 提取码: `2nw5`)
11+
- 下载所需的数据库文件,如`project.A.sqlite3`,或全部数据`project.all.sqlite3`
12+
- 保存至`nsfc`的安装路径下的`data`目录下, 如:`/path/to/site-packages/nsfc/data/project.db`
13+
- 或者保存至`HOME`路径下的`nsfc_data`目录下,如`~/nsfc_data/project.db`
914

15+
### 使用示例
16+
```bash
17+
# 查看帮助
18+
nsfc query
19+
```
1020

11-
# [NSFC](http://output.nsfc.gov.cn/)
12-
> 资助项目查询已失效,只能查询结题项目
13-
> 结题项目查询可用
21+
```bash
22+
# 列出可用的查询字段
23+
nsfc query -K
24+
```
25+
26+
```bash
27+
# 输出数量
28+
nsfc query -C
29+
```
30+
31+
```bash
32+
# 按批准年份查询
33+
nsfc query -C -s approval_year 2019
34+
```
35+
36+
```bash
37+
# 按批准年份+学科代码(模糊)
38+
nsfc query -C -s approval_year 2019 -s subject_code "%A%"
39+
```
40+
41+
```bash
42+
# 批准年份也可以是一个区间
43+
nsfc query -C -s approval_year 2015-2019 -s subject_code "%C01%"
44+
```
45+
46+
```bash
47+
# 结果输出为.jl文件
48+
nsfc query -s approval_year 2019 -s subject_code "%C0501%" -o C0501.2019.jl
49+
```
50+
51+
```bash
52+
# 结果输出为tsv文件
53+
nsfc query -s approval_year 2019 -s subject_code "%C0501%" -o C0501.2019.tsv -F tsv
54+
```
55+
56+
```bash
57+
# 限制最大输出条数
58+
nsfc query -L 5 -s approval_year 2019
59+
```

help.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
### 数据来源
2+
#### [LetPub](http://www.letpub.com.cn/index.php?page=grant)
3+
> 查询快,但频率会有限制
4+
> 目前数据只更新到了2019年,2020年数据和官网一样受限
5+
6+
7+
#### [MedSci](https://www.medsci.cn/sci/nsfc.do)
8+
> 查询不好(单个查询限制500条,资助类别分类不准确)
9+
> 有2020年数据(但不是很全)
10+
11+
12+
#### [NSFC](http://output.nsfc.gov.cn/)
13+
> 官网
14+
> 资助项目查询已失效,只能查询结题项目
15+
> 结题项目查询可用

nsfc/bin/main.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,14 @@
88

99
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
1010

11-
@click.group(help=version_info['desc'], context_settings=CONTEXT_SETTINGS)
12-
@click.version_option(version=version_info['version'])
11+
__epilog__ = click.style(f'''\
12+
contact: {version_info['author']} <{version_info['author_email']}>
13+
''', fg='cyan')
14+
15+
@click.group(help=click.style(version_info['desc'], bold=True, fg='green'),
16+
epilog=__epilog__,
17+
context_settings=CONTEXT_SETTINGS)
18+
@click.version_option(version=version_info['version'], prog_name=version_info['prog'])
1319
def cli(**kwargs):
1420
pass
1521

@@ -21,6 +27,5 @@ def main():
2127
cli()
2228

2329

24-
2530
if __name__ == '__main__':
2631
main()

nsfc/bin/query.py

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import sys
33
import json
44

5+
import openpyxl
6+
from openpyxl.styles import Font, PatternFill
57
import click
68
from prettytable import PrettyTable
79
from simple_loggers import SimpleLogger
@@ -19,7 +21,7 @@
1921
nsfc query -C # 输出数量
2022
nsfc query -C -s approval_year 2019 # 按批准年份查询
2123
nsfc query -C -s approval_year 2019 -s subject_code "%A%" # 按批准年份+学科代码(模糊)
22-
nsfc query -C -s approval_year 2015-2019 -s subject_code "%C01%" # 批准年份可以是一个区间
24+
nsfc query -C -s approval_year 2015-2019 -s subject_code "%C01%" # 批准年份也可以是一个区间
2325
nsfc query -o A.2019.jl -s approval_year 2019 -s subject_code "%A%" # 结果输出为.jl文件
2426
nsfc query -F tsv -o A.2019.tsv -s approval_year 2019 -s subject_code "%A%" # 结果输出为tsv文件
2527
nsfc query -L 5 -s approval_year 2019 # 限制最大输出条数
@@ -36,7 +38,7 @@
3638
@click.option('-o', '--outfile', help='the output filename')
3739

3840
@click.option('-F', '--format', help='the format of output',
39-
type=click.Choice(['json', 'jl', 'tsv']), default='jl',
41+
type=click.Choice(['json', 'jl', 'tsv', 'xlsx']), default='jl',
4042
show_choices=True, show_default=True)
4143
@click.option('-K', '--keys', help='list the available keys for query', is_flag=True)
4244
@click.option('-C', '--count', help='just output the out of searching', is_flag=True)
@@ -101,22 +103,42 @@ def main(**kwargs):
101103
elif not query.count():
102104
logger.warning('no result for your input')
103105
else:
104-
out = open(outfile, 'w') if outfile else sys.stdout
105-
with out:
106-
if kwargs['format'] == 'json':
107-
data = [{k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'} for row in query]
108-
out.write(json.dumps(data, ensure_ascii=False, indent=2) + '\n')
109-
else:
110-
for n, row in enumerate(query):
111-
context = {k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'}
112-
if n == 0 and kwargs['format'] == 'tsv':
113-
title = '\t'.join(context.keys())
114-
out.write(title + '\n')
115-
if kwargs['format'] == 'tsv':
116-
line = '\t'.join(map(str, context.values()))
117-
else:
118-
line = json.dumps(context, ensure_ascii=False)
119-
out.write(line + '\n')
106+
if outfile and kwargs['format'] == 'xlsx':
107+
wb = openpyxl.Workbook()
108+
ws = wb.active
109+
ws.title = 'NSFC-RESULT'
110+
title = [k for k, v in query.first().__dict__.items() if k != '_sa_instance_state']
111+
ws.append(title)
112+
for col, v in enumerate(title,1 ):
113+
_ = ws.cell(row=1, column=col, value=v)
114+
_.font = Font(color='FFFFFF', bold=True)
115+
_.fill = PatternFill(start_color='000000', end_color='000000', fill_type='solid')
116+
117+
for n, row in enumerate(query):
118+
context = [v for k, v in row.__dict__.items() if k != '_sa_instance_state']
119+
ws.append(context)
120+
121+
ws.freeze_panes = 'A2'
122+
wb.save(outfile)
123+
else:
124+
out = open(outfile, 'w') if outfile else sys.stdout
125+
with out:
126+
if kwargs['format'] == 'json':
127+
data = [{k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'} for row in query]
128+
out.write(json.dumps(data, ensure_ascii=False, indent=2) + '\n')
129+
else:
130+
for n, row in enumerate(query):
131+
context = {k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'}
132+
if n == 0 and kwargs['format'] == 'tsv':
133+
title = '\t'.join(context.keys())
134+
out.write(title + '\n')
135+
if kwargs['format'] == 'tsv':
136+
line = '\t'.join(map(str, context.values()))
137+
else:
138+
line = json.dumps(context, ensure_ascii=False)
139+
out.write(line + '\n')
140+
if outfile:
141+
logger.info(f'save file: {outfile}')
120142

121143

122144
if __name__ == '__main__':

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
click
2+
openpyxl
3+
requests
24
sqlalchemy
5+
prettytable
36
webrequests
47
simple-loggers

0 commit comments

Comments
 (0)