add xlsx export

suqingdong · suqingdong · commit 1d266ae3d589 · 2021-04-13T12:56:55.000+08:00
diff --git a/README.md b/README.md
@@ -1,13 +1,59 @@
-# [LetPub](http://www.letpub.com.cn/index.php?page=grant)
-> 查询快，但频率会有限制
-> 目前数据只更新到了2019年，2020年数据和官网一样受限
+# 国家自然科学基金数据查询系统
 
+### 安装
+```bash
+pip3 install nsfc
+```
 
-# [MedSci](https://www.medsci.cn/sci/nsfc.do)
-> 查询不好(单个查询限制500条，资助类别分类不准确)
-> 有2020年数据（但不是很全）
+### 数据下载
+> 数据库文件较大，可通过百度网盘进行下载
+> ([下载链接](https://pan.baidu.com/s/1eadrfUg1ovBF1EAXWSTV-w) 提取码: `2nw5`)
+- 下载所需的数据库文件，如project.A.sqlite3, 或全部数据project.all.sqlite3
+- 保存至`nsfc`的安装路径下的`data`目录下, 如：`/path/to/site-packages/nsfc/data/project.db`
+- 或者保存至`HOME`路径下的`nsfc_data`目录下，如`~/nsfc_data/project.db`
 
+### 使用示例
+```bash
+# 查看帮助
+nsfc query
+```
 
-# [NSFC](http://output.nsfc.gov.cn/)
-> 资助项目查询已失效，只能查询结题项目
-> 结题项目查询可用
+```bash
+# 列出可用的查询字段
+nsfc query -K
+```
+
+```bash
+# 输出数量
+nsfc query -C
+```
+
+```bash
+# 按批准年份查询
+nsfc query -C -s approval_year 2019
+```
+
+```bash
+# 按批准年份+学科代码(模糊)
+nsfc query -C -s approval_year 2019 -s subject_code "%A%"
+```
+
+```bash
+# 批准年份也可以是一个区间
+nsfc query -C -s approval_year 2015-2019 -s subject_code "%C01%"
+```
+
+```bash
+# 结果输出为.jl文件
+nsfc query -s approval_year 2019 -s subject_code "%C0501%" -o C0501.2019.jl
+```
+
+```bash
+# 结果输出为tsv文件
+nsfc query -s approval_year 2019 -s subject_code "%C0501%" -o C0501.2019.tsv -F tsv
+```
+
+```bash
+# 限制最大输出条数
+nsfc query -L 5 -s approval_year 2019
+```
diff --git a/help.md b/help.md
@@ -0,0 +1,15 @@
+### 数据来源
+#### [LetPub](http://www.letpub.com.cn/index.php?page=grant)
+> 查询快，但频率会有限制
+> 目前数据只更新到了2019年，2020年数据和官网一样受限
+
+
+#### [MedSci](https://www.medsci.cn/sci/nsfc.do)
+> 查询不好(单个查询限制500条，资助类别分类不准确)
+> 有2020年数据（但不是很全）
+
+
+#### [NSFC](http://output.nsfc.gov.cn/)
+> 官网
+> 资助项目查询已失效，只能查询结题项目
+> 结题项目查询可用
diff --git a/nsfc/bin/main.py b/nsfc/bin/main.py
@@ -8,8 +8,14 @@
 
 CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
 
-@click.group(help=version_info['desc'], context_settings=CONTEXT_SETTINGS)
-@click.version_option(version=version_info['version'])
+__epilog__ = click.style(f'''\
+contact: {version_info['author']} <{version_info['author_email']}>
+''', fg='cyan')
+
+@click.group(help=click.style(version_info['desc'], bold=True, fg='green'),
+             epilog=__epilog__,
+             context_settings=CONTEXT_SETTINGS)
+@click.version_option(version=version_info['version'], prog_name=version_info['prog'])
 def cli(**kwargs):
     pass
 
@@ -21,6 +27,5 @@ def main():
     cli()
 
 
-
 if __name__ == '__main__':
     main()
diff --git a/nsfc/bin/query.py b/nsfc/bin/query.py
@@ -2,6 +2,8 @@
 import sys
 import json
 
+import openpyxl
+from openpyxl.styles import Font, PatternFill
 import click
 from prettytable import PrettyTable
 from simple_loggers import SimpleLogger
@@ -19,7 +21,7 @@
     nsfc query -C                                                                   # 输出数量
     nsfc query -C -s approval_year 2019                                             # 按批准年份查询
     nsfc query -C -s approval_year 2019 -s subject_code "%A%"                       # 按批准年份+学科代码(模糊)
-    nsfc query -C -s approval_year 2015-2019 -s subject_code "%C01%"                # 批准年份可以是一个区间
+    nsfc query -C -s approval_year 2015-2019 -s subject_code "%C01%"                # 批准年份也可以是一个区间
     nsfc query -o A.2019.jl -s approval_year 2019 -s subject_code "%A%"             # 结果输出为.jl文件
     nsfc query -F tsv -o A.2019.tsv -s approval_year 2019 -s subject_code "%A%"     # 结果输出为tsv文件
     nsfc query -L 5 -s approval_year 2019                                           # 限制最大输出条数
@@ -36,7 +38,7 @@
 @click.option('-o', '--outfile', help='the output filename')
 
 @click.option('-F', '--format', help='the format of output',
-              type=click.Choice(['json', 'jl', 'tsv']), default='jl',
+              type=click.Choice(['json', 'jl', 'tsv', 'xlsx']), default='jl',
               show_choices=True, show_default=True)
 @click.option('-K', '--keys', help='list the available keys for query', is_flag=True)
 @click.option('-C', '--count', help='just output the out of searching', is_flag=True)
@@ -101,22 +103,42 @@ def main(**kwargs):
         elif not query.count():
             logger.warning('no result for your input')
         else:
-            out = open(outfile, 'w') if outfile else sys.stdout
-            with out:
-                if kwargs['format'] == 'json':
-                    data = [{k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'} for row in query]
-                    out.write(json.dumps(data, ensure_ascii=False, indent=2) + '\n')
-                else:
-                    for n, row in enumerate(query):
-                        context = {k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'}
-                        if n == 0 and kwargs['format'] == 'tsv':
-                            title = '\t'.join(context.keys())
-                            out.write(title + '\n')
-                        if kwargs['format'] == 'tsv':
-                            line = '\t'.join(map(str, context.values()))
-                        else:
-                            line = json.dumps(context, ensure_ascii=False)
-                        out.write(line + '\n')
+            if outfile and kwargs['format'] == 'xlsx':
+                wb = openpyxl.Workbook()
+                ws = wb.active
+                ws.title = 'NSFC-RESULT'
+                title = [k for k, v in query.first().__dict__.items() if k != '_sa_instance_state']
+                ws.append(title)
+                for col, v in enumerate(title,1 ):
+                    _ = ws.cell(row=1, column=col, value=v)
+                    _.font = Font(color='FFFFFF', bold=True)
+                    _.fill = PatternFill(start_color='000000', end_color='000000', fill_type='solid')
+
+                for n, row in enumerate(query):
+                    context = [v for k, v in row.__dict__.items() if k != '_sa_instance_state']
+                    ws.append(context)
+
+                ws.freeze_panes = 'A2'
+                wb.save(outfile)
+            else:
+                out = open(outfile, 'w') if outfile else sys.stdout
+                with out:
+                    if kwargs['format'] == 'json':
+                        data = [{k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'} for row in query]
+                        out.write(json.dumps(data, ensure_ascii=False, indent=2) + '\n')
+                    else:
+                        for n, row in enumerate(query):
+                            context = {k: v for k, v in row.__dict__.items() if k != '_sa_instance_state'}
+                            if n == 0 and kwargs['format'] == 'tsv':
+                                title = '\t'.join(context.keys())
+                                out.write(title + '\n')
+                            if kwargs['format'] == 'tsv':
+                                line = '\t'.join(map(str, context.values()))
+                            else:
+                                line = json.dumps(context, ensure_ascii=False)
+                            out.write(line + '\n')
+            if outfile:
+                logger.info(f'save file: {outfile}')
 
 
 if __name__ == '__main__':
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,7 @@
 click
+openpyxl
+requests
 sqlalchemy
+prettytable
 webrequests
 simple-loggers