Skip to content

Commit e01d58c

Browse files
committed
first version
0 parents  commit e01d58c

File tree

5 files changed

+255
-0
lines changed

5 files changed

+255
-0
lines changed

.gitignore

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2+
3+
# dependencies
4+
**/node_modules
5+
# roadhog-api-doc ignore
6+
/src/utils/request-temp.js
7+
_roadhog-api-doc
8+
9+
# production
10+
/dist
11+
/.vscode
12+
13+
# misc
14+
.DS_Store
15+
npm-debug.log*
16+
yarn-error.log
17+
18+
/coverage
19+
.idea
20+
yarn.lock
21+
package-lock.json
22+
*bak
23+
.vscode
24+
25+
# visual studio code
26+
.history
27+
*.log
28+
functions/*
29+
.temp/**
30+
31+
# umi
32+
.umi
33+
.umi-production
34+
35+
# screenshot
36+
screenshot
37+
.firebase
38+
.eslintcache
39+
40+
build

README.MD

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
### 功能描述:
2+
3+
将深圳市电子发票贴到公司报销单模板中,并导出为pdf文件。
4+
5+
### 注意事项:
6+
7+
只能运行在Windows上,且需要已安装MS Office(脚本会调用PowerPoint导出pdf)。
8+
9+
### 依赖:
10+
11+
- python 3.7
12+
- python-pptx 负责操作pptx
13+
- PyMuPDF 负责操作pdf
14+
- comtypes 负责调用系统已安装的ppt程序,导出pdf
15+
- PyPDF2 负责合并多个pdf文件
16+
17+
### 安装:
18+
19+
1. 安装python3.7
20+
21+
2. 安装对应的第三方库
22+
23+
```
24+
pip install -r requirements.txt
25+
```
26+
27+
### 使用步骤:
28+
29+
1、将电子发票pdf文件放到"inputs"文件夹;
30+
31+
2、运行如下脚本,即可在"outputs"文件夹中生成pdf票据文件(合并后的文件为All.pdf);
32+
33+
```shell
34+
python .\invoice_converter.py
35+
```
36+
37+
### 实现思路:
38+
39+
完全按照手动贴票的流程进行:
40+
41+
电子发票pdf------>PNG图片------>插入pptx模板(调整位置、大小)------>导出pdf文件
42+

invoice_converter.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
#!/usr/bin/env python
2+
# coding=utf-8
3+
4+
import sys, datetime, fitz, os, codecs
5+
from pptx import Presentation
6+
from pptx.util import Cm
7+
import comtypes.client
8+
from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger
9+
10+
# Folder that is scanned for the source e-invoice PDF files.
invoicePath = "./inputs"
# Scratch folder that receives one PNG rendering per invoice.
tempImagePath = "./temp/images"
# Scratch folder that receives one filled-in PPTX per invoice.
tempPptxPath = "./temp/pptx"
# Slide template onto which each invoice image is pasted.
templatePptxPath = "./凭证粘贴模板.pptx"
# Folder that receives the exported PDF files.
outPath = "./outputs"
15+
16+
def pyMuPDF_fitz(pdfPath, imagePath):
    """Render the first page of a PDF file to a PNG image.

    The image is written to ``<imagePath>/<PDF base name>.png``. Only page 0
    of the document is rendered.

    Args:
        pdfPath: Path of the PDF file to render.
        imagePath: Folder that receives the PNG; created when missing.

    Raises:
        IndexError: Presumably raised by PyMuPDF when the document has no
            pages (page 0 is accessed unconditionally) -- TODO confirm.
    """
    pdfName = os.path.splitext(os.path.basename(pdfPath))[0]

    # Same zoom factor on both axes; a larger value yields a proportionally
    # larger raster. No rotation is applied.
    zoom = 2.5
    mat = fitz.Matrix(zoom, zoom)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(imagePath, exist_ok=True)

    pdfDoc = fitz.open(pdfPath)
    try:
        pix = pdfDoc[0].getPixmap(matrix=mat, alpha=False)
        pix.writePNG('%s/%s.png' % (imagePath, pdfName))
    finally:
        # Release the document handle even when rendering fails.
        pdfDoc.close()
56+
57+
def batchPdf2Png(invoicePath, outPngPath):
    """Render every PDF file directly inside a folder to a PNG image.

    Sub-folders are not searched. Files are processed in sorted name order.

    Args:
        invoicePath: Folder containing the invoice PDF files.
        outPngPath: Folder that receives the PNG images.
    """
    for name in sorted(os.listdir(invoicePath)):
        # Compare case-insensitively so that e.g. "INVOICE.PDF" is not skipped.
        if not name.lower().endswith('.pdf'):
            continue
        fullpath = os.path.join(invoicePath, name)
        # A folder whose name happens to end in ".pdf" is not a document.
        if os.path.isfile(fullpath):
            pyMuPDF_fitz(fullpath, outPngPath)
63+
64+
def insertPngInSlide(path_to_presentation, img_path, output_path='test.pptx'):
    """Paste one image onto the first slide of a presentation and save a copy.

    Args:
        path_to_presentation: Path of the PPTX file to open.
        img_path: Path of the image to insert.
        output_path: Where the modified presentation is saved. Defaults to
            ``'test.pptx'``, the location that used to be hard-coded.
    """
    prs = Presentation(path_to_presentation)
    slide = prs.slides[0]
    # Fixed position and size of the picture on the slide.
    left, top, width, height = Cm(4.28), Cm(2.79), Cm(25.58), Cm(16.59)
    slide.shapes.add_picture(img_path, left, top, height=height, width=width)
    prs.save(output_path)
71+
72+
def batchInsertPngInSlide(path_to_tmpl_presentation, imgsPath):
    """Create one PPTX per PNG by pasting the image onto the template.

    For every ``*.png`` directly inside ``imgsPath`` the template is opened
    afresh, the image is placed on its first slide, and the result is saved
    as ``<tempPptxPath>/<image base name>.pptx``.

    Args:
        path_to_tmpl_presentation: Path of the PPTX template.
        imgsPath: Folder containing the PNG images.
    """
    # Fixed position and size of the picture on the slide.
    left, top, width, height = Cm(4.28), Cm(2.79), Cm(25.58), Cm(16.59)

    # Nothing else creates the scratch folder, and saving into a missing
    # folder fails, so make sure it exists before the first save.
    os.makedirs(tempPptxPath, exist_ok=True)

    for pngfile in os.listdir(imgsPath):
        if not pngfile.endswith('.png'):
            continue
        fullpath = os.path.join(imgsPath, pngfile)

        # Re-open the template each time so every output holds one image only.
        prs = Presentation(path_to_tmpl_presentation)
        slide = prs.slides[0]
        slide.shapes.add_picture(fullpath, left, top, height=height, width=width)
        pptxPath = os.path.join(tempPptxPath, os.path.splitext(pngfile)[0] + '.pptx')
        prs.save(pptxPath)
85+
86+
def init_powerpoint():
    """Create the PowerPoint COM application object and return it.

    The object's ``Visible`` property is set to 1 before it is handed back.
    """
    app = comtypes.client.CreateObject("Powerpoint.Application")
    app.Visible = 1
    return app
90+
91+
def ppt_to_pdf(powerpoint, inputFileName, outputFileName, formatType = 32):
    """Export one presentation through PowerPoint, by default as PDF.

    Args:
        powerpoint: PowerPoint application object (see ``init_powerpoint``).
        inputFileName: Path of the presentation to open.
        outputFileName: Target path. Unless it already ends in ``.pdf``
            (any letter case), a trailing ``.ppt``/``.pptx`` suffix is dropped
            and ``.pdf`` is appended.
        formatType: Value passed to ``SaveAs``; 32 selects PDF.
    """
    root, ext = os.path.splitext(outputFileName)
    if ext.lower() != '.pdf':
        # Only touch the real suffix: str.replace() would also rewrite a
        # ".pptx"/".ppt" that appears inside a folder name.
        if ext.lower() in ('.ppt', '.pptx'):
            outputFileName = root
        outputFileName += '.pdf'

    deck = powerpoint.Presentations.Open(inputFileName)
    try:
        deck.SaveAs(outputFileName, formatType)
    finally:
        # Close the presentation even when the export fails.
        deck.Close()
99+
100+
def convert_files_in_folder(powerpoint, folder, outPath):
    """Export every presentation directly inside a folder to PDF.

    Each ``.ppt``/``.pptx`` file (any letter case) becomes
    ``<outPath>/<base name>.pdf``.

    Args:
        powerpoint: PowerPoint application object (see ``init_powerpoint``).
        folder: Folder containing the presentations.
        outPath: Folder that receives the PDF files; created when missing.
    """
    # Nothing else creates the output folder, so make sure it exists.
    os.makedirs(outPath, exist_ok=True)

    for pptfile in os.listdir(folder):
        if not pptfile.lower().endswith(('.ppt', '.pptx')):
            continue
        fullpath = os.path.join(folder, pptfile)
        pdfpath = os.path.join(outPath, os.path.splitext(pptfile)[0] + '.pdf')
        ppt_to_pdf(powerpoint, fullpath, pdfpath)
107+
108+
def del_file(path_data):
    """Delete every file below a folder while leaving the folders in place.

    Args:
        path_data: Folder to clean. Anything inside it that is not a regular
            file is treated as a folder and cleaned recursively.
    """
    for entry in os.listdir(path_data):
        # os.path.join picks the platform's separator instead of a literal "\\".
        file_data = os.path.join(path_data, entry)
        if os.path.isfile(file_data):
            os.remove(file_data)
        else:
            del_file(file_data)
115+
116+
def getfilenames(filepath='', filelist_out=None, file_ext='all'):
    """Collect the paths of all files below a folder, sub-folders included.

    Args:
        filepath: Folder to walk.
        filelist_out: Optional list to append to. A new list is created for
            each call when omitted, so results no longer leak between calls.
        file_ext: ``'all'`` to collect every file, otherwise an extension
            including the dot (e.g. ``'.pdf'``) that must match exactly.

    Returns:
        The list the paths were appended to.
    """
    if filelist_out is None:
        filelist_out = []
    for fpath, _dirs, fs in os.walk(filepath):
        for f in fs:
            fi_d = os.path.join(fpath, f)
            if file_ext == 'all' or os.path.splitext(fi_d)[1] == file_ext:
                filelist_out.append(fi_d)
    return filelist_out
128+
129+
def mergefiles(path, output_filename, import_bookmarks=False):
    """Merge every PDF below a folder into one PDF inside that folder.

    Each merged file gets a bookmark named after the source file (without
    extension). Encrypted files are reported and skipped.

    Args:
        path: Folder searched (recursively) for PDF files; the merged file is
            written here as well.
        output_filename: File name of the merged PDF.
        import_bookmarks: When True, bookmarks of the source files are
            imported into the merged file too.

    Raises:
        SystemExit: When there is no PDF file to merge.
    """
    out_filename = os.path.join(os.path.abspath(path), output_filename)
    out_key = os.path.normcase(out_filename)

    # Pass a fresh list explicitly so the result never depends on earlier
    # calls, and drop the result of a previous run: it sits in the same
    # folder and would otherwise be merged into the new output.
    filelist = [
        name
        for name in getfilenames(filepath=path, filelist_out=[], file_ext='.pdf')
        if os.path.normcase(os.path.abspath(name)) != out_key
    ]
    if len(filelist) == 0:
        print("当前目录及子目录下不存在pdf文件")
        sys.exit()

    merger = PdfFileMerger()
    try:
        for filename in filelist:
            # The with-block closes the file on every path, including the
            # encrypted-file skip below.
            with open(filename, 'rb') as f:
                file_rd = PdfFileReader(f)
                if file_rd.isEncrypted:
                    print('不支持的加密文件:%s'%(filename))
                    continue
                short_filename = os.path.basename(os.path.splitext(filename)[0])
                merger.append(file_rd, bookmark=short_filename, import_bookmarks=import_bookmarks)
            print('合并文件:%s'%(filename))
        merger.write(out_filename)
        print('合并后的输出文件:%s'%(out_filename))
    finally:
        merger.close()
151+
152+
def excetue():
    """Run the whole pipeline: PDF -> PNG -> PPTX -> PDF -> merged PDF.

    Reads invoices from ``invoicePath``, uses ``tempImagePath`` and
    ``tempPptxPath`` as scratch folders, writes one PDF per invoice plus
    ``All.pdf`` into ``outPath``.
    """
    # Render each invoice PDF to a PNG.
    batchPdf2Png(invoicePath, tempImagePath)
    # Paste each PNG onto the PPTX template.
    batchInsertPngInSlide(templatePptxPath, tempImagePath)
    # Export each PPTX to PDF through PowerPoint.
    powerpoint = init_powerpoint()
    try:
        # NOTE(review): absolute paths are presumably needed because
        # PowerPoint resolves relative paths against its own working
        # directory -- confirm.
        absPptxPath = os.path.abspath(tempPptxPath)
        absOutPath = os.path.abspath(outPath)
        convert_files_in_folder(powerpoint, absPptxPath, absOutPath)
    finally:
        # Always shut PowerPoint down, even when the export fails.
        powerpoint.Quit()
    # Remove the intermediate files.
    del_file(tempImagePath)
    del_file(tempPptxPath)
    # Merge the exported PDF files.
    mergefiles(outPath, 'All.pdf')


if __name__ == '__main__':
    excetue()

requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
python-pptx == 0.6.18
2+
PyMuPDF == 1.17.0
3+
comtypes == 1.1.7
4+
PyPDF2 == 1.26.0

凭证粘贴模板.pptx

1.01 MB
Binary file not shown.

0 commit comments

Comments
 (0)