|
| 1 | +#!/usr/bin/env python |
| 2 | +# coding=utf-8 |
| 3 | + |
| 4 | +import sys, datetime, fitz, os, codecs |
| 5 | +from pptx import Presentation |
| 6 | +from pptx.util import Cm |
| 7 | +import comtypes.client |
| 8 | +from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger |
| 9 | + |
# Folder that is scanned for the source invoice PDFs.
invoicePath = "./inputs"
# Scratch folder that receives the PNG rendered from each invoice.
tempImagePath = "./temp/images"
# Scratch folder that receives one PPTX per rendered invoice image.
tempPptxPath = "./temp/pptx"
# PowerPoint template whose first slide gets the invoice image placed on it.
templatePptxPath = "./凭证粘贴模板.pptx"
# Folder that receives the exported per-invoice PDFs and the merged PDF.
outPath = "./outputs"
| 15 | + |
def pyMuPDF_fitz(pdfPath, imagePath):
    """Render the first page of a PDF to a PNG file.

    Only page 0 is rendered. The image is written to
    ``<imagePath>/<pdf base name without extension>.png``; *imagePath* is
    created when it does not exist yet.

    Args:
        pdfPath: Path of the PDF file to render.
        imagePath: Directory that receives the PNG.

    Raises:
        Whatever ``fitz.open``, page access, or the PNG write raise (for
        example when the file cannot be opened or has no pages).
    """
    pdfName = os.path.splitext(os.path.basename(pdfPath))[0]

    pdfDoc = fitz.open(pdfPath)
    try:
        page = pdfDoc[0]

        # Scale both axes by 2.5 so the rasterized page is sharper than an
        # unscaled render; no rotation is applied.
        zoom_x = 2.5
        zoom_y = 2.5
        rotate = 0
        mat = fitz.Matrix(zoom_x, zoom_y).preRotate(rotate)
        pix = page.getPixmap(matrix=mat, alpha=False)

        # Make sure the target folder exists before writing into it.
        if not os.path.isdir(imagePath):
            os.makedirs(imagePath)

        pix.writePNG('%s/%s.png' % (imagePath, pdfName))
    finally:
        # Release the document even when rendering or writing fails, so the
        # source PDF is not kept open.
        pdfDoc.close()
| 56 | + |
def batchPdf2Png(invoicePath, outPngPath):
    """Render every ``.pdf`` file directly inside *invoicePath* to a PNG.

    Sub-directories are not searched and the extension match is
    case-sensitive. Each matching file is handed to ``pyMuPDF_fitz``
    together with *outPngPath*.

    Args:
        invoicePath: Directory containing the source PDF files.
        outPngPath: Directory passed on as the image output folder.
    """
    for entry in os.listdir(invoicePath):
        if not entry.endswith(".pdf"):
            continue
        pyMuPDF_fitz(os.path.join(invoicePath, entry), outPngPath)
| 63 | + |
def insertPngInSlide(path_to_presentation, img_path):
    """Place one image on the first slide of a presentation.

    The presentation at *path_to_presentation* is loaded, the picture is
    added at a fixed position and size, and the result is saved as
    ``test.pptx`` in the current working directory.

    Args:
        path_to_presentation: Path of the presentation to load.
        img_path: Path of the image to place on the slide.
    """
    deck = Presentation(path_to_presentation)
    first_slide = deck.slides[0]

    # Fixed placement of the picture on the slide, in centimetres.
    left = Cm(4.28)
    top = Cm(2.79)
    width = Cm(25.58)
    height = Cm(16.59)

    first_slide.shapes.add_picture(
        img_path, left, top, width=width, height=height
    )
    deck.save('test.pptx')
| 71 | + |
def batchInsertPngInSlide(path_to_tmpl_presentation, imgsPath):
    """Build one PPTX per PNG by placing the image on the template's first slide.

    For every ``.png`` file directly inside *imgsPath* the template is loaded
    afresh, the picture is added at a fixed position and size, and the result
    is saved to ``<tempPptxPath>/<png base name>.pptx``.

    Args:
        path_to_tmpl_presentation: Path of the template presentation.
        imgsPath: Directory containing the PNG files.
    """
    # Fixed placement of the picture on the slide, in centimetres.
    left, top, width, height = Cm(4.28), Cm(2.79), Cm(25.58), Cm(16.59)

    # The save below fails if the target folder is missing, so create it
    # up front.
    if not os.path.isdir(tempPptxPath):
        os.makedirs(tempPptxPath)

    for pngfile in os.listdir(imgsPath):
        if not pngfile.endswith(".png"):
            continue
        fullpath = os.path.join(imgsPath, pngfile)

        # Reload the template each time so every output holds exactly one image.
        prs = Presentation(path_to_tmpl_presentation)
        slide = prs.slides[0]
        slide.shapes.add_picture(fullpath, left, top, height=height, width=width)

        pptxPath = os.path.join(tempPptxPath, os.path.splitext(pngfile)[0] + '.pptx')
        prs.save(pptxPath)
| 85 | + |
def init_powerpoint():
    """Create a PowerPoint application object through COM and make it visible.

    Returns:
        The COM object created for ``Powerpoint.Application``.
    """
    app = comtypes.client.CreateObject("Powerpoint.Application")
    app.Visible = 1
    return app
| 90 | + |
def ppt_to_pdf(powerpoint, inputFileName, outputFileName, formatType = 32):
    """Open a presentation in PowerPoint and save it in another format.

    When *outputFileName* does not already end in ``.pdf`` (any letter case),
    a trailing ``.ppt``/``.pptx`` extension is replaced by ``.pdf``; any other
    name simply gets ``.pdf`` appended. Only the final extension is touched,
    so directory names that happen to contain ``.ppt`` are left intact.

    Args:
        powerpoint: PowerPoint application object exposing
            ``Presentations.Open``.
        inputFileName: Path of the presentation to open.
        outputFileName: Desired path of the exported file.
        formatType: Format code passed to ``SaveAs``; 32 is used for
            presentation-to-PDF export.
    """
    root, ext = os.path.splitext(outputFileName)
    if ext.lower() != '.pdf':
        if ext.lower() in ('.ppt', '.pptx'):
            outputFileName = root
        outputFileName += '.pdf'

    deck = powerpoint.Presentations.Open(inputFileName)
    try:
        deck.SaveAs(outputFileName, formatType)
    finally:
        # Always close the deck, otherwise a failed export leaves it open
        # inside the running PowerPoint instance.
        deck.Close()
| 99 | + |
def convert_files_in_folder(powerpoint, folder, outPath):
    """Export every ``.ppt``/``.pptx`` file directly inside *folder* to PDF.

    Each presentation is exported to ``<outPath>/<base name>.pdf`` through
    ``ppt_to_pdf``. *outPath* is created when it does not exist yet, because
    the export cannot write into a missing folder.

    Args:
        powerpoint: PowerPoint application object handed to ``ppt_to_pdf``.
        folder: Directory containing the presentations.
        outPath: Directory that receives the exported PDF files.
    """
    if not os.path.isdir(outPath):
        os.makedirs(outPath)

    for pptfile in os.listdir(folder):
        if not pptfile.endswith((".ppt", ".pptx")):
            continue
        fullpath = os.path.join(folder, pptfile)
        pdfpath = os.path.join(outPath, os.path.splitext(pptfile)[0] + '.pdf')
        ppt_to_pdf(powerpoint, fullpath, pdfpath)
| 107 | + |
def del_file(path_data):
    """Delete every file below *path_data*, keeping the directory tree.

    Entries of *path_data* that are regular files are removed; every other
    entry is treated as a directory and processed recursively. The
    directories themselves are left in place.

    Args:
        path_data: Directory whose files are to be removed.

    Raises:
        OSError: If *path_data* (or a nested entry) cannot be listed or a
            file cannot be removed.
    """
    for name in os.listdir(path_data):
        # os.listdir yields bare names; join with the platform separator
        # instead of a hard-coded backslash so this also works off Windows.
        entry = os.path.join(path_data, name)
        if os.path.isfile(entry):
            os.remove(entry)
        else:
            del_file(entry)
| 115 | + |
def getfilenames(filepath='', filelist_out=None, file_ext='all'):
    """Collect the paths of all files below *filepath*, including sub-folders.

    Directories and files are visited in sorted name order so the result is
    deterministic.

    Args:
        filepath: Root directory to walk.
        filelist_out: Optional list to append the results to. A new list is
            created when omitted, so results never leak between calls.
        file_ext: ``'all'`` to collect every file, otherwise the exact
            extension (including the dot, case-sensitive) a file must have.

    Returns:
        The list holding the collected paths (*filelist_out* itself when one
        was supplied).
    """
    if filelist_out is None:
        filelist_out = []

    for fpath, dirs, fs in os.walk(filepath):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for f in sorted(fs):
            fi_d = os.path.join(fpath, f)
            if file_ext == 'all' or os.path.splitext(fi_d)[1] == file_ext:
                filelist_out.append(fi_d)
    return filelist_out
| 128 | + |
def mergefiles(path, output_filename, import_bookmarks=False):
    """Merge every PDF below *path* into a single PDF with bookmarks.

    Each merged file gets a bookmark named after its base name. Bookmarks
    contained in the source files are only carried over when
    *import_bookmarks* is true. Encrypted files are reported and skipped.
    The result is written to ``<abspath(path)>/<output_filename>``; a file
    already present at that location is not used as an input, so re-running
    does not merge an earlier result into the new one.

    Args:
        path: Directory that is searched (recursively) for ``.pdf`` files
            and that receives the merged file.
        output_filename: File name of the merged PDF.
        import_bookmarks: Whether to import the source files' own bookmarks.

    Raises:
        SystemExit: If there is no PDF file to merge.
    """
    out_filename = os.path.join(os.path.abspath(path), output_filename)
    out_key = os.path.normcase(os.path.abspath(out_filename))

    # Pass a fresh list explicitly so results of earlier calls are never reused.
    filelist = [
        name
        for name in getfilenames(filepath=path, filelist_out=[], file_ext='.pdf')
        if os.path.normcase(os.path.abspath(name)) != out_key
    ]
    if len(filelist) == 0:
        print("当前目录及子目录下不存在pdf文件")
        sys.exit()

    merger = PdfFileMerger()
    try:
        for filename in filelist:
            short_filename = os.path.basename(os.path.splitext(filename)[0])
            # The with-block closes the source file on every path, including
            # the early `continue` taken for encrypted files.
            with open(filename, 'rb') as f:
                file_rd = PdfFileReader(f)
                if file_rd.isEncrypted:
                    print('不支持的加密文件:%s'%(filename))
                    continue
                merger.append(file_rd, bookmark=short_filename, import_bookmarks=import_bookmarks)
            print('合并文件:%s'%(filename))
        merger.write(out_filename)
        print('合并后的输出文件:%s'%(out_filename))
    finally:
        merger.close()
| 151 | + |
def excetue():
    """Run the whole invoice pipeline using the module-level paths.

    Steps, in order: render each invoice PDF to a PNG, place each PNG on the
    PPTX template, export each PPTX to PDF through PowerPoint, delete the
    temporary image/PPTX files, and merge the exported PDFs into ``All.pdf``
    inside the output folder.
    """
    # PDF -> PNG
    batchPdf2Png(invoicePath, tempImagePath)
    # PNG -> PPTX built from the template
    batchInsertPngInSlide(templatePptxPath, tempImagePath)
    # PPTX -> PDF; absolute paths are handed to PowerPoint.
    powerpoint = init_powerpoint()
    try:
        absPptxPath = os.path.abspath(tempPptxPath)
        absOutPath = os.path.abspath(outPath)
        convert_files_in_folder(powerpoint, absPptxPath, absOutPath)
    finally:
        # Quit even when the export fails so no PowerPoint instance is left
        # running.
        powerpoint.Quit()
    # Remove the temporary files.
    del_file(tempImagePath)
    del_file(tempPptxPath)
    # Merge the exported PDFs.
    mergefiles(outPath, 'All.pdf')


if __name__ == '__main__':
    excetue()
0 commit comments