-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_main_payslipA.py
154 lines (122 loc) · 4.4 KB
/
_main_payslipA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from PyPDF2 import PdfFileReader
import re as r
import passFile
from passFile import password
import os
import datetime
import time
import matplotlib.pyplot as plt
# Announce start-up and remember the wall-clock start time so the script can
# report its elapsed time when it finishes.
print("Started")
start=time.time()
# Reference dates computed once at import time.
# NOTE(review): withinOneYear computes its own cutoff locally, so these two
# names do not appear to be read by any function in this file — confirm before
# removing them.
today=datetime.date.today()
oneYearAgo=today-datetime.timedelta(365)
# Module-level list. filtratePayslips builds a separate local list with the
# same name, so nothing visible in this file ever appends to this one.
payslipsArray=[]
def withinOneYear(page_content):
    """Return the first date found in *page_content* if it is under a year old.

    The text is searched for the first ``dd?mm?yyyy`` pattern, where ``?`` is
    any single non-digit separator ("/", "-", ".", ...).

    Args:
        page_content: Text extracted from a payslip page.

    Returns:
        A ``datetime.datetime`` (midnight of the matched date) when that date
        is later than 365 days before the current moment; ``None`` when the
        date is older or when the text contains no date at all.

    Raises:
        ValueError: If the matched digits do not form a valid calendar date.
    """
    # Raw string: "\d" / "\D" in a plain literal are invalid escape sequences.
    # Capturing day, month and year separately lets the date be built directly,
    # so separators other than "/" no longer make the parsing step fail.
    match = r.search(r"(\d{2})\D(\d{2})\D(\d{4})", page_content)
    if match is None:
        # No date on the page: treat it like a payslip outside the window
        # instead of failing with an IndexError.
        return None
    day, month, year = (int(part) for part in match.groups())
    date = datetime.datetime(year, month, day)
    oneYearAgo = datetime.datetime.today() - datetime.timedelta(365)
    if date > oneYearAgo:
        return date
    return None
def withinOneYearV2(date):
    """Tell whether *date* falls fewer than 365 days before today.

    Args:
        date: A ``datetime.datetime``; only its calendar date is considered.

    Returns:
        ``True`` when the whole-day difference between today and *date* is
        below 365 (this includes dates in the future), otherwise ``False``.
    """
    age_in_days = (datetime.datetime.today().date() - date.date()).days
    return age_in_days < 365
def findHourPay(source):
    """Collect every amount that directly follows a ``Lounge1`` marker.

    An amount is 1-4 digits, one non-word separator character, then 2 digits.

    Args:
        source: Text to scan.

    Returns:
        List of the matched amounts as floats, in order of appearance; empty
        when nothing matches.

    Raises:
        ValueError: If a matched amount uses a separator ``float`` does not
            accept (the pattern allows any non-word character, not only ".").
    """
    # Raw string: "\d" and "\W" are invalid escapes in a plain string literal.
    matches = r.findall(r"Lounge1(\d{1,4}\W\d{2})", source)
    return [float(amount) for amount in matches]
def findHolidayPay(source):
    """Collect every amount that follows ``Holiday Lounge`` plus one digit.

    An amount is 1-4 digits, one non-word separator character, then 2 digits.

    Args:
        source: Text to scan.

    Returns:
        List of the matched amounts as floats, in order of appearance; empty
        when nothing matches.

    Raises:
        ValueError: If a matched amount uses a separator ``float`` does not
            accept (the pattern allows any non-word character, not only ".").
    """
    # Raw string: "\d" and "\W" are invalid escapes in a plain string literal.
    matches = r.findall(r"Holiday Lounge\d(\d{1,4}\W\d{2})", source)
    return [float(amount) for amount in matches]
def totalGrossPay(hourPay, grossPay):
    """Add every item of *hourPay* on top of *grossPay*.

    Args:
        hourPay: Iterable of numbers to add up.
        grossPay: Starting value the items are added to.

    Returns:
        ``grossPay`` plus the sum of all items in ``hourPay``.
    """
    # The second argument of sum() is the start value, not another addend list.
    combined = sum(hourPay, grossPay)
    return combined
class Payslip:
    """Plain record for one payslip: two per-week entries plus totals.

    Every constructor argument is stored unchanged under an attribute of the
    same name.
    """

    def __init__(self, date, weekOne, weekTwo, tax, NI, pension, grossPay, name):
        # Identification.
        self.name, self.date = name, date
        # Per-week entries.
        self.weekOne, self.weekTwo = weekOne, weekTwo
        # Tax, NI and pension amounts, then the gross figure.
        self.tax, self.NI, self.pension = tax, NI, pension
        self.grossPay = grossPay
class PayslipWeekClass:
    """Plain record for the figures of a single week on a payslip.

    Every constructor argument is stored unchanged under an attribute of the
    same name.
    """

    def __init__(self, tronc, holiday, hour, totalPay):
        self.tronc, self.holiday = tronc, holiday
        self.hour, self.totalPay = hour, totalPay
def processData(page_content, n, correctDate):
    """Parse the text of one payslip page into a ``Payslip`` with two weeks.

    Args:
        page_content: Text extracted from the payslip page.
        n: Value stored as the payslip's ``name`` (the caller passes the PDF
            file name).
        correctDate: Value stored as the payslip's ``date``.

    Returns:
        A ``Payslip`` whose ``grossPay`` is the sum of both weeks' hourly pay,
        troncs and holiday pay. Each week's ``hour`` holds the matched text of
        the hours figure (it is not converted to a number).

    Raises:
        IndexError: If the page has fewer than two "Troncs" amounts, no hours
            or hourly-pay figure, or no tax/NI/pension line.
        ValueError: If a matched amount cannot be converted by ``float``.
    """
    # All patterns are raw strings: "\d" / "\W" are invalid escapes otherwise.
    troncs = r.findall(r"Troncs Lounge\d(\d{1,3}\W\d{2})", page_content)

    # Keep the full list of matches, one per week. Taking the first match and
    # then indexing it would hand each week a single character of that string.
    hours = r.findall(r"Hour Lounge(\d{1,3}\W\d{2,4})\d\W", page_content)
    if len(hours) == 1:
        # Only one week present: give the second week a zero entry, the same
        # way the hourly-pay list is padded below.
        hours.append("0")

    # Holiday pay is the third captured group. The first week's entry is
    # recognised by being followed by "Hour", the second by "Deductions".
    firstWeekMatches = r.findall(
        r"Holiday Lounge(\d)(\d{2}\W\d{2})(\d{2,3}\W\d{2})Hour", page_content
    )
    secondWeekMatches = r.findall(
        r"Holiday Lounge(\d)(\d{2}\W\d{2})(\d{2,3}\W\d{2})Deductions", page_content
    )
    holidayFirstWeek = float(firstWeekMatches[0][2]) if firstWeekMatches else 0.0
    holidaySecondWeek = float(secondWeekMatches[0][2]) if secondWeekMatches else 0.0

    hoursPay = r.findall(r"\W\d{2}(\d{1,4}\W\d{2})Hour Lounge", page_content)
    if len(hoursPay) == 1:
        hoursPay.append(0)

    tax, NI, pension = r.findall(
        r"Tax€(\d{1,4}\W\d{2})€NI€(\d{1,3}\W\d{2})€D and D Peoples Pension .{3}€(\d{1,3}\W\d{2})",
        page_content,
    )[0]

    weekOne = PayslipWeekClass(
        float(troncs[0]), holidayFirstWeek, hours[0], float(hoursPay[0])
    )
    weekTwo = PayslipWeekClass(
        float(troncs[1]), holidaySecondWeek, hours[1], float(hoursPay[1])
    )
    grossPay = (
        weekOne.totalPay
        + weekOne.tronc
        + weekTwo.totalPay
        + weekTwo.tronc
        + weekOne.holiday
        + weekTwo.holiday
    )
    return Payslip(
        correctDate, weekOne, weekTwo, float(tax), float(NI), float(pension), grossPay, n
    )
def loadAllFiles(directory="Payslips/"):
    """Return the names of the PDF files located directly inside *directory*.

    Args:
        directory: Folder to scan. Defaults to ``"Payslips/"``, resolved
            against the current working directory.

    Returns:
        Sorted list of bare file names (without the directory part) whose
        name ends in ``.pdf``, compared case-insensitively so ``X.PDF`` is
        not silently skipped.

    Raises:
        OSError: If *directory* cannot be listed (for example, it is missing).
    """
    # fsdecode makes the names str even if a bytes path was supplied.
    names = (os.fsdecode(entry) for entry in os.listdir(directory))
    # Sorting makes the result independent of the file system's listing order.
    return sorted(name for name in names if name.lower().endswith(".pdf"))
def filtratePayslips(listOfPayslips):
    """Parse the payslip PDFs that are dated within the last year.

    Each name is opened from the ``Payslips/`` folder, decrypted with the
    ``password`` imported from ``passFile``, and the text of its first page is
    handed to ``withinOneYear``; when that returns a date the page is parsed
    with ``processData``.

    Args:
        listOfPayslips: Iterable of PDF file names inside ``Payslips/``.

    Returns:
        List of parsed payslips sorted by their ``date`` attribute, oldest
        first. Empty when no file qualifies.
    """
    payslipsArray = []
    for n in listOfPayslips:
        # The context manager closes the file as soon as the text has been
        # extracted, instead of leaving one open handle per payslip.
        with open("Payslips/" + str(n), "rb") as m:
            pdf = PdfFileReader(m)
            pdf.decrypt(password)
            # Only the first page of each PDF is read.
            page_content = pdf.getPage(0).extractText()
        correctDate = withinOneYear(page_content)
        if correctDate:
            payslipsArray.append(processData(page_content, n, correctDate))
    return sorted(payslipsArray, key=lambda x: x.date)
class Total:
    """Plain record holding the summed figures across all payslips.

    Every constructor argument is stored unchanged under an attribute of the
    same (capitalised) name.
    """

    def __init__(self, Tax, NI, Paid, Pension, Holiday, Gross):
        self.Tax, self.NI = Tax, NI
        self.Paid, self.Pension = Paid, Pension
        self.Holiday, self.Gross = Holiday, Gross
# --- Script body: load the payslips, total the figures, plot and report. ---
loadedFiles=loadAllFiles()
howManyPayslips=len(loadedFiles)
payslips=filtratePayslips(loadedFiles)

# Per-payslip series used for the plot and the totals.
payslipValues=[f.grossPay for f in payslips]
payslipDates=[f.date.date() for f in payslips]

totalPaid=sum(payslipValues)
totalPensionPaid=sum([f.pension for f in payslips])
totalHoliday=sum([f.weekOne.holiday + f.weekTwo.holiday for f in payslips])

total=Total(0,0,0,0,0,0)
total.Tax=sum([f.tax for f in payslips])
total.NI=sum([f.NI for f in payslips])
# NOTE(review): this rebinds the name of the totalGrossPay() function defined
# earlier in the file to a number; the name is kept so existing module-level
# names stay the same, but the function cannot be called after this point.
totalGrossPay=totalPaid+total.NI+totalPensionPaid+total.Tax
# Mirror the loose totals onto the Total record so all six fields are filled.
total.Paid=totalPaid
total.Pension=totalPensionPaid
total.Holiday=totalHoliday
total.Gross=totalGrossPay

# With no qualifying payslip the average would divide by zero; report 0 instead.
averagePay=totalPaid/len(payslipValues) if payslipValues else 0.0

plt.plot([f.tax for f in payslips])
plt.plot(payslipValues)
if payslipValues:
    # Horizontal line at the average payslip value.
    plt.axhline(averagePay)
plt.grid()
print(f"Total Net pay is £{round(totalPaid,2)}")
print(f"Average pay is £{round(averagePay,2)}")
plt.show()
# Elapsed time is taken after plt.show() returns, so it includes however long
# that call takes.
finish=time.time()
print(finish-start)
# Sort the payslips that were actually parsed; the module-level payslipsArray
# is never filled by the code in this file.
o=sorted(payslips,key=lambda nunu : nunu.date)