Skip to content

Commit f69e3ff

Browse files
author
Paul Drage
committed
Adds lloydsbank csv support
Added example invocation script Classify.py
1 parent a97ecb7 commit f69e3ff

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

BankClassify.py

+40
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def add_data(self, filename, bank="santander"):
3131
elif bank == "nationwide":
3232
print("adding nationwide data!")
3333
self.new_data = self._read_nationwide_file(filename)
34+
elif bank == "lloyds":
35+
print("adding Lloyds Bank data!")
36+
self.new_data = self._read_lloyds_csv(filename)
3437

3538
self._ask_with_guess(self.new_data)
3639

@@ -218,14 +221,51 @@ def _read_santander_file(self, filename):
218221
just_numbers = re.sub("[^0-9\.-]", "", data)
219222
amounts.append(just_numbers.strip())
220223

224+
221225
df = pd.DataFrame({'date':dates, 'desc':descs, 'amount':amounts})
222226

227+
223228
df['amount'] = df.amount.astype(float)
224229
df['desc'] = df.desc.astype(str)
225230
df['date'] = df.date.astype(str)
226231

227232
return df
228233

234+
def _read_lloyds_csv(self, filename):
    """Read a file in the CSV format that Lloyds Bank provides downloads in.

    Lloyds exports money out and money in as two separate columns
    ("Debit Amount" and "Credit Amount"). They are merged here into a
    single signed 'amount' column: debits become negative, credits stay
    positive.

    Args:
        filename: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        A pd.DataFrame with exactly the columns 'date', 'desc' and
        'amount' (str, str, float), matching the other bank readers.

    Raises:
        KeyError: If one of the expected Lloyds column headers is missing.
    """
    raw = pd.read_csv(filename)

    # Fail early with a readable message instead of an opaque KeyError
    # further down when the file is not a Lloyds export.
    required = (
        "Transaction Date",
        "Transaction Description",
        "Debit Amount",
        "Credit Amount",
    )
    missing = [col for col in required if col not in raw.columns]
    if missing:
        raise KeyError(
            "Lloyds CSV is missing expected column(s): " + ", ".join(missing)
        )

    debit = raw["Debit Amount"].astype(float)
    credit = raw["Credit Amount"].astype(float)

    # Start from the debit value, use the credit where one is present, then
    # let a positive debit win (negated). Rows with neither a positive debit
    # nor a positive credit keep whatever the debit column held.
    amount = debit.mask(credit > 0, credit).mask(debit > 0, -debit)

    # Only hand back the three columns the rest of the class works with;
    # the remaining Lloyds columns are dropped.
    return pd.DataFrame({
        'date': raw["Transaction Date"].astype(str),
        'desc': raw["Transaction Description"].astype(str),
        'amount': amount,
    })
267+
268+
229269
def _get_training(self, df):
230270
"""Get training data for the classifier, consisting of tuples of
231271
(text, category)"""

Classify.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Example invocation: feed a Lloyds Bank CSV export to BankClassify."""
import sys

from BankClassify import BankClassify

# File name used when no path is given on the command line.
DEFAULT_CSV = "85561768_20205411_0903.csv"


def main(argv=None):
    """Load a Lloyds Bank CSV into a BankClassify instance.

    Args:
        argv: Optional list of command-line arguments (defaults to
            ``sys.argv[1:]``). The first item, if present, is the CSV
            path; otherwise DEFAULT_CSV is used.
    """
    args = sys.argv[1:] if argv is None else argv
    filename = args[0] if args else DEFAULT_CSV

    bc = BankClassify()
    bc.add_data(filename, "lloyds")


# Guarded so that importing this module does not start a run.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)