Skip to content

Commit

Permalink
fileformats into toolkit
Browse files Browse the repository at this point in the history
  • Loading branch information
leela committed Jun 30, 2020
1 parent 1a26419 commit adb21e8
Show file tree
Hide file tree
Showing 13 changed files with 1,096 additions and 1 deletion.
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
web.py
psycopg2-binary
psycopg2-binary
pandas
pyYAML
59 changes: 59 additions & 0 deletions tests/fileformats/test_default.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Case: a column-level `default` fills in blank cells.
# CITY is '' for S3 and null for S4; both rows are expected to come back
# with the declared default 'MISSING'. Input headers match the column
# labels, result headers match the column names.
name: test default
fileformat:
  name: test
  description: test format
  columns:
    - name: id
      label: ID
      datatype: string
    - name: name
      label: NAME
      datatype: string
    - name: city
      label: CITY
      datatype: string
      default: MISSING
inputfile:
  columns: ['ID', 'NAME', 'CITY']
  data:
    - ['S1', 'Store 1', 'BLR']
    - ['S2', 'Store 2', 'DEL']
    - ['S3', 'Store 3', '']
    - ['S4', 'Store 4', null]
result:
  columns: ['id', 'name', 'city']
  data:
    - ['S1', 'Store 1', 'BLR']
    - ['S2', 'Store 2', 'DEL']
    - ['S3', 'Store 3', 'MISSING']
    - ['S4', 'Store 4', 'MISSING']
---
# Case: an empty value in a column that declares no default.
# The second data row has ID '' and is expected to yield one row-level
# `missing_value` error (row_index 1, i.e. counting data rows from 0).
# The blank CITY cells produce no error here; CITY declares a default.
name: test missing
fileformat:
  name: test
  description: test format
  columns:
    - name: id
      label: ID
      datatype: string
    - name: name
      label: NAME
      datatype: string
    - name: city
      label: CITY
      datatype: string
      default: MISSING
inputfile:
  columns: ['ID', 'NAME', 'CITY']
  data:
    - ['S1', 'Store 1', 'BLR']
    - ['', 'Store 2', 'DEL']
    - ['S3', 'Store 3', '']
    - ['S4', 'Store 4', null]
errors:
  - error_level: row
    error_code: missing_value
    error_message: "Found missing value: ''"
    row_index: 1
    column_name: ID
    value: ''
30 changes: 30 additions & 0 deletions tests/fileformats/test_missing_values.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Case: per-column `missing_values` markers on an optional column.
# DATE lists 'missing' as a missing-value marker and is not required, so
# both the marker (row 102) and the empty string (row 103) are expected
# to be returned as null.
# The case name previously read "test repeats", copied from the
# repeat_last_column fixture; it now matches this file.
name: test missing values
fileformat:
  name: test
  description: test
  columns:
    - name: id
      label: ID
      datatype: string
    - name: date
      label: DATE
      datatype: string
      missing_values:
        - missing
      required: false
inputfile:
  columns: ['ID', 'DATE']
  data:
    - ['100', '2019-10-10']
    - ['101', '2010-10-11']
    - ['102', 'missing']
    - ['103', '']
    - ['104', '2010-10-12']
result:
  columns: ['id', 'date']
  data:
    - ['100', '2019-10-10']
    - ['101', '2010-10-11']
    - ['102', null]
    - ['103', null]
    - ['104', '2010-10-12']
29 changes: 29 additions & 0 deletions tests/fileformats/test_options.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Case: a column restricted to a fixed set of allowed values.
# ACTIVE accepts only 'Y' or 'N'; the third data row carries 'Yes' and is
# expected to yield one row-level `invalid_value` error (row_index 2).
name: test options
fileformat:
  name: test
  description: test format
  columns:
    - name: id
      label: ID
      datatype: string
      unique: true
    - name: name
      label: NAME
      datatype: string
    - name: active
      label: ACTIVE
      datatype: string
      # Quoted on purpose: bare Y / N are booleans under YAML 1.1 loaders.
      options: ['Y', 'N']
inputfile:
  columns: ['ID', 'NAME', 'ACTIVE']
  data:
    - ['BLR', 'Bangalore', 'Y']
    - ['DEL', 'Delhi', 'N']
    - ['MUM', 'Mumbai', 'Yes']
errors:
  - error_level: row
    error_code: invalid_value
    error_message: "The value is not one of the allowed options: 'Yes'"
    row_index: 2
    column_name: ACTIVE
    value: 'Yes'
33 changes: 33 additions & 0 deletions tests/fileformats/test_regex.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Case: a column validated against a regular expression.
# ID must match 'S[0-9]+'. 'S 3' (embedded space) and 'x4' (wrong prefix)
# are each expected to yield a row-level `invalid_pattern` error, at
# row_index 2 and 3 respectively.
# The case name previously read "test options", copied from the options
# fixture; it now matches this file.
name: test regex
fileformat:
  name: test
  description: test format
  columns:
    - name: id
      label: ID
      datatype: string
      unique: true
      regex: 'S[0-9]+'
    - name: name
      label: NAME
      datatype: string
inputfile:
  columns: ['ID', 'NAME']
  data:
    - ['S1', 'Store 1']
    - ['S2', 'Store 2']
    - ['S 3', 'Store 3']
    - ['x4', 'Store 3']
errors:
  - error_level: row
    error_code: invalid_pattern
    error_message: "The value is not matching the pattern S[0-9]+: 'S 3'"
    row_index: 2
    column_name: ID
    value: 'S 3'
  - error_level: row
    error_code: invalid_pattern
    error_message: "The value is not matching the pattern S[0-9]+: 'x4'"
    row_index: 3
    column_name: ID
    value: 'x4'
36 changes: 36 additions & 0 deletions tests/fileformats/test_rejected_rows.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Case: format-level `on_error: reject_row`.
# The row with ACTIVE 'Yes' violates the allowed options. It is expected
# to be reported under `errors` AND left out of `result`, while the two
# valid rows are still returned.
# The case name previously read "test options", copied from the options
# fixture; it now matches this file.
name: test rejected rows
fileformat:
  name: test
  description: test format
  options:
    on_error: reject_row
  columns:
    - name: id
      label: ID
      datatype: string
      unique: true
    - name: name
      label: NAME
      datatype: string
    - name: active
      label: ACTIVE
      datatype: string
      # Quoted on purpose: bare Y / N are booleans under YAML 1.1 loaders.
      options: ['Y', 'N']
inputfile:
  columns: ['ID', 'NAME', 'ACTIVE']
  data:
    - ['BLR', 'Bangalore', 'Y']
    - ['DEL', 'Delhi', 'N']
    - ['MUM', 'Mumbai', 'Yes']
result:
  columns: ['id', 'name', 'active']
  data:
    - ['BLR', 'Bangalore', 'Y']
    - ['DEL', 'Delhi', 'N']
errors:
  - error_level: row
    error_code: invalid_value
    error_message: "The value is not one of the allowed options: 'Yes'"
    row_index: 2
    column_name: ACTIVE
    value: 'Yes'
30 changes: 30 additions & 0 deletions tests/fileformats/test_repeat_last_column.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Case: `repeat_last_column` combined with `skiprows`.
# The raw input has two junk lines before the header, and each data line
# carries one or more trailing ITEM values. The expected result collects
# those trailing values into a list in the last column.
name: test repeats
fileformat:
  name: test
  description: test
  options:
    skiprows: 2
    repeat_last_column: true
  columns:
    - name: id
      label: ID
      datatype: string
    - name: item
      label: ITEM
      datatype: string
      required: false
# Literal block scalar: newlines are kept so the text reads as a raw file.
inputfile_contents: |
  line 1
  line 2
  ID,ITEM
  100,Idly,Sambar
  101,Idly
  102,Idly,Dosa
  103,Idly,Dosa,Paratha
result:
  columns: ['id', 'item']
  data:
    - ['100', ['Idly', 'Sambar']]
    - ['101', ['Idly']]
    - ['102', ['Idly', 'Dosa']]
    - ['103', ['Idly', 'Dosa', 'Paratha']]
21 changes: 21 additions & 0 deletions tests/fileformats/test_simple.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Case: baseline with two plain string columns and no constraints.
# The data is expected to pass through unchanged; only the headers differ
# (input uses the column labels, result uses the column names).
name: test fileformats with a simple input
fileformat:
  name: test
  description: first format
  columns:
    - name: id
      label: ID
      datatype: string
    - name: name
      label: NAME
      datatype: string
inputfile:
  columns: ['ID', 'NAME']
  data:
    - ['BLR', 'Bangalore']
    - ['DEL', 'Delhi']
result:
  columns: ['id', 'name']
  data:
    - ['BLR', 'Bangalore']
    - ['DEL', 'Delhi']
24 changes: 24 additions & 0 deletions tests/fileformats/test_skiprows.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Case: format-level `skiprows`.
# The raw input starts with two junk lines; with `skiprows: 2` the header
# is expected to be read from the third line and both data rows returned.
name: test skiprows
fileformat:
  name: test
  description: test
  options:
    skiprows: 2
  columns:
    - name: id
      label: ID
      datatype: string
    - name: name
      label: NAME
      datatype: string
# Literal block scalar: newlines are kept so the text reads as a raw file.
inputfile_contents: |
  line 1
  line 2
  ID,NAME
  BLR,Bangalore
  DEL,Delhi
result:
  columns: ['id', 'name']
  data:
    - ['BLR', 'Bangalore']
    - ['DEL', 'Delhi']
89 changes: 89 additions & 0 deletions tests/fileformats/test_unique.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Case: `unique: true` with a single repeated value.
# 'BLR' appears in the first and third data rows; only the later
# occurrence (row_index 2) is expected to be reported as
# `duplicate_value`.
name: test unique
fileformat:
  name: test
  description: test format
  columns:
    - name: id
      label: ID
      datatype: string
      unique: true
    - name: name
      label: NAME
      datatype: string
inputfile:
  columns: ['ID', 'NAME']
  data:
    - ['BLR', 'Bangalore']
    - ['DEL', 'Delhi']
    - ['BLR', 'Bengaluru']
errors:
  - error_level: row
    error_code: duplicate_value
    error_message: "Found duplicate value: 'BLR'"
    row_index: 2
    column_name: ID
    value: 'BLR'

---
# Case: `unique: true` with several repeated values.
# The first 'BLR' and first 'DEL' are accepted. Every later repeat
# (row_index 2, 3 and 4) is expected to produce its own
# `duplicate_value` error, in input order.
name: test unique with multiple duplicate values
fileformat:
  name: test
  description: test format
  columns:
    - name: id
      label: ID
      datatype: string
      unique: true
    - name: name
      label: NAME
      datatype: string
inputfile:
  columns: ['ID', 'NAME']
  data:
    - ['BLR', 'Bangalore']
    - ['DEL', 'Delhi']
    - ['BLR', 'Bengaluru']
    - ['DEL', 'Delhi']
    - ['BLR', 'Bengaluru']
errors:
  - error_level: row
    error_code: duplicate_value
    error_message: "Found duplicate value: 'BLR'"
    row_index: 2
    column_name: ID
    value: 'BLR'
  - error_level: row
    error_code: duplicate_value
    error_message: "Found duplicate value: 'DEL'"
    row_index: 3
    column_name: ID
    value: 'DEL'
  - error_level: row
    error_code: duplicate_value
    error_message: "Found duplicate value: 'BLR'"
    row_index: 4
    column_name: ID
    value: 'BLR'
---
# Case: `unique: true` when every ID is distinct.
# No errors are expected; the data is returned unchanged under the
# column names.
name: test unique with success
fileformat:
  name: test
  description: test format
  columns:
    - name: id
      label: ID
      datatype: string
      unique: true
    - name: name
      label: NAME
      datatype: string
inputfile:
  columns: ['ID', 'NAME']
  data:
    - ['BLR', 'Bangalore']
    - ['DEL', 'Delhi']
result:
  columns: ['id', 'name']
  data:
    - ['BLR', 'Bangalore']
    - ['DEL', 'Delhi']
Loading

0 comments on commit adb21e8

Please sign in to comment.