Continued CSV progress
Toward #114.
waldoj committed Jun 17, 2017
1 parent 88bbadb commit 075aee3
Showing 2 changed files with 21 additions and 22 deletions.
39 changes: 21 additions & 18 deletions crump
@@ -202,9 +202,15 @@ def main():
     current_map = None
     csv_writer = None
     json_file = None
 
+    # Read Tables.csv into memory to use elsewhere.
+    table_1 = csvkit.CSVKitDictReader(open("cisbemon/Tables.csv", 'rb'))
+    lookup_table = []
+    for row in table_1:
+        lookup_table.append(row)
+
     # List the names of the files, in the order in which we'll iterate through them.
-    data_files = ['Tables', 'Corp', 'LP', 'Amendment', 'Officer', 'Name.History', 'Merger', 'Reserved.Name', 'LLC',]
+    data_files = ['Corp', 'LP', 'Amendment', 'Officer', 'Name.History', 'Merger', 'Reserved.Name', 'LLC',]
 
     # Iterate through each file, one by one.
     for data_file in data_files:
@@ -308,11 +314,11 @@ def main():
 
         # Indicate whether a corporation is foreign or domestic, using True/False
         # values instead of the F/0 values the SCC uses in 2_corporate.
-        elif name == 'foreign':
-            if line[name] == '0':
-                line[name] = False
+        elif name == 'state_formed':
+            if line[name] == 'VA':
+                line['foreign'] = False
             else:
-                line[name] = True
+                line['foreign'] = True
 
         # Indicate whether a corporation is foreign or domestic, using True/False
         # values instead of the M/L values the SCC uses in 3_lp.
@@ -332,15 +338,19 @@ def main():
             line[name] = None
 
         # Remove extraneous internal whitespace.
-        if type(line[name]) is str:
-            line[name] = re.sub("\s{2,}", " ", line[name])
+        try:
+            if type(line[name]) is str:
+                line[name] = re.sub("\s{2,}", " ", line[name])
+        except:
+            pass
 
         # Replace shorthand values with the full version, from the lookup table.
         if table_id != None:
+            print lookup_table
             for index, conversion in enumerate(lookup_table):
-                if int(conversion["table-identifier"]) == table_id:
-                    if conversion["table-code"] == line[name]:
-                        line[name] = conversion["table-desc"]
+                if int(conversion["TableID"]) == table_id:
+                    if conversion["ColumnValue"] == line[name]:
+                        line[name] = conversion["Description"]
                         break
 
         # If we have geodata to be inserted.
@@ -423,13 +433,6 @@ def main():
         if elasticsearch == False:
             json_file.write(']')
 
-        # If we've just finished with file 1, read it into memory to use elsewhere.
-        if data_file == 'Tables':
-            table_1 = csvkit.CSVKitDictReader(open(output_dir + "/1_tables.csv", 'rb'))
-            lookup_table = []
-            for row in table_1:
-                lookup_table.append(row)
-
     # Now that all files are output, break them into Elasticsearch-sized chunks.
     if elasticsearch:
         for json_file in glob.glob(output_dir + "/*.json"):
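For context, a minimal sketch of the lookup pattern this commit adopts: the shared Tables.csv is read into memory once and shorthand codes are swapped for their full descriptions. The path and column names (TableID, ColumnValue, Description) come from the diff above; the helper names are hypothetical, and the standard-library csv module stands in for csvkit here.

import csv

def load_lookup_table(path="cisbemon/Tables.csv"):
    # Read the shared code table into memory once, as a list of dicts.
    with open(path, newline="") as f:
        return list(csv.DictReader(f))

def expand_code(value, table_id, lookup_table):
    # Swap a shorthand code for its full description when the lookup table
    # has a row whose TableID and ColumnValue match; otherwise keep the value.
    for conversion in lookup_table:
        if int(conversion["TableID"]) == table_id and conversion["ColumnValue"] == value:
            return conversion["Description"]
    return value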
4 changes: 0 additions & 4 deletions table_maps/corp.yaml
@@ -1,7 +1,3 @@
-- name: corp-foreign
-  alt_name: foreign
-  description: Whether the corporation was formed outside of VA
-  type: A
 - name: EntityID
   alt_name: id
   description: Unique number assigned to Corp
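The corp-foreign mapping can be dropped from corp.yaml because crump now derives the flag from state_formed rather than from a dedicated source column. Roughly, as a sketch only (the record shown is hypothetical):

def is_foreign(record):
    # A corporation formed anywhere other than Virginia is treated as foreign.
    return record.get("state_formed") != "VA"

# is_foreign({"state_formed": "MD"}) -> True
# is_foreign({"state_formed": "VA"}) -> False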
