
Commit

Merge pull request #53 from tsdataclinic/census_multiple_csas
Census update, multiple MSAs; closes #51
CanyonFoot authored Oct 5, 2023
2 parents 25bd858 + 14f580c commit c59bbf9
Showing 7 changed files with 99 additions and 108 deletions.
4 changes: 2 additions & 2 deletions Pipfile
@@ -6,7 +6,7 @@ name = "pypi"
[packages]
pandas = "*"
geopandas = "*"
cenpy = "*"
censusdis = "*"
osmnx = "*"
jupyterlab = "*"
cookiecutter = "*"
@@ -29,4 +29,4 @@ psycopg2 = "*"
[dev-packages]

[requires]
python_version = "3.7"
python_version = "3.9.18"
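
For context, a minimal sketch of the censusdis call pattern that replaces cenpy in this pipeline. It mirrors the download calls added in get_census_data.py below; the single-state FIPS code ("36") and the printed columns are illustrative.

import censusdis.data as ced

# Pull 2020 ACS 5-year tract names for one state; with_geometry=True returns
# a GeoDataFrame with tract boundaries attached rather than a plain DataFrame.
gdf = ced.download(
    "acs/acs5", 2020, ["NAME"],
    state="36", county="*", tract="*",
    with_geometry=True,
)
print(gdf[["STATE", "COUNTY", "TRACT", "NAME"]].head())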
17 changes: 9 additions & 8 deletions analysis/src/config.json
@@ -11,7 +11,7 @@
{
"city_name": "New York City",
"city_code": "nyc",
"msa_code": "C3562",
"msa_code": ["C3562"],
"state":"NY",
"transit_feeds": {
"mta_subway": "f-dr5r-nyctsubway",
@@ -27,7 +27,7 @@
{
"city_name": "Hampton Roads",
"city_code": "hr",
"msa_code": "C4726",
"msa_code": ["C4726"],
"state":"VA",
"transit_feeds": {
"hrt": "f-dq9-hrt",
@@ -38,7 +38,7 @@
{
"city_name": "Chicago",
"city_code": "chi",
"msa_code": "C1698",
"msa_code": ["C1698"],
"state":"IL",
"transit_feeds": {
"cta": "f-dp3-cta",
@@ -49,7 +49,7 @@
{
"city_name": "Philadelphia",
"city_code": "phi",
"msa_code": "C3798",
"msa_code": ["C3798"],
"state":"PA",
"transit_feeds": {
"septa": "f-dr4-septa~bus"
@@ -60,7 +60,7 @@
{
"city_name": "New Orleans",
"city_code": "nola",
"msa_code": "C3538",
"msa_code": ["C3538"],
"state":"LA",
"transit_feeds": {
"norta": "f-9vrf-neworleansrta",
@@ -72,17 +72,18 @@
{
"city_name": "San Francisco",
"city_code": "sf",
"msa_code": "C4186",
"msa_code": ["C4186", "C4194"],
"state":"CA",
"transit_feeds": {
"sf_mta": "f-sf~bay~area~rg"
}
},
"FEMA_file_name": "NFHL_06_20230608"
},
"pitt":
{
"city_name": "Pittsburgh",
"city_code": "pitt",
"msa_code":"C3830",
"msa_code":["C3830"],
"state":"PA",
"transit_feeds": {
"prt": "f-dppn-portauthorityofalleghenycounty",
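msa_code is now a list so a single city entry can span more than one MSA code (San Francisco gains "C4194" alongside "C4186"). A hedged sketch of how the list is consumed downstream, mirroring the crosswalk filter in get_census_data.py; the relative CSV path here is illustrative.

import pandas as pd

msa_codes = ["C4186", "C4194"]  # the San Francisco entry above

# Select every county belonging to any configured MSA from the QCEW
# county/MSA/CSA crosswalk, then zero-pad county FIPS codes to five digits.
all_msa = pd.read_csv("qcew-county-msa-csa-crosswalk-csv.csv", encoding="ISO-8859-1")
selected = all_msa[all_msa["MSA Code"].isin(msa_codes)]
counties = selected["County Code"].astype(str).str.zfill(5)
states = sorted({c[:2] for c in counties})
print(states, len(counties))
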
93 changes: 0 additions & 93 deletions analysis/src/data/get_census_data.R

This file was deleted.

79 changes: 79 additions & 0 deletions analysis/src/data/get_census_data.py
@@ -0,0 +1,79 @@
import argparse
import pandas as pd
import geopandas as gpd
import json
from pathlib import Path
import os
import censusdis.data as ced
from pandas import json_normalize


def get_census(config, city_key):
"""
Retreives census shapefiles for city based on msa codes supplied in config
"""

    base_path = config['base_path']
    city_config = json_normalize(config[city_key])
    msa_codes = city_config[city_config['city_code'] == city_key]['msa_code'].iloc[0]

    # File Paths
    path = f"{base_path}/cities/{city_key}/census/geo/"
    msa_path = f"{base_path}/national/qcew-county-msa-csa-crosswalk-csv.csv"
    tract_path = "tracts.geojson"
    tract_2010_path = "tracts_2010.geojson"
    block_group_path = "block_groups.geojson"

    if not os.path.isdir(path):
        os.makedirs(path)

    all_msa = pd.read_csv(msa_path, encoding="ISO-8859-1")
    selected_msa = all_msa[all_msa['MSA Code'].isin(msa_codes)]
    selected_msa_counties_five_digits = selected_msa['County Code'].astype(str).str.zfill(5)
    selected_msa_states = list(set(state[:2] for state in selected_msa_counties_five_digits))

    # 2020 block groups
    gdf_block_group = ced.download("acs/acs5", 2020, ["NAME"], state=selected_msa_states, county="*", block_group="*", with_geometry=True)

    gdf_block_group["GEOID"] = gdf_block_group["STATE"] + gdf_block_group["COUNTY"] + gdf_block_group["TRACT"] + gdf_block_group["BLOCK_GROUP"]
    gdf_block_group["county"] = gdf_block_group["STATE"] + gdf_block_group["COUNTY"]
    gdf_block_group = gdf_block_group[["GEOID", "NAME", "county", "geometry"]]
    gdf_block_group = gdf_block_group.query("county.isin(@selected_msa_counties_five_digits)")

    gdf_block_group.to_file(f'{path}{block_group_path}')

    # 2020 tracts
    gdf_tract = ced.download("acs/acs5", 2020, ["NAME"], state=selected_msa_states, county="*", tract="*", with_geometry=True)

    gdf_tract["GEOID"] = gdf_tract["STATE"] + gdf_tract["COUNTY"] + gdf_tract["TRACT"]
    gdf_tract["county"] = gdf_tract["STATE"] + gdf_tract["COUNTY"]
    gdf_tract = gdf_tract[["GEOID", "NAME", "county", "geometry"]]
    gdf_tract = gdf_tract.query("county.isin(@selected_msa_counties_five_digits)")

    gdf_tract.to_file(f'{path}{tract_path}')

    # 2010 tracts
    gdf_tract_2010 = ced.download("acs/acs5", 2010, ["NAME"], state=selected_msa_states, county="*", tract="*", with_geometry=True)

    gdf_tract_2010["GEOID"] = gdf_tract_2010["STATE"] + gdf_tract_2010["COUNTY"] + gdf_tract_2010["TRACT"]
    gdf_tract_2010["county"] = gdf_tract_2010["STATE"] + gdf_tract_2010["COUNTY"]
    gdf_tract_2010 = gdf_tract_2010[["GEOID", "NAME", "county", "geometry"]]
    gdf_tract_2010 = gdf_tract_2010.query("county.isin(@selected_msa_counties_five_digits)")

    gdf_tract_2010.to_file(f'{path}{tract_2010_path}')

def main():
    parser = argparse.ArgumentParser("Get Census")
    parser.add_argument("--config", required=True)
    parser.add_argument("--city", required=True)

    opts = parser.parse_args()

    with open(opts.config) as f:
        config = json.load(f)

    get_census(config, opts.city)
    print("Census geographies written")

if __name__ == "__main__":
    main()
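
A short sketch of reading the files this script writes, assuming the <base_path>/cities/<city>/census/geo/ layout used above; the base path placeholder and city key are illustrative.

import geopandas as gpd

geo_dir = "<base_path>/cities/sf/census/geo"  # placeholder for config['base_path']
block_groups = gpd.read_file(f"{geo_dir}/block_groups.geojson")
tracts = gpd.read_file(f"{geo_dir}/tracts.geojson")
tracts_2010 = gpd.read_file(f"{geo_dir}/tracts_2010.geojson")

# Each layer carries GEOID, NAME, county, and geometry columns.
print(len(block_groups), len(tracts), len(tracts_2010))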
4 changes: 3 additions & 1 deletion analysis/src/data/get_osm_data.py
@@ -7,6 +7,7 @@
from utils.geo import create_extent
import argparse
import sys
import pickle
import os
import json
ox.config(log_console=True)
@@ -44,7 +45,8 @@ def get_osm_data(config, city_key):
G = get_walk_graph(extent_path)
graph = ox.project_graph(G, to_crs='epsg:4326')
print("Graph created. Writing it")
nx.write_gpickle(G, out_path+"walk_graph.gpickle")
with open(out_path + "walk_graph.gpickle", 'wb') as f:
    pickle.dump(G, f, pickle.HIGHEST_PROTOCOL)

def main():
parser = argparse.ArgumentParser("OSM Graph builder")
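nx.write_gpickle and nx.read_gpickle were removed in NetworkX 3.0, so the walk graph is now serialized with the standard-library pickle module. A round-trip sketch of the new persistence pattern; the toy graph and file name are illustrative.

import pickle
import networkx as nx

G = nx.path_graph(5)  # stand-in for the projected walk graph

# Write, as in get_osm_data.py
with open("walk_graph.gpickle", "wb") as f:
    pickle.dump(G, f, pickle.HIGHEST_PROTOCOL)

# Read back, as in process_walksheds.py
with open("walk_graph.gpickle", "rb") as f:
    G2 = pickle.load(f)

assert list(G.edges) == list(G2.edges)
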
4 changes: 2 additions & 2 deletions analysis/src/data/get_raw_data.py
@@ -4,7 +4,7 @@
from data.get_LODES import get_LODES
from data.get_POI_data import get_poi_data
from data.get_osm_data import get_osm_data
import subprocess
from data.get_census_data import get_census
import argparse
import os
import json
@@ -21,7 +21,7 @@ def get_raw_data(config, city_key):
print("Getting Transit feeds")
get_transit_feeds(config_file, city_key)
print("Getting Census geographies")
subprocess.run(["Rscript", f"{CWD}/src/data/get_census_data.R","--config",config,"--city",city_key])
get_census(config_file, city_key)
print("Getting LODES data")
get_LODES(config_file, city_key)
print("Getting OSM data")
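With the R script gone, get_raw_data.py imports get_census and runs it in-process instead of shelling out to Rscript. A hedged sketch of the equivalent standalone call; the config path and city key are illustrative.

import json
from data.get_census_data import get_census

with open("analysis/src/config.json") as f:
    config = json.load(f)

# Writes block_groups.geojson, tracts.geojson, and tracts_2010.geojson
# under <base_path>/cities/nola/census/geo/.
get_census(config, "nola")
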
6 changes: 4 additions & 2 deletions analysis/src/process/process_walksheds.py
@@ -11,6 +11,7 @@
import os
ox.config(log_console=True)
ox.__version__
import pickle
import json


@@ -43,7 +44,7 @@ def fix_walkshed(graph, polygons):
walksheds['osmid'] = unique_nodes
walkshed_lines_fixed = walkshed_lines.drop(columns=['geometry']).merge(walksheds,on='osmid').set_geometry(col='geometry')
poly_a = polygons[~polygons.id.isin(walkshed_lines_fixed.id)]
poly_fixed = poly_a.append(walkshed_lines_fixed)
poly_fixed = pd.concat([poly_a, walkshed_lines_fixed], ignore_index=True)

return poly_fixed

@@ -126,7 +127,8 @@ def create_walk_shed(points, graph, speed=4.5, trip_time=15, combine=False):
def process_walksheds(config, city_key):

graph_path = f"{config['base_path']}/cities/{city_key}/osm/walk_graph.gpickle"
graph = nx.read_gpickle(graph_path)
with open(graph_path, 'rb') as f:
    graph = pickle.load(f)
out_path = f"{config['base_path']}/cities/{city_key}/osm/walksheds/"

if not os.path.isdir(out_path):
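DataFrame.append was deprecated and later removed from pandas, so the two GeoDataFrames are now combined with pd.concat. A toy sketch of the same pattern; the frames here are illustrative stand-ins for the walkshed polygons.

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

poly_a = gpd.GeoDataFrame({"id": [1]}, geometry=[Point(0, 0).buffer(1)])
fixed = gpd.GeoDataFrame({"id": [2]}, geometry=[Point(3, 0).buffer(1)])

# pd.concat keeps the geometry column, and ignore_index=True renumbers the
# combined index, matching the fix_walkshed change above.
poly_fixed = pd.concat([poly_a, fixed], ignore_index=True)
print(type(poly_fixed).__name__, len(poly_fixed))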
