-
Notifications
You must be signed in to change notification settings - Fork 0
/
fldm2_map.py
64 lines (50 loc) · 3.53 KB
/
fldm2_map.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Script metadata
# Base file name (topic, subtopic, and type)
name = 'fldm2_map'
# Path of this script's folder, relative to the working directory
path = 'fracture-proof/version_2/fldm2/'
# Working directory that all relative paths are resolved against
directory = '/home/drewc/GitHub/'
# Descriptive title of the analysis
title = 'FractureProof v2.1 - Diabetes Mortality in Florida'
# Author credit
author = 'Andrew S. Cistola, MPH'
## Create FL Map from Zip Code Data
### Import python libraries
import os # Operating system navigation
from datetime import datetime
from datetime import date
### Import data science libraries
import pandas as pd # Widely used data manipulation library with R/Excel like tables named 'data frames'
import numpy as np # Widely used matrix library for numerical processes
### Import Visualization Libraries
import matplotlib.pyplot as plt # Comprehensive graphing package in python
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import geopandas as gp # Simple mapping library for csv shape files with pandas like syntax for creating plots using matplotlib
import descartes # Mapping library for use with geopandas
import mapclassify # Mapping library for use with geopandas
### Set Directory
os.chdir(directory) # Make the project repository the working directory so the relative paths below resolve
### Set Timestamps
day = date.today().isoformat() # Run date as text (same output as str() on a date)
stamp = datetime.now().isoformat(sep = ' ') # Run date and time as text (same output as str() on a datetime)
### Preprocess First Dataset
# NOTE(review): the column name 'K00_K99_R1000' looks like the ICD-10 range K00-K99 (digestive system diseases), while the titles in this script refer to diabetes - confirm this is the intended outcome column
df_d1 = (
    pd.read_csv('fracture-proof/version_2/_data/FDOH_5Y2018_ZCTA.csv') # Load the first dataset, saved as csv in the _data folder
    .loc[lambda table: table['POPULATION'] > 500] # Subset rows where the numeric population column exceeds 500
    .filter(items = ['K00_K99_R1000', 'ZCTA']) # Keep only the outcome value and the identifier
    .rename(columns = {'ZCTA': 'ID', 'K00_K99_R1000': 'quant'}) # Apply standard names to identifier and quantitative outcome
)
df_d1.info() # Print class, memory, and column info: names, data types, obs
### Geojoin outcome table with polygons
gdf_d1 = gp.read_file('fracture-proof/version_2/_data/cb_2018_us_zcta510_500k/cb_2018_us_zcta510_500k.shp') # Load the shape file; the other downloaded files sit in the same folder
# Build the join key in one step: code as text, left-padded with zeros to 5 characters, prefixed with 'ZCTA'
gdf_d1['ID'] = 'ZCTA' + gdf_d1['ZCTA5CE10'].astype('str').str.pad(width = 5, side = 'left', fillchar = '0')
# Inner join on 'ID' with the geo data passed first, then keep only the outcome and the polygons
gdf_d1 = pd.merge(
    left = gdf_d1.filter(items = ['ID', 'geometry']),
    right = df_d1,
    on = 'ID',
    how = 'inner',
).filter(items = ['quant', 'geometry'])
gdf_d1.info() # Print class, memory, and column info: names, data types, obs
### Create choropleth for FL
# Shade each polygon by 'quant' using 9 equal-interval classes; the plot call returns the matplotlib axes, held in 'ax' rather than 'map' so the builtin map() is not shadowed
ax = gdf_d1.plot(column = 'quant', cmap = 'Blues', figsize = (10, 10), scheme = 'equal_interval', k = 9, legend = True, legend_kwds = {'title': 'Deaths per 1000', 'loc': 'center left'})
ax.set_title('Diabetes Mortality Rates by Zip Code in Florida 2014-2018', fontdict = {'fontsize': 16}, loc = 'center') # Year range fixed from '204-2018'; five-year window ending 2018 per the FDOH_5Y2018 input file
ax.set_axis_off() # Hide the axis frame and ticks
ax.annotate('', xy = (0.5, 0.1), xytext = (0.5, 0.0), arrowprops = dict(arrowstyle = 'simple'), fontsize = 10, xycoords = 'axes fraction') # North arrow drawn upward at the bottom center
ax.annotate('N', xy = (0.51, 0.01), xycoords = 'axes fraction', fontsize = 16) # Label beside the arrow
ax.annotate(author, xy = (0.0, 0.15), xycoords = 'axes fraction', fontsize = 10) # Author credit taken from the header variable instead of a duplicated literal
ax.annotate(stamp, xy = (0.0, 0.1), xycoords = 'axes fraction', fontsize = 10) # Timestamp of this run
ax.add_artist(AnchoredSizeBar(ax.transData, 3, '300 km', loc = 'lower left')) # NOTE(review): the bar is 3 data units long, so the label presumes roughly 100 km per unit (degrees) - confirm against the shape file's coordinate system
plt.savefig(path + '_fig/' + name + '.png', dpi = 1000, bbox_inches = 'tight') # Write the current figure to the _fig folder next to this script