-
Notifications
You must be signed in to change notification settings - Fork 22
/
fetch_shapefiles.py
208 lines (167 loc) · 6.34 KB
/
fetch_shapefiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
'''
This script will download TIGER data shapefiles from the Census FTP site.
It can be used to download a set of geographies defined in GEO_TYPES_LIST,
or can be used to fetch files for a single state and/or single geography type.
Pass an -s argument to limit by state, pass a -g argument to limit
to a single geography type, and/or pass a -y argument to change the year
from 2012 to something else (e.g. 2015).
>> python fetch_shapefiles.py
>> python fetch_shapefiles.py -s WA
>> python fetch_shapefiles.py -g place
>> python fetch_shapefiles.py -y 2015
>> python fetch_shapefiles.py -s WA -g place -y 2015
If you use the -s argument to fetch files for a single state, the script
will also download the national county, state and congressional district
files that include data for your chosen state.
The script will create DOWNLOAD_DIR and EXTRACT_DIR directories
if necessary, fetch a zipfile or set of zipfiles from the Census website,
then extract the shapefiles from each zipfile retrieved.
DISABLE_AUTO_DOWNLOADS will prevent certain geography types from being
automatically downloaded if no -g argument is passed to fetch_shapefiles.py.
This may be useful because certain files, such as those for Zip Code
Tabulation Areas, are extremely large. You can still target any geography
in GEO_TYPES_LIST specifically, however. So to fetch the ZCTA data:
>> python fetch_shapefiles.py -g zcta5
'''
import optparse
import os
import sys
import zipfile
from os.path import isdir, join, normpath
try:
from six.moves.urllib import request as urllib2
except ImportError:
import urllib2
from __init__ import (DOWNLOAD_DIR, EXTRACT_DIR, STATE_ABBREV_LIST,
GEO_TYPES_LIST, DISABLE_AUTO_DOWNLOADS,
get_fips_code_for_state)
# Base URL of the Census TIGER FTP directory for the default year (2012).
# get_one_geo_type() swaps the requested year in for '2012' in this string
# and appends the upper-cased geography type to build the listing URL.
FTP_HOME = 'ftp://ftp2.census.gov/geo/tiger/TIGER2012/'
def get_filename_list_from_ftp(target, state):
    """
    Return the URLs of the files listed in the directory at ``target``.

    ``target`` is opened with urllib and the response body is read as a
    directory listing, one entry per line. The last whitespace-separated
    token of each line is taken as the file name and appended to ``target``
    to form the file's URL.

    Args:
        target: URL of the directory to list. It is used both to fetch the
            listing and as the prefix of every returned URL.
        state: State abbreviation to filter by, or a falsy value for no
            filtering. When given, only URLs containing ``_<fips>_`` for
            that state are kept, plus national ``_us_`` files other than
            the national ZCTA5 file.

    Returns:
        A list of URL strings. It is always a real list, including when a
        state filter is applied.
    """
    response = urllib2.urlopen(target)
    try:
        listing_lines = response.read().splitlines()
    finally:
        # Release the connection even if reading the listing fails.
        response.close()

    filename_list = []
    for line in listing_lines:
        fields = line.decode().split()
        if not fields:
            # A blank line carries no file name; skipping it avoids an
            # IndexError on fields[-1].
            continue
        filename_list.append('%s%s' % (target, fields[-1]))

    if state:
        state_check = '_%s_' % get_fips_code_for_state(state)
        # A comprehension (rather than filter()) keeps the return type a
        # list on Python 3 as well as Python 2.
        filename_list = [
            filename for filename in filename_list
            if state_check in filename
            or ('_us_' in filename and '_us_zcta5' not in filename)
        ]

    return filename_list
def get_content_length(u):
    """
    Return the Content-Length of an open urllib response as an int.

    ``u`` is the object returned by ``urllib2.urlopen``. On Python 2 the
    header is read through ``u.info().getheader``; on any other major
    version it is read from the ``u.headers`` mapping.
    """
    running_python2 = sys.version_info[0] == 2
    raw_length = (
        u.info().getheader("Content-Length") if running_python2
        else u.headers["Content-Length"]
    )
    return int(raw_length)
def _fetch_to_file(file_location, filename, block_sz=8192):
    """Stream one remote file to ``filename``, printing progress as it goes."""
    u = urllib2.urlopen(file_location)
    try:
        try:
            file_size = get_content_length(u)
        except (TypeError, ValueError):
            # The Content-Length header is absent or not numeric, so the
            # total size is unknown and no percentage can be shown.
            file_size = None
        print("Downloading: %s Bytes: %s" % (
            filename, 'unknown' if file_size is None else file_size))

        f = open(filename, 'wb')
        completed = False
        try:
            file_size_dl = 0
            while True:
                chunk = u.read(block_sz)
                if not chunk:
                    break
                file_size_dl += len(chunk)
                f.write(chunk)
                if file_size:
                    status = r"%10d [%3.2f%%]" % (
                        file_size_dl, file_size_dl * 100. / file_size)
                else:
                    # Unknown or zero total: show the byte count only and
                    # avoid dividing by zero.
                    status = r"%10d" % file_size_dl
                # Trailing backspaces move the cursor back so the next
                # status overwrites this one on the same line.
                status = status + chr(8) * (len(status) + 1)
                sys.stdout.write(status)
                sys.stdout.flush()
            completed = True
        finally:
            f.close()
            if not completed:
                # A partial file would be treated as already downloaded on
                # the next run, so remove it.
                os.remove(filename)
    finally:
        u.close()


def download_files_in_list(filename_list, force=False):
    """
    Download each URL in ``filename_list`` into DOWNLOAD_DIR.

    The local name is the last ``/``-separated component of the URL. A file
    that already exists locally is not fetched again unless ``force`` is
    true. If a transfer fails part-way, the incomplete local file is removed
    and the error propagates.

    Args:
        filename_list: Iterable of remote file URLs.
        force: When true, download even if the local file already exists.

    Returns:
        A list of local file paths, one per input URL, covering both newly
        downloaded files and files that were already present.
    """
    downloaded_filename_list = []
    for file_location in filename_list:
        filename = '%s/%s' % (DOWNLOAD_DIR, file_location.split('/')[-1])
        if force or not os.path.exists(filename):
            # Only download if required.
            _fetch_to_file(file_location, filename)
        downloaded_filename_list.append(filename)
    return downloaded_filename_list
def extract_downloaded_file(filename, remove_on_error=True):
    """
    Extract the zip archive ``filename`` into its own folder in EXTRACT_DIR.

    The folder is named after the archive's base name with a trailing
    ``.zip`` removed.

    Args:
        filename: Path of the downloaded zip file.
        remove_on_error: When true, a corrupt archive is deleted so that a
            later run downloads it again.

    Raises:
        Exception: The archive was corrupt and has been removed.
        zipfile.BadZipFile: The archive was corrupt and ``remove_on_error``
            is false (``zipfile.BadZipfile`` on Python 2).
    """
    zip_dir = os.path.basename(filename)
    if zip_dir.endswith('.zip'):
        # Strip only the suffix; replace() would also remove any '.zip'
        # appearing earlier in the name.
        zip_dir = zip_dir[:-len('.zip')]
    target_dir = normpath(join(EXTRACT_DIR, zip_dir))

    print("Extracting: " + filename + " ...")

    # Python 3 spells the exception BadZipFile; Python 2 only provides
    # BadZipfile. Resolve whichever one this interpreter has.
    bad_zip_error = getattr(zipfile, 'BadZipFile', None)
    if bad_zip_error is None:
        bad_zip_error = zipfile.BadZipfile

    try:
        zipped = zipfile.ZipFile(filename, 'r')
    except bad_zip_error:
        if remove_on_error:
            os.remove(filename)
            raise Exception(
                "Removed corrupt zip file (%s). Retry download." % filename)
        raise

    try:
        zipped.extractall(target_dir)
    finally:
        # Close the archive even if extraction fails part-way.
        zipped.close()
def get_one_geo_type(geo_type, state=None, year='2012'):
    """
    Fetch and unpack every shapefile archive for one geography type.

    The listing URL is FTP_HOME with '2012' replaced by ``year``, followed
    by the upper-cased ``geo_type``. The files found there (optionally
    limited by ``state``) are downloaded, then each one is extracted.
    """
    base_url = FTP_HOME.replace('2012', year)
    target = '%s%s/' % (base_url, geo_type.upper())
    print("Finding files in: " + target + " ...")

    remote_files = get_filename_list_from_ftp(target, state)
    # All downloads finish before the first extraction starts.
    for local_zip in download_files_in_list(remote_files):
        extract_downloaded_file(local_zip)
def get_all_geo_types(state=None, year='2012'):
    """
    Fetch every geography type in GEO_TYPES_LIST that is not opted out.

    Types named in DISABLE_AUTO_DOWNLOADS are skipped; each remaining type
    is passed to get_one_geo_type along with ``state`` and ``year``.
    """
    for geo_type in GEO_TYPES_LIST:
        if geo_type in DISABLE_AUTO_DOWNLOADS:
            continue
        get_one_geo_type(geo_type, state, year)
def process_options(arglist=None):
    """
    Parse command-line arguments for the script.

    Recognizes -s/--state (one of STATE_ABBREV_LIST), -g/--geo/--geo_type
    (one of GEO_TYPES_LIST) and -y/--year (defaults to '2012').

    The parsed result is also stored in the module-level ``options`` and
    ``args`` names. Returns the ``(options, args)`` pair from optparse.
    """
    global options, args

    option_specs = (
        (('-s', '--state'), {
            'dest': 'state',
            'help': 'specific state to download',
            'choices': STATE_ABBREV_LIST,
            'default': None,
        }),
        (('-g', '--geo', '--geo_type'), {
            'dest': 'geo_type',
            'help': 'specific geographic type to download',
            'choices': GEO_TYPES_LIST,
            'default': None,
        }),
        (('-y', '--year'), {
            'dest': 'year',
            'help': 'specific year to download',
            'default': '2012',
        }),
    )

    parser = optparse.OptionParser()
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    options, args = parser.parse_args(arglist)
    return options, args
def main(args=None):
    """
    Run the downloader from the command line.

    >> python fetch_shapefiles.py
    >> python fetch_shapefiles.py -s WA
    >> python fetch_shapefiles.py -g place
    >> python fetch_shapefiles.py -s WA -g place

    ``args`` defaults to ``sys.argv[1:]``. DOWNLOAD_DIR and EXTRACT_DIR are
    created if missing, then either the single requested geography type or
    all auto-download types are fetched.
    """
    cli_args = sys.argv[1:] if args is None else args
    options, _ = process_options(cli_args)

    # Create the working directories one at a time, re-checking each just
    # before creating it.
    for required_dir in [DOWNLOAD_DIR, EXTRACT_DIR]:
        if isdir(required_dir):
            continue
        os.makedirs(required_dir)

    shared_kwargs = {'state': options.state, 'year': options.year}
    if not options.geo_type:
        get_all_geo_types(**shared_kwargs)
        return
    get_one_geo_type(geo_type=options.geo_type, **shared_kwargs)
# Run the downloader only when this file is executed as a script; main()
# reads its arguments from sys.argv when called without any.
if __name__ == '__main__':
    main()