Skip to content

Commit

Permalink
Trying to handle the differences in the 2017 5-year release
Browse files Browse the repository at this point in the history
  • Loading branch information
iandees committed Dec 21, 2018
1 parent 0020ee0 commit 6f64337
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
27 changes: 21 additions & 6 deletions 02_download_acs_2017_5yr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,28 @@ cd /mnt/tmp
mkdir -p acs2017_5yr
cd acs2017_5yr
sudo apt-get -y install unzip aria2
aria2c --dir=/mnt/tmp/acs2017_5yr --max-connection-per-server=5 --force-sequential=true \
"https://www2.census.gov/programs-surveys/acs/summary_file/2017/data/5_year_entire_sf/All_Geographies_Not_Tracts_Block_Groups.tar.gz" \
"https://www2.census.gov/programs-surveys/acs/summary_file/2017/data/5_year_entire_sf/Tracts_Block_Groups_Only.tar.gz" \
aria2c \
--allow-overwrite=true \
--auto-file-renaming=false \
--dir=/mnt/tmp/acs2017_5yr \
--max-connection-per-server=5 \
--force-sequential=true \
--header='Connection: keep-alive' \
--header='User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36' \
--header='Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' \
--header='Accept-Encoding: gzip, deflate, br' \
--header='Accept-Language: en-US,en;q=0.9' \
"https://www2.census.gov/programs-surveys/acs/summary_file/2017/data/5_year_entire_sf/All_Geographies_Not_Tracts_Block_Groups.tar" \
"https://www2.census.gov/programs-surveys/acs/summary_file/2017/data/5_year_entire_sf/Tracts_Block_Groups_Only.tar" \
"https://www2.census.gov/programs-surveys/acs/summary_file/2017/data/5_year_entire_sf/2017_ACS_Geography_Files.zip" \
"https://www2.census.gov/programs-surveys/acs/summary_file/2017/documentation/user_tools/ACS_5yr_Seq_Table_Number_Lookup.txt"
tar -xzf All_Geographies_Not_Tracts_Block_Groups.tar.gz
tar -xzf Tracts_Block_Groups_Only.tar.gz
unzip -q 2017_ACS_Geography_Files.zip
# The 2017 5-year release switched to .tar archives that contain .zip files
tar -xf All_Geographies_Not_Tracts_Block_Groups.tar && rm All_Geographies_Not_Tracts_Block_Groups.tar
tar -xf Tracts_Block_Groups_Only.tar && rm Tracts_Block_Groups_Only.tar

# Each .tar contains one .zip per state; the .zip files from each .tar must be extracted into their own directory, or their contents will clobber each other
# Unpack every per-state .zip into the directory for its archive set, deleting
# each .zip only after it has been extracted successfully.
mkdir -p tracts_block_groups_only all_geographies_not_tracts_block_groups
for i in *_Tracts_Block_Groups_Only.zip; do
  # An unmatched glob is left as the literal pattern; skip it instead of
  # handing a nonexistent file name to unzip.
  [ -e "$i" ] || continue
  unzip -q -d tracts_block_groups_only "$i" && rm -- "$i"
done
for i in *_All_Geographies_Not_Tracts_Block_Groups.zip; do
  [ -e "$i" ] || continue
  unzip -q -d all_geographies_not_tracts_block_groups "$i" && rm -- "$i"
done

mv ACS_5yr_Seq_Table_Number_Lookup.txt Sequence_Number_and_Table_Number_Lookup.txt
10 changes: 5 additions & 5 deletions 03_import_acs_2017_5yr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fi
# Slurp in the actual data
# We're doing the COPY FROM STDIN so we don't have to be a psql superuser
echo "Importing geoheader"
cat /mnt/tmp/acs2017_5yr/geo/g20175*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_geoheader FROM STDIN WITH ENCODING 'latin1';"
cat /mnt/tmp/acs2017_5yr/all_geographies_not_tracts_block_groups/g20175*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_geoheader FROM STDIN WITH ENCODING 'latin1';"
if [[ $? != 0 ]]; then
echo "Failed importing geoheader."
exit 1
Expand All @@ -62,22 +62,22 @@ fi
for s in $(seq -f "%04g" 1 133)
do
echo "Importing sequence $s"
cat /mnt/tmp/acs2017_5yr/data/tab4/sumfile/prod/2012thru2017/group1/e20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s} FROM STDIN WITH CSV ENCODING 'latin1';"
cat /mnt/tmp/acs2017_5yr/tracts_block_groups_only/e20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s} FROM STDIN WITH CSV ENCODING 'latin1';"
if [[ $? != 0 ]]; then
echo "Failed importing sequences."
exit 1
fi
cat /mnt/tmp/acs2017_5yr/data/tab4/sumfile/prod/2012thru2017/group2/e20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s} FROM STDIN WITH CSV ENCODING 'latin1';"
cat /mnt/tmp/acs2017_5yr/tracts_block_groups_only/m20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s}_moe FROM STDIN WITH CSV ENCODING 'latin1';"
if [[ $? != 0 ]]; then
echo "Failed importing sequences."
exit 1
fi
cat /mnt/tmp/acs2017_5yr/data/tab4/sumfile/prod/2012thru2017/group1/m20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s}_moe FROM STDIN WITH CSV ENCODING 'latin1';"
cat /mnt/tmp/acs2017_5yr/all_geographies_not_tracts_block_groups/e20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s} FROM STDIN WITH CSV ENCODING 'latin1';"
if [[ $? != 0 ]]; then
echo "Failed importing sequences."
exit 1
fi
cat /mnt/tmp/acs2017_5yr/data/tab4/sumfile/prod/2012thru2017/group2/m20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s}_moe FROM STDIN WITH CSV ENCODING 'latin1';"
cat /mnt/tmp/acs2017_5yr/all_geographies_not_tracts_block_groups/m20175[a-z][a-z]${s}*txt | psql -d census -h $PGHOST -U census -v ON_ERROR_STOP=1 -q -c "COPY acs2017_5yr.tmp_seq${s}_moe FROM STDIN WITH CSV ENCODING 'latin1';"
if [[ $? != 0 ]]; then
echo "Failed importing sequences."
exit 1
Expand Down

0 comments on commit 6f64337

Please sign in to comment.