From 40982d0e191f2ff905d5bacc464553b6bb36d9ac Mon Sep 17 00:00:00 2001 From: Amy-Xu Date: Wed, 6 Dec 2017 17:42:19 -0500 Subject: [PATCH 1/6] add tests for dist & agg --- cps_stage3/test_cps_benefits.py | 158 ++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 cps_stage3/test_cps_benefits.py diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py new file mode 100644 index 00000000..4573591a --- /dev/null +++ b/cps_stage3/test_cps_benefits.py @@ -0,0 +1,158 @@ +import sys +import pandas as pd +import numpy as np +from pandas.util.testing import assert_frame_equal + +programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap'] + +def read_files(): + ''' import weights, benefit, and raw cps file''' + + # import from taxdata repo + weights = pd.read_csv('../cps_stage2/cps_weights.csv.gz', compression='gzip') + cps_income = pd.read_csv('../cps_data/cps.csv.gz', compression='gzip')[['e00200', 's006']] + cps_benefit = pd.read_csv('cps_benefits.csv') + + assert len(cps_benefit) == len(weights) + assert len(cps_income) == len(cps_benefit) + + # merge all essential variables + cps = cps_benefit.join(cps_income) + cps = cps.join(weights) + + # rename to facilitate for loops + cps.rename(columns={'s006': 'WT2014'}, inplace=True) + + # create decile ranks by wage + cps = cps.sort_values(by='e00200') + cps['WT2015_cumsum'] = cps.WT2015.cumsum() + cps['WT2015_decile'] = np.ceil(cps.WT2015_cumsum/(max(cps.WT2015_cumsum)/9.99)) + + return cps + +def test_decile_dist(): + + ''' total participation, total benefits and average benefits + by decile + ''' + cps = read_files() + benefits_vars = [x + '_benefits_2015' for x in programs] + p_vars = [x + '_recipients_2015' for x in programs] + + + decile2015 = pd.DataFrame(np.linspace(1,10, num=10), columns=['2015_decile']) + delta = 1e06 + + for i in range(6): + + # create weighted benefit + cps[benefits_vars[i] + '_weighted'] = cps[benefits_vars[i]] * cps['WT2015'] + + # temporary 
variable for weighted participation + cps['dummy'] = np.where(cps[p_vars[i]]!=0, cps['WT2015'], 0) + + # calculate total benefits, participation (# tax units), and average per decile + bp = cps[[benefits_vars[i] + '_weighted', 'dummy']].groupby(cps.WT2015_decile, as_index=False).sum()/1000000 + bp['average'] = bp[benefits_vars[i] + '_weighted']/(bp['dummy'] + delta) + + # rename and save + bp.columns = [programs[i]+'_benefits', programs[i]+'_taxunits', programs[i]+'_average'] + decile2015 = pd.concat([decile2015, bp], axis=1) + + decile2015.to_csv('decile2015_new.csv', float_format='%.2f', index=False) + + decile_old = pd.read_csv('decile2015.csv') + assert_frame_equal(decile2015.round(1), decile_old) + + +def test_aggregates(): + + '''total individual & taxunit participation, total benefits from 2014-2026''' + + cps = read_files() + + benefits = pd.DataFrame(programs, columns=['programs']) + taxunits = pd.DataFrame(programs, columns=['programs']) + participants = pd.DataFrame(programs, columns=['programs']) + + for year in range(2014, 2025): + #benefits + benefits_vars = [x + '_benefits_' + str(year) for x in programs] + raw_benefits = cps.loc[:,benefits_vars] + weighted_benefits = raw_benefits.multiply(cps['WT' + str(year)], axis='index') + benefit_total = pd.DataFrame(weighted_benefits.sum()/1000000000) + benefits[year] = benefit_total.values + + #participants + p_vars = [x + '_recipients_'+ str(year) for x in programs] + raw_participants = cps.loc[:, p_vars] + weighted_par = raw_participants.multiply(cps['WT' + str(year)], axis='index') + participant_total = pd.DataFrame(weighted_par.sum()/1000000) + participants[year] = participant_total.values + + # tax units + dummy = raw_participants.astype(bool) + weighted_taxunits = dummy.multiply(cps['WT' + str(year)], axis='index') + taxunit_total = pd.DataFrame(weighted_taxunits.sum()/1000000) + taxunits[year] = taxunit_total.values + + pd.options.display.float_format = '{:,.1f}'.format + with 
open('aggregates_new.txt', 'w') as file: + file.write("Total benefits (billions)\n" + benefits.to_string(index=False) + '\n\n') + file.write('Total participating tax units (millions)\n' + taxunits.to_string(index=False) + '\n\n') + file.write('Total participants (millions)\n' + participants.to_string(index=False) + '\n\n') + + # import the current version + agg_old = pd.read_csv('aggregates.txt', delim_whitespace=True, skiprows=[0,9,18], thousands=',') + agg_old.columns = ['programs'] + list(range(2014, 2025)) + + benefits_old = agg_old.loc[0:5] + assert_frame_equal(benefits.round(1), benefits_old) + + taxunits_old = agg_old.loc[7:12].reset_index().drop(['index'], axis=1) + assert_frame_equal(taxunits.round(1), taxunits_old) + + participants_old = agg_old.loc[14:19].reset_index().drop(['index'], axis=1) + assert_frame_equal(participants.round(1), participants_old) + + +def test_tabs(): + + ''' tabulation of number of participants per tax unit from 2014 to 2026''' + + tabs = {} + cps = read_files() + + # inline function to create single year program tabulation + p_tab = lambda program: cps[program].value_counts() + + for program in programs: + program_tab = {} + for year in range(2014, 2025): + program_tab[year] = p_tab(program+"_recipients_"+str(year)) + program_tab = pd.DataFrame(program_tab) + + tabs[program] = program_tab + + with open('tabs_new.txt', 'w') as file: + for key, dfs in tabs.iteritems(): + file.write(key + '\n') + file.write(dfs.to_string() + '\n\n') + + tabs_old = pd.read_csv('tabs.txt', delim_whitespace=True, + names=['index'] + list(range(2014, 2025))) + tabs_old = tabs_old[tabs_old['index']!='2014'] + + for program in programs: + + unitmax = len(tabs[program]) + start_row = (tabs_old.index[tabs_old['index']==program] + 1).values[0] + end_row = start_row + unitmax + + participation_new = tabs_old.loc[start_row: end_row] + participation_new = participation_new.reset_index().drop(['level_0'], axis=1) + + 
assert_frame_equal(participation_new.astype(int), tabs[program].reset_index(), + check_column_type=False, check_index_type=False) + + From bfb66730a004e52c021a3f1bd8872bf3a4fe17ab Mon Sep 17 00:00:00 2001 From: Amy-Xu Date: Thu, 7 Dec 2017 13:50:57 -0500 Subject: [PATCH 2/6] modified for the updated benefit file --- cps_stage3/test_cps_benefits.py | 37 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py index 4573591a..adb45ca2 100644 --- a/cps_stage3/test_cps_benefits.py +++ b/cps_stage3/test_cps_benefits.py @@ -4,20 +4,25 @@ from pandas.util.testing import assert_frame_equal programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap'] +billion = 10e9 +million = 10e6 def read_files(): ''' import weights, benefit, and raw cps file''' # import from taxdata repo + # weights and wage are for 10-year and decile tables weights = pd.read_csv('../cps_stage2/cps_weights.csv.gz', compression='gzip') - cps_income = pd.read_csv('../cps_data/cps.csv.gz', compression='gzip')[['e00200', 's006']] - cps_benefit = pd.read_csv('cps_benefits.csv') + cps_income = pd.read_csv('../cps_data/cps.csv.gz', + compression='gzip')[['e00200', 's006', 'RECID']] + # the benefit file that includes both benefits and recipients + cps_benefit = pd.read_csv('cps_benefits_extrap_full.csv.gz') - assert len(cps_benefit) == len(weights) - assert len(cps_income) == len(cps_benefit) + assert len(cps_income) == len(weights) # merge all essential variables - cps = cps_benefit.join(cps_income) + cps = cps_income.merge(cps_benefit, on='RECID', how='left') + cps.fillna(0, inplace=True) cps = cps.join(weights) # rename to facilitate for loops @@ -52,14 +57,15 @@ def test_decile_dist(): cps['dummy'] = np.where(cps[p_vars[i]]!=0, cps['WT2015'], 0) # calculate total benefits, participation (# tax units), and average per decile - bp = cps[[benefits_vars[i] + '_weighted', 
'dummy']].groupby(cps.WT2015_decile, as_index=False).sum()/1000000 + bp = cps[[benefits_vars[i] + '_weighted', 'dummy']].groupby(cps.WT2015_decile, + as_index=False).sum()/million bp['average'] = bp[benefits_vars[i] + '_weighted']/(bp['dummy'] + delta) # rename and save bp.columns = [programs[i]+'_benefits', programs[i]+'_taxunits', programs[i]+'_average'] decile2015 = pd.concat([decile2015, bp], axis=1) - decile2015.to_csv('decile2015_new.csv', float_format='%.2f', index=False) + decile2015.to_csv('decile2015_new.csv', float_format='%.1f', index=False) decile_old = pd.read_csv('decile2015.csv') assert_frame_equal(decile2015.round(1), decile_old) @@ -80,20 +86,20 @@ def test_aggregates(): benefits_vars = [x + '_benefits_' + str(year) for x in programs] raw_benefits = cps.loc[:,benefits_vars] weighted_benefits = raw_benefits.multiply(cps['WT' + str(year)], axis='index') - benefit_total = pd.DataFrame(weighted_benefits.sum()/1000000000) + benefit_total = pd.DataFrame(weighted_benefits.sum()/billion) benefits[year] = benefit_total.values #participants p_vars = [x + '_recipients_'+ str(year) for x in programs] raw_participants = cps.loc[:, p_vars] weighted_par = raw_participants.multiply(cps['WT' + str(year)], axis='index') - participant_total = pd.DataFrame(weighted_par.sum()/1000000) + participant_total = pd.DataFrame(weighted_par.sum()/million) participants[year] = participant_total.values # tax units dummy = raw_participants.astype(bool) weighted_taxunits = dummy.multiply(cps['WT' + str(year)], axis='index') - taxunit_total = pd.DataFrame(weighted_taxunits.sum()/1000000) + taxunit_total = pd.DataFrame(weighted_taxunits.sum()/million) taxunits[year] = taxunit_total.values pd.options.display.float_format = '{:,.1f}'.format @@ -131,8 +137,8 @@ def test_tabs(): for year in range(2014, 2025): program_tab[year] = p_tab(program+"_recipients_"+str(year)) program_tab = pd.DataFrame(program_tab) - - tabs[program] = program_tab + program_tab.fillna(0, inplace=True) + 
tabs[program] = program_tab.astype(int) with open('tabs_new.txt', 'w') as file: for key, dfs in tabs.iteritems(): @@ -149,10 +155,11 @@ def test_tabs(): start_row = (tabs_old.index[tabs_old['index']==program] + 1).values[0] end_row = start_row + unitmax - participation_new = tabs_old.loc[start_row: end_row] - participation_new = participation_new.reset_index().drop(['level_0'], axis=1) + participation_old = tabs_old.loc[start_row: end_row] + participation_old = participation_old.reset_index().drop(['level_0'], axis=1) - assert_frame_equal(participation_new.astype(int), tabs[program].reset_index(), + assert_frame_equal(participation_old.astype(float), + tabs[program].reset_index().astype(float), check_column_type=False, check_index_type=False) From 535f478fc952464d3fde4627e92facaff7c352a1 Mon Sep 17 00:00:00 2001 From: Amy-Xu Date: Thu, 7 Dec 2017 13:51:25 -0500 Subject: [PATCH 3/6] summary files --- cps_stage3/aggregates.txt | 27 ++++++++++++++++ cps_stage3/decile2015.csv | 11 +++++++ cps_stage3/tabs.txt | 68 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 cps_stage3/aggregates.txt create mode 100644 cps_stage3/decile2015.csv create mode 100644 cps_stage3/tabs.txt diff --git a/cps_stage3/aggregates.txt b/cps_stage3/aggregates.txt new file mode 100644 index 00000000..c5d7289f --- /dev/null +++ b/cps_stage3/aggregates.txt @@ -0,0 +1,27 @@ +Total benefits (billions) +programs 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 + ss 84.9 9,016.7 9,354.7 9,766.6 10,490.9 11,239.9 12,036.5 12,830.5 13,719.6 14,680.9 15,703.3 + ssi 5.4 547.7 549.1 552.6 570.3 589.1 608.7 628.4 647.8 669.2 690.8 +medicaid 36.9 4,127.6 3,840.9 3,806.6 3,923.1 3,916.4 3,916.8 3,912.4 3,910.5 3,909.1 3,910.2 +medicare 57.6 6,024.5 6,298.8 6,679.0 7,183.1 7,779.4 8,416.6 9,071.1 9,785.5 10,562.1 11,357.7 + vb 14.7 1,523.0 1,582.1 1,589.9 1,598.3 1,607.1 1,616.7 1,626.8 1,637.7 1,649.3 1,661.6 + snap 8.3 825.7 789.5 789.5 789.5 789.5 789.5 789.5 
789.5 789.5 789.5 + +Total participating tax units (millions) +programs 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 + ss 4.5 451.6 458.1 464.6 471.3 478.1 484.9 491.5 498.9 506.1 513.4 + ssi 0.7 68.4 68.5 68.5 68.6 69.6 69.3 70.3 70.0 70.9 71.0 +medicaid 2.8 297.0 308.2 312.4 316.6 325.0 329.4 333.8 338.3 342.9 347.5 +medicare 3.9 396.1 409.8 421.4 433.4 446.2 461.0 476.0 491.1 505.4 519.3 + vb 0.5 49.1 45.8 45.8 45.9 46.0 46.1 46.1 46.2 46.3 46.3 + snap 2.9 279.7 267.7 266.9 265.6 264.4 263.3 262.1 260.9 259.5 258.1 + +Total participants (millions) +programs 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 + ss 5.7 577.7 593.8 609.1 624.6 639.9 655.4 664.2 690.1 709.4 728.7 + ssi 0.8 75.5 75.4 75.2 75.2 76.2 76.0 77.0 76.7 77.7 77.8 +medicaid 5.2 563.6 581.4 589.2 597.2 611.9 620.2 628.6 637.0 645.6 654.4 +medicare 5.0 506.6 523.5 538.5 554.1 570.3 587.2 604.3 621.6 638.5 654.9 + vb 0.5 49.9 46.4 46.5 46.6 46.7 46.7 46.8 46.9 46.9 47.0 + snap 4.3 421.9 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6 + diff --git a/cps_stage3/decile2015.csv b/cps_stage3/decile2015.csv new file mode 100644 index 00000000..cb14f8da --- /dev/null +++ b/cps_stage3/decile2015.csv @@ -0,0 +1,11 @@ +2015_decile,ss_benefits,ss_taxunits,ss_average,ssi_benefits,ssi_taxunits,ssi_average,medicaid_benefits,medicaid_taxunits,medicaid_average,medicare_benefits,medicare_taxunits,medicare_average,vb_benefits,vb_taxunits,vb_average,snap_benefits,snap_taxunits,snap_average +1.0,2356481.6,118.3,2.4,154464.2,19.1,0.2,598590.7,42.2,0.6,1725226.7,109.2,1.7,365777.6,12.1,0.4,145995.5,58.4,0.1 +2.0,2411408.0,120.5,2.4,157525.7,19.9,0.2,820662.6,41.7,0.8,1868043.5,109.6,1.9,329554.2,10.3,0.3,138925.4,58.0,0.1 +3.0,1783238.9,91.1,1.8,127787.3,16.2,0.1,687520.4,45.1,0.7,1307569.7,83.0,1.3,248260.4,7.9,0.2,127714.4,51.3,0.1 +4.0,519415.4,25.5,0.5,31510.3,3.9,0.0,497498.9,41.3,0.5,289250.9,21.1,0.3,65792.8,2.2,0.1,155344.1,45.3,0.2 
+5.0,406772.9,20.8,0.4,28972.0,3.5,0.0,481824.6,38.9,0.5,196159.7,16.1,0.2,67573.9,2.3,0.1,135604.2,38.2,0.1 +6.0,342122.4,17.9,0.3,23688.3,2.9,0.0,379137.5,29.9,0.4,168967.5,13.7,0.2,72378.6,2.4,0.1,74535.5,18.1,0.1 +7.0,340631.7,16.9,0.3,17511.1,2.2,0.0,267361.9,22.2,0.3,154586.2,12.6,0.2,80965.4,2.7,0.1,32853.3,7.4,0.0 +8.0,335573.5,16.7,0.3,6008.1,0.7,0.0,180877.7,15.8,0.2,148036.4,12.5,0.1,100201.4,3.3,0.1,12601.8,2.5,0.0 +9.0,274467.2,13.1,0.3,187.1,0.0,0.0,129812.1,11.5,0.1,90364.1,10.0,0.1,103412.4,3.2,0.1,2097.1,0.4,0.0 +10.0,246626.0,10.8,0.2,0.0,0.0,0.0,84352.4,8.4,0.1,76339.9,8.5,0.1,89057.8,2.8,0.1,30.1,0.0,0.0 diff --git a/cps_stage3/tabs.txt b/cps_stage3/tabs.txt new file mode 100644 index 00000000..4a473f83 --- /dev/null +++ b/cps_stage3/tabs.txt @@ -0,0 +1,68 @@ +vb + 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 +0.0 440860 440860 441980 442081 442213 442327 442487 442570 442685 442755 442864 +1.0 15394 15394 14322 14222 14091 13978 13822 13741 13627 13557 13450 +2.0 210 210 162 161 160 159 156 154 153 153 151 +3.0 1 1 1 1 1 1 0 0 0 0 0 + +ss + 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 +0.0 328317 326525 326111 325606 325403 325331 325242 325242 324970 324803 324728 +1.0 83781 84533 84328 84234 84211 84192 84185 84185 84139 84113 84097 +2.0 44108 44607 44462 44418 44394 44391 44389 44389 44366 44361 44340 +3.0 258 473 472 474 475 474 473 473 473 473 473 +4.0 1 68 68 68 69 69 69 69 66 65 65 + +medicaid + 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 +0.0 392005 388485 387573 387573 387573 386957 386957 386957 386957 386957 386957 +1.0 32353 34347 34984 34984 34984 35390 35390 35390 35390 35390 35390 +2.0 16027 16313 16503 16503 16503 16602 16602 16602 16602 16602 16602 +3.0 7810 8183 8240 8240 8240 8271 8271 8271 8271 8271 8271 +4.0 4910 5337 5358 5358 5358 5408 5408 5408 5408 5408 5408 +5.0 2085 2346 2352 2352 2352 2371 2371 2371 2371 2371 2371 +6.0 775 871 872 872 872 877 877 877 877 877 877 +7.0 351 419 419 419 419 
425 425 425 425 425 425 +8.0 86 97 97 97 97 96 96 96 96 96 96 +9.0 30 33 33 33 33 34 34 34 34 34 34 +11.0 20 21 21 21 21 21 21 21 21 21 21 +10.0 10 9 9 9 9 9 9 9 9 9 9 +12.0 2 3 3 3 3 3 3 3 3 3 3 +14.0 1 1 1 1 1 1 1 1 1 1 1 + +medicare + 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 +0.0 344758 342573 339832 337965 336186 334385 332111 329739 327392 325338 323332 +1.0 71106 73109 75094 76324 77319 78511 80682 82909 85152 87003 88823 +2.0 40148 40310 41060 41693 42471 43073 43172 43311 43412 43605 43784 +3.0 349 369 359 363 369 376 380 386 388 395 402 +4.0 49 49 65 64 64 64 64 64 65 67 67 +7.0 31 31 31 31 31 31 31 31 31 32 32 +5.0 16 16 16 17 17 16 16 16 16 16 16 +6.0 7 7 7 7 7 8 8 8 8 8 8 +8.0 1 1 1 1 1 1 1 1 1 1 1 + +snap + 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 +0.0 408953 410079 412791 413483 414187 414915 415626 416229 416892 417594 418304 +1.0 34020 33272 31160 30597 29967 29322 28736 28205 27631 26976 26322 +2.0 6932 6675 6251 6168 6113 6054 5952 5903 5853 5830 5796 +3.0 2811 2719 2608 2575 2563 2550 2534 2518 2503 2493 2477 +4.0 1919 1901 1859 1852 1848 1840 1836 1830 1810 1802 1798 +5.0 1083 1076 1056 1051 1050 1047 1044 1044 1042 1039 1037 +6.0 479 475 472 472 471 471 471 470 469 467 467 +7.0 175 175 175 174 174 174 174 174 173 172 172 +8.0 58 58 58 58 57 57 57 57 57 57 57 +9.0 26 26 26 26 26 26 26 26 26 26 26 +11.0 5 5 5 5 5 5 5 5 5 5 5 +10.0 3 3 3 3 3 3 3 3 3 3 3 +12.0 1 1 1 1 1 1 1 1 1 1 1 + +ssi + 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 +0.0 443520 443520 443771 443995 444219 444219 444463 444463 444705 444705 444851 +1.0 11643 11643 11443 11262 11080 11080 10876 10876 10646 10646 10507 +2.0 1206 1206 1164 1134 1099 1099 1059 1059 1048 1048 1042 +3.0 89 89 81 69 62 62 62 62 61 61 60 +4.0 7 7 6 5 5 5 5 5 5 5 5 + From 441b9f49f00466da31fcf480d98d3b0d7de70c63 Mon Sep 17 00:00:00 2001 From: Amy-Xu Date: Thu, 7 Dec 2017 14:02:20 -0500 Subject: [PATCH 4/6] fixed delta and updated decile table --- 
cps_stage3/decile2015.csv | 20 ++++++++++---------- cps_stage3/test_cps_benefits.py | 9 +++------ 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/cps_stage3/decile2015.csv b/cps_stage3/decile2015.csv index cb14f8da..9b827f26 100644 --- a/cps_stage3/decile2015.csv +++ b/cps_stage3/decile2015.csv @@ -1,11 +1,11 @@ 2015_decile,ss_benefits,ss_taxunits,ss_average,ssi_benefits,ssi_taxunits,ssi_average,medicaid_benefits,medicaid_taxunits,medicaid_average,medicare_benefits,medicare_taxunits,medicare_average,vb_benefits,vb_taxunits,vb_average,snap_benefits,snap_taxunits,snap_average -1.0,2356481.6,118.3,2.4,154464.2,19.1,0.2,598590.7,42.2,0.6,1725226.7,109.2,1.7,365777.6,12.1,0.4,145995.5,58.4,0.1 -2.0,2411408.0,120.5,2.4,157525.7,19.9,0.2,820662.6,41.7,0.8,1868043.5,109.6,1.9,329554.2,10.3,0.3,138925.4,58.0,0.1 -3.0,1783238.9,91.1,1.8,127787.3,16.2,0.1,687520.4,45.1,0.7,1307569.7,83.0,1.3,248260.4,7.9,0.2,127714.4,51.3,0.1 -4.0,519415.4,25.5,0.5,31510.3,3.9,0.0,497498.9,41.3,0.5,289250.9,21.1,0.3,65792.8,2.2,0.1,155344.1,45.3,0.2 -5.0,406772.9,20.8,0.4,28972.0,3.5,0.0,481824.6,38.9,0.5,196159.7,16.1,0.2,67573.9,2.3,0.1,135604.2,38.2,0.1 -6.0,342122.4,17.9,0.3,23688.3,2.9,0.0,379137.5,29.9,0.4,168967.5,13.7,0.2,72378.6,2.4,0.1,74535.5,18.1,0.1 -7.0,340631.7,16.9,0.3,17511.1,2.2,0.0,267361.9,22.2,0.3,154586.2,12.6,0.2,80965.4,2.7,0.1,32853.3,7.4,0.0 -8.0,335573.5,16.7,0.3,6008.1,0.7,0.0,180877.7,15.8,0.2,148036.4,12.5,0.1,100201.4,3.3,0.1,12601.8,2.5,0.0 -9.0,274467.2,13.1,0.3,187.1,0.0,0.0,129812.1,11.5,0.1,90364.1,10.0,0.1,103412.4,3.2,0.1,2097.1,0.4,0.0 -10.0,246626.0,10.8,0.2,0.0,0.0,0.0,84352.4,8.4,0.1,76339.9,8.5,0.1,89057.8,2.8,0.1,30.1,0.0,0.0 +1.0,2356481.6,118.3,19921.4,154464.2,19.1,8107.0,598590.7,42.2,14176.1,1725226.7,109.2,15795.9,365777.6,12.1,30144.8,145995.5,58.4,2497.8 +2.0,2411408.0,120.5,20015.0,157525.7,19.9,7897.6,820662.6,41.7,19685.8,1868043.5,109.6,17045.8,329554.2,10.3,32144.8,138925.4,58.0,2393.2 
+3.0,1783238.9,91.1,19583.3,127787.3,16.2,7887.8,687520.4,45.1,15243.9,1307569.7,83.0,15761.9,248260.4,7.9,31511.8,127714.4,51.3,2490.8 +4.0,519415.4,25.5,20342.1,31510.3,3.9,8036.8,497498.9,41.3,12045.7,289250.9,21.1,13732.1,65792.8,2.2,30099.9,155344.1,45.3,3427.0 +5.0,406772.9,20.8,19530.2,28972.0,3.5,8313.7,481824.6,38.9,12390.0,196159.7,16.1,12192.1,67573.9,2.3,29786.4,135604.2,38.2,3547.3 +6.0,342122.4,17.9,19104.6,23688.3,2.9,8174.8,379137.5,29.9,12682.0,168967.5,13.7,12358.4,72378.6,2.4,30033.3,74535.5,18.1,4120.2 +7.0,340631.7,16.9,20112.0,17511.1,2.2,8117.0,267361.9,22.2,12041.1,154586.2,12.6,12305.4,80965.4,2.7,30167.1,32853.3,7.4,4434.1 +8.0,335573.5,16.7,20101.1,6008.1,0.7,8645.3,180877.7,15.8,11424.3,148036.4,12.5,11869.2,100201.4,3.3,30366.0,12601.8,2.5,5091.7 +9.0,274467.2,13.1,21001.4,187.1,0.0,9282.4,129812.1,11.5,11289.6,90364.1,10.0,9069.6,103412.4,3.2,32213.4,2097.1,0.4,5714.2 +10.0,246626.0,10.8,22787.3,0.0,0.0,0.0,84352.4,8.4,10073.5,76339.9,8.5,8941.8,89057.8,2.8,32209.2,30.1,0.0,1910.3 diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py index adb45ca2..c199c730 100644 --- a/cps_stage3/test_cps_benefits.py +++ b/cps_stage3/test_cps_benefits.py @@ -6,6 +6,7 @@ programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap'] billion = 10e9 million = 10e6 +delta = 1e-06 def read_files(): ''' import weights, benefit, and raw cps file''' @@ -35,18 +36,17 @@ def read_files(): return cps +cps = read_files() + def test_decile_dist(): ''' total participation, total benefits and average benefits by decile ''' - cps = read_files() benefits_vars = [x + '_benefits_2015' for x in programs] p_vars = [x + '_recipients_2015' for x in programs] - decile2015 = pd.DataFrame(np.linspace(1,10, num=10), columns=['2015_decile']) - delta = 1e06 for i in range(6): @@ -74,8 +74,6 @@ def test_decile_dist(): def test_aggregates(): '''total individual & taxunit participation, total benefits from 2014-2026''' - - cps = read_files() benefits = 
pd.DataFrame(programs, columns=['programs']) taxunits = pd.DataFrame(programs, columns=['programs']) @@ -127,7 +125,6 @@ def test_tabs(): ''' tabulation of number of participants per tax unit from 2014 to 2026''' tabs = {} - cps = read_files() # inline function to create single year program tabulation p_tab = lambda program: cps[program].value_counts() From ebc9478c495b77c59ea0f400b2ae9a017d05178b Mon Sep 17 00:00:00 2001 From: Amy-Xu Date: Thu, 7 Dec 2017 15:08:44 -0500 Subject: [PATCH 5/6] fixed global typo & divide weight by 100 --- cps_stage3/aggregates.txt | 38 ++++++++++++++++----------------- cps_stage3/test_cps_benefits.py | 12 ++++++----- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/cps_stage3/aggregates.txt b/cps_stage3/aggregates.txt index c5d7289f..30e4a7a0 100644 --- a/cps_stage3/aggregates.txt +++ b/cps_stage3/aggregates.txt @@ -1,27 +1,27 @@ Total benefits (billions) -programs 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 - ss 84.9 9,016.7 9,354.7 9,766.6 10,490.9 11,239.9 12,036.5 12,830.5 13,719.6 14,680.9 15,703.3 - ssi 5.4 547.7 549.1 552.6 570.3 589.1 608.7 628.4 647.8 669.2 690.8 -medicaid 36.9 4,127.6 3,840.9 3,806.6 3,923.1 3,916.4 3,916.8 3,912.4 3,910.5 3,909.1 3,910.2 -medicare 57.6 6,024.5 6,298.8 6,679.0 7,183.1 7,779.4 8,416.6 9,071.1 9,785.5 10,562.1 11,357.7 - vb 14.7 1,523.0 1,582.1 1,589.9 1,598.3 1,607.1 1,616.7 1,626.8 1,637.7 1,649.3 1,661.6 - snap 8.3 825.7 789.5 789.5 789.5 789.5 789.5 789.5 789.5 789.5 789.5 +programs 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 + ss 849.2 901.7 935.5 976.7 1,049.1 1,124.0 1,203.6 1,283.0 1,372.0 1,468.1 1,570.3 + ssi 54.1 54.8 54.9 55.3 57.0 58.9 60.9 62.8 64.8 66.9 69.1 +medicaid 368.6 412.8 384.1 380.7 392.3 391.6 391.7 391.2 391.1 390.9 391.0 +medicare 576.1 602.5 629.9 667.9 718.3 777.9 841.7 907.1 978.5 1,056.2 1,135.8 + vb 146.8 152.3 158.2 159.0 159.8 160.7 161.7 162.7 163.8 164.9 166.2 + snap 83.0 82.6 79.0 79.0 79.0 79.0 79.0 79.0 79.0 79.0 79.0 
Total participating tax units (millions) programs 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 - ss 4.5 451.6 458.1 464.6 471.3 478.1 484.9 491.5 498.9 506.1 513.4 - ssi 0.7 68.4 68.5 68.5 68.6 69.6 69.3 70.3 70.0 70.9 71.0 -medicaid 2.8 297.0 308.2 312.4 316.6 325.0 329.4 333.8 338.3 342.9 347.5 -medicare 3.9 396.1 409.8 421.4 433.4 446.2 461.0 476.0 491.1 505.4 519.3 - vb 0.5 49.1 45.8 45.8 45.9 46.0 46.1 46.1 46.2 46.3 46.3 - snap 2.9 279.7 267.7 266.9 265.6 264.4 263.3 262.1 260.9 259.5 258.1 + ss 44.6 45.2 45.8 46.5 47.1 47.8 48.5 49.1 49.9 50.6 51.3 + ssi 6.8 6.8 6.8 6.9 6.9 7.0 6.9 7.0 7.0 7.1 7.1 +medicaid 27.9 29.7 30.8 31.2 31.7 32.5 32.9 33.4 33.8 34.3 34.8 +medicare 38.6 39.6 41.0 42.1 43.3 44.6 46.1 47.6 49.1 50.5 51.9 + vb 4.9 4.9 4.6 4.6 4.6 4.6 4.6 4.6 4.6 4.6 4.6 + snap 28.5 28.0 26.8 26.7 26.6 26.4 26.3 26.2 26.1 26.0 25.8 Total participants (millions) programs 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 - ss 5.7 577.7 593.8 609.1 624.6 639.9 655.4 664.2 690.1 709.4 728.7 - ssi 0.8 75.5 75.4 75.2 75.2 76.2 76.0 77.0 76.7 77.7 77.8 -medicaid 5.2 563.6 581.4 589.2 597.2 611.9 620.2 628.6 637.0 645.6 654.4 -medicare 5.0 506.6 523.5 538.5 554.1 570.3 587.2 604.3 621.6 638.5 654.9 - vb 0.5 49.9 46.4 46.5 46.6 46.7 46.7 46.8 46.9 46.9 47.0 - snap 4.3 421.9 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6 + ss 56.5 57.8 59.4 60.9 62.5 64.0 65.5 66.4 69.0 70.9 72.9 + ssi 7.6 7.6 7.5 7.5 7.5 7.6 7.6 7.7 7.7 7.8 7.8 +medicaid 52.4 56.4 58.1 58.9 59.7 61.2 62.0 62.9 63.7 64.6 65.4 +medicare 49.6 50.7 52.4 53.8 55.4 57.0 58.7 60.4 62.2 63.8 65.5 + vb 5.0 5.0 4.6 4.7 4.7 4.7 4.7 4.7 4.7 4.7 4.7 + snap 43.0 42.2 40.8 40.8 40.8 40.8 40.8 40.8 40.8 40.8 40.8 diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py index c199c730..7605e1fc 100644 --- a/cps_stage3/test_cps_benefits.py +++ b/cps_stage3/test_cps_benefits.py @@ -4,8 +4,8 @@ from pandas.util.testing import assert_frame_equal programs = ['ss', 'ssi', 
'medicaid', 'medicare', 'vb', 'snap'] -billion = 10e9 -million = 10e6 +billion = 1e09 +million = 1e06 delta = 1e-06 def read_files(): @@ -24,7 +24,7 @@ def read_files(): # merge all essential variables cps = cps_income.merge(cps_benefit, on='RECID', how='left') cps.fillna(0, inplace=True) - cps = cps.join(weights) + cps = cps.join(weights/100) # rename to facilitate for loops cps.rename(columns={'s006': 'WT2014'}, inplace=True) @@ -57,8 +57,10 @@ def test_decile_dist(): cps['dummy'] = np.where(cps[p_vars[i]]!=0, cps['WT2015'], 0) # calculate total benefits, participation (# tax units), and average per decile - bp = cps[[benefits_vars[i] + '_weighted', 'dummy']].groupby(cps.WT2015_decile, - as_index=False).sum()/million + variables = [benefits_vars[i] + '_weighted', 'dummy'] + bp = cps[variables].groupby(cps.WT2015_decile, as_index=False).sum() + + bp['average'] = bp[benefits_vars[i] + '_weighted']/(bp['dummy'] + delta) # rename and save From 9e7949f5573bb3b7c259a09c526e2971fd75bcc0 Mon Sep 17 00:00:00 2001 From: Amy-Xu Date: Thu, 7 Dec 2017 16:30:41 -0500 Subject: [PATCH 6/6] added docs --- cps_stage3/decile2015.csv | 20 ++++++++--------- cps_stage3/test_cps_benefits.py | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/cps_stage3/decile2015.csv b/cps_stage3/decile2015.csv index 9b827f26..6d10e0bd 100644 --- a/cps_stage3/decile2015.csv +++ b/cps_stage3/decile2015.csv @@ -1,11 +1,11 @@ 2015_decile,ss_benefits,ss_taxunits,ss_average,ssi_benefits,ssi_taxunits,ssi_average,medicaid_benefits,medicaid_taxunits,medicaid_average,medicare_benefits,medicare_taxunits,medicare_average,vb_benefits,vb_taxunits,vb_average,snap_benefits,snap_taxunits,snap_average -1.0,2356481.6,118.3,19921.4,154464.2,19.1,8107.0,598590.7,42.2,14176.1,1725226.7,109.2,15795.9,365777.6,12.1,30144.8,145995.5,58.4,2497.8 
-2.0,2411408.0,120.5,20015.0,157525.7,19.9,7897.6,820662.6,41.7,19685.8,1868043.5,109.6,17045.8,329554.2,10.3,32144.8,138925.4,58.0,2393.2 -3.0,1783238.9,91.1,19583.3,127787.3,16.2,7887.8,687520.4,45.1,15243.9,1307569.7,83.0,15761.9,248260.4,7.9,31511.8,127714.4,51.3,2490.8 -4.0,519415.4,25.5,20342.1,31510.3,3.9,8036.8,497498.9,41.3,12045.7,289250.9,21.1,13732.1,65792.8,2.2,30099.9,155344.1,45.3,3427.0 -5.0,406772.9,20.8,19530.2,28972.0,3.5,8313.7,481824.6,38.9,12390.0,196159.7,16.1,12192.1,67573.9,2.3,29786.4,135604.2,38.2,3547.3 -6.0,342122.4,17.9,19104.6,23688.3,2.9,8174.8,379137.5,29.9,12682.0,168967.5,13.7,12358.4,72378.6,2.4,30033.3,74535.5,18.1,4120.2 -7.0,340631.7,16.9,20112.0,17511.1,2.2,8117.0,267361.9,22.2,12041.1,154586.2,12.6,12305.4,80965.4,2.7,30167.1,32853.3,7.4,4434.1 -8.0,335573.5,16.7,20101.1,6008.1,0.7,8645.3,180877.7,15.8,11424.3,148036.4,12.5,11869.2,100201.4,3.3,30366.0,12601.8,2.5,5091.7 -9.0,274467.2,13.1,21001.4,187.1,0.0,9282.4,129812.1,11.5,11289.6,90364.1,10.0,9069.6,103412.4,3.2,32213.4,2097.1,0.4,5714.2 -10.0,246626.0,10.8,22787.3,0.0,0.0,0.0,84352.4,8.4,10073.5,76339.9,8.5,8941.8,89057.8,2.8,32209.2,30.1,0.0,1910.3 +1.0,235648164574.3,11828872.7,19921.4,15446420820.6,1905326.1,8107.0,59859073657.8,4222520.3,14176.1,172522666333.8,10921985.5,15795.9,36577757385.0,1213400.6,30144.8,14599547226.0,5844984.0,2497.8 +2.0,241140803199.8,12047991.5,20015.0,15752573846.5,1994613.0,7897.6,82066261681.9,4168809.1,19685.8,186804345030.4,10958997.0,17045.8,32955416319.7,1025216.7,32144.8,13892541220.2,5804971.9,2393.2 +3.0,178323885867.7,9105893.6,19583.3,12778730438.7,1620057.0,7887.8,68752039985.8,4510126.1,15243.9,130756967800.6,8295777.6,15761.9,24826044904.9,787832.6,31511.8,12771442403.3,5127523.1,2490.8 +4.0,51941541201.6,2553402.2,20342.1,3151031056.1,392073.0,8036.8,49749892872.5,4130106.4,12045.7,28925090583.7,2106380.3,13732.1,6579280519.5,218581.0,30100.0,15534412690.7,4532945.8,3427.0 
+5.0,40677288594.5,2082786.3,19530.2,2897197915.0,348485.8,8313.7,48182460457.7,3888821.7,12390.0,19615969305.1,1608914.5,12192.1,6757385855.2,226861.1,29786.4,13560422068.9,3822778.9,3547.3 +6.0,34212236748.2,1790789.2,19104.6,2368829570.4,289770.5,8174.8,37913747915.4,2989568.3,12682.0,16896754076.9,1367227.1,12358.4,7237855735.1,240994.4,30033.3,7453550856.0,1809022.0,4120.2 +7.0,34063165570.0,1693676.4,20112.0,1751105764.4,215732.5,8117.0,26736187395.1,2220411.7,12041.1,15458622689.6,1256250.2,12305.4,8096538973.5,268389.7,30167.1,3285332871.7,740924.7,4434.1 +8.0,33557347102.4,1669427.0,20101.1,600805574.6,69494.9,8645.3,18087766855.8,1583274.5,11424.3,14803643416.6,1247236.2,11869.2,10020136903.8,329978.9,30366.0,1260181184.6,247498.8,5091.7 +9.0,27446719012.6,1306899.4,21001.4,18709682.5,2015.5,9282.9,12981209061.1,1149838.7,11289.6,9036414388.8,996337.3,9069.6,10341242490.4,321022.8,32213.4,209709592.0,36699.4,5714.2 +10.0,24662603933.5,1082297.0,22787.3,0.0,0.0,0.0,8435236248.0,837370.0,10073.5,7633994376.3,853738.3,8941.8,8905775130.9,276498.0,32209.2,3008985.8,1575.0,1910.5 diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py index 7605e1fc..7c783b6f 100644 --- a/cps_stage3/test_cps_benefits.py +++ b/cps_stage3/test_cps_benefits.py @@ -3,6 +3,46 @@ import numpy as np from pandas.util.testing import assert_frame_equal + +''' +The tests in this script check distribution and aggregates for the benefit data. +Whenever the benefit data gets updated, the tests would create new statistics +and compare with the previous version. + +This file needs three inputs: CPS weights (cps_weights.csv.gz), CPS tax unit +database (cps.csv.gz), and CPS benefit (cps_benefits_extrap_full.csv.gz). +The first two input files are in their corresponding folders as indicated +in the code, but the third benefit file is different from the current version +in that it also includes recipient information for each tax unit. 
Because this +recipient per tax unit information is not used in the tax-calculator, we have +dropped those variables to save space. But the recipient information is essential for +checking aggregates and distribution. So anyone who wants to run the tests needs +to recreate the full version of benefit data. + +It is relatively simple to generate the full version: + +1. Find extrapolation.py in the current folder and comment out the six +lines of code (lines 310 - 315) that drop all recipients +2. Rename the output to cps_benefits_extrap_full.csv.gz +3. Run the extrapolation script with 'python extrapolation.py' + +The tests will create three new files of summary statistics: + +1. decile2015_new.csv: participation, total benefit and average benefit +by wage decile +2. aggregates_new.txt: total participation and benefits for each program +from 2014 to 2024 +3. tabs_new.txt: tabulations of tax unit participation for each program +from 2014 to 2024 + +If all three files are exactly the same as the previous version, then the tests +will pass. If the tests fail, compare the new version with the previous version +carefully and then replace the previous with the new version if the difference is +reasonable. + +''' + + programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap'] billion = 1e09 million = 1e06