From 40982d0e191f2ff905d5bacc464553b6bb36d9ac Mon Sep 17 00:00:00 2001
From: Amy-Xu <amy.xu@aei.org>
Date: Wed, 6 Dec 2017 17:42:19 -0500
Subject: [PATCH 1/6] add tests for dist & agg

---
 cps_stage3/test_cps_benefits.py | 158 ++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 cps_stage3/test_cps_benefits.py

diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py
new file mode 100644
index 00000000..4573591a
--- /dev/null
+++ b/cps_stage3/test_cps_benefits.py
@@ -0,0 +1,158 @@
+import sys
+import pandas as pd
+import numpy as np
+from pandas.util.testing import assert_frame_equal
+
+programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap']
+
+def read_files():
+    ''' import weights, benefit, and raw cps file'''
+    
+    # import from taxdata repo
+    weights = pd.read_csv('../cps_stage2/cps_weights.csv.gz', compression='gzip')
+    cps_income = pd.read_csv('../cps_data/cps.csv.gz', compression='gzip')[['e00200', 's006']]
+    cps_benefit = pd.read_csv('cps_benefits.csv')
+
+    assert len(cps_benefit) == len(weights)
+    assert len(cps_income) == len(cps_benefit)
+    
+    # merge all essential variables
+    cps = cps_benefit.join(cps_income)
+    cps = cps.join(weights)
+    
+    # rename to facilitate for loops
+    cps.rename(columns={'s006': 'WT2014'}, inplace=True)
+    
+    # create decile ranks by wage
+    cps = cps.sort_values(by='e00200')
+    cps['WT2015_cumsum'] = cps.WT2015.cumsum()
+    cps['WT2015_decile'] = np.ceil(cps.WT2015_cumsum/(max(cps.WT2015_cumsum)/9.99))
+
+    return cps
+
+def test_decile_dist():
+    
+    ''' total participation, total benefits and average benefits
+        by decile
+    '''
+    cps = read_files()
+    benefits_vars = [x + '_benefits_2015' for x in programs]
+    p_vars = [x + '_recipients_2015' for x in programs]
+    
+    
+    decile2015 = pd.DataFrame(np.linspace(1,10, num=10), columns=['2015_decile'])
+    delta = 1e06
+
+    for i in range(6):
+
+        # create weighted benefit
+        cps[benefits_vars[i] + '_weighted'] = cps[benefits_vars[i]] * cps['WT2015']
+
+        # temporary variable for weighted participation
+        cps['dummy'] = np.where(cps[p_vars[i]]!=0, cps['WT2015'], 0)
+        
+        # calculate total benefits, participation (# tax units), and average per decile
+        bp = cps[[benefits_vars[i] + '_weighted', 'dummy']].groupby(cps.WT2015_decile, as_index=False).sum()/1000000
+        bp['average'] = bp[benefits_vars[i] + '_weighted']/(bp['dummy'] + delta)
+
+        # rename and save
+        bp.columns = [programs[i]+'_benefits', programs[i]+'_taxunits', programs[i]+'_average']
+        decile2015 = pd.concat([decile2015, bp], axis=1)
+        
+        decile2015.to_csv('decile2015_new.csv', float_format='%.2f', index=False)
+
+    decile_old = pd.read_csv('decile2015.csv')
+    assert_frame_equal(decile2015.round(1), decile_old)
+
+
+def test_aggregates():
+    
+    '''total individual & taxunit participation, total benefits from 2014-2026'''
+
+    cps = read_files()
+    
+    benefits = pd.DataFrame(programs, columns=['programs'])
+    taxunits = pd.DataFrame(programs, columns=['programs'])
+    participants = pd.DataFrame(programs, columns=['programs'])
+    
+    for year in range(2014, 2025):
+        #benefits
+        benefits_vars = [x + '_benefits_' + str(year) for x in programs]
+        raw_benefits = cps.loc[:,benefits_vars]
+        weighted_benefits = raw_benefits.multiply(cps['WT' + str(year)], axis='index')
+        benefit_total = pd.DataFrame(weighted_benefits.sum()/1000000000)
+        benefits[year] = benefit_total.values
+
+        #participants
+        p_vars = [x + '_recipients_'+ str(year) for x in programs]
+        raw_participants = cps.loc[:, p_vars]
+        weighted_par = raw_participants.multiply(cps['WT' + str(year)], axis='index')
+        participant_total = pd.DataFrame(weighted_par.sum()/1000000)
+        participants[year] = participant_total.values
+
+        # tax units
+        dummy = raw_participants.astype(bool)
+        weighted_taxunits = dummy.multiply(cps['WT' + str(year)], axis='index')
+        taxunit_total = pd.DataFrame(weighted_taxunits.sum()/1000000)
+        taxunits[year] = taxunit_total.values
+
+    pd.options.display.float_format = '{:,.1f}'.format
+    with open('aggregates_new.txt', 'w') as file:
+        file.write("Total benefits (billions)\n" + benefits.to_string(index=False) + '\n\n')
+        file.write('Total participating tax units (millions)\n' + taxunits.to_string(index=False) + '\n\n')
+        file.write('Total participants (millions)\n' + participants.to_string(index=False) + '\n\n')
+
+    # import the current version
+    agg_old = pd.read_csv('aggregates.txt', sep=r'\s+', skiprows=[0,9,18], thousands=',')
+    agg_old.columns = ['programs'] + list(range(2014, 2025))
+
+    benefits_old = agg_old.loc[0:5]
+    assert_frame_equal(benefits.round(1), benefits_old)
+
+    taxunits_old = agg_old.loc[7:12].reset_index().drop(['index'], axis=1)
+    assert_frame_equal(taxunits.round(1), taxunits_old)
+
+    participants_old = agg_old.loc[14:19].reset_index().drop(['index'], axis=1)
+    assert_frame_equal(participants.round(1), participants_old)
+
+
+def test_tabs():
+    
+    ''' tabulation of number of participants per tax unit from 2014 to 2026'''
+    
+    tabs = {}
+    cps = read_files()
+    
+    # inline function to create single year program tabulation
+    p_tab = lambda program: cps[program].value_counts()
+
+    for program in programs:
+        program_tab = {}
+        for year in range(2014, 2025): 
+            program_tab[year] = p_tab(program+"_recipients_"+str(year))
+            program_tab = pd.DataFrame(program_tab)
+        
+        tabs[program] = program_tab
+
+    with open('tabs_new.txt', 'w') as file:
+        for key, dfs in tabs.iteritems():
+            file.write(key + '\n')
+            file.write(dfs.to_string() + '\n\n')
+
+    tabs_old = pd.read_csv('tabs.txt', sep=r'\s+',
+                           names=['index'] + list(range(2014, 2025)))
+    tabs_old = tabs_old[tabs_old['index']!='2014']
+
+    for program in programs:
+    
+        unitmax = len(tabs[program])
+        start_row = (tabs_old.index[tabs_old['index']==program] + 1).values[0]
+        end_row = start_row + unitmax
+    
+        participation_new = tabs_old.loc[start_row: end_row]
+        participation_new = participation_new.reset_index().drop(['level_0'], axis=1)
+    
+        assert_frame_equal(participation_new.astype(int), tabs[program].reset_index(),
+                           check_column_type=False, check_index_type=False)
+
+

From bfb66730a004e52c021a3f1bd8872bf3a4fe17ab Mon Sep 17 00:00:00 2001
From: Amy-Xu <amy.xu@aei.org>
Date: Thu, 7 Dec 2017 13:50:57 -0500
Subject: [PATCH 2/6] modified for the updated benefit file

---
 cps_stage3/test_cps_benefits.py | 37 ++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py
index 4573591a..adb45ca2 100644
--- a/cps_stage3/test_cps_benefits.py
+++ b/cps_stage3/test_cps_benefits.py
@@ -4,20 +4,25 @@
 from pandas.util.testing import assert_frame_equal
 
 programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap']
+billion = 10e9
+million = 10e6
 
 def read_files():
     ''' import weights, benefit, and raw cps file'''
     
     # import from taxdata repo
+    # weights and wage are for 10-year and decile tables
     weights = pd.read_csv('../cps_stage2/cps_weights.csv.gz', compression='gzip')
-    cps_income = pd.read_csv('../cps_data/cps.csv.gz', compression='gzip')[['e00200', 's006']]
-    cps_benefit = pd.read_csv('cps_benefits.csv')
+    cps_income = pd.read_csv('../cps_data/cps.csv.gz',
+                             compression='gzip')[['e00200', 's006', 'RECID']]
+    # the benefit file that includes both benefits and recipients
+    cps_benefit = pd.read_csv('cps_benefits_extrap_full.csv.gz')
 
-    assert len(cps_benefit) == len(weights)
-    assert len(cps_income) == len(cps_benefit)
+    assert len(cps_income) == len(weights)
     
     # merge all essential variables
-    cps = cps_benefit.join(cps_income)
+    cps = cps_income.merge(cps_benefit, on='RECID', how='left')
+    cps.fillna(0, inplace=True)
     cps = cps.join(weights)
     
     # rename to facilitate for loops
@@ -52,14 +57,15 @@ def test_decile_dist():
         cps['dummy'] = np.where(cps[p_vars[i]]!=0, cps['WT2015'], 0)
         
         # calculate total benefits, participation (# tax units), and average per decile
-        bp = cps[[benefits_vars[i] + '_weighted', 'dummy']].groupby(cps.WT2015_decile, as_index=False).sum()/1000000
+        bp = cps[[benefits_vars[i] + '_weighted', 'dummy']].groupby(cps.WT2015_decile,
+                                                                    as_index=False).sum()/million
         bp['average'] = bp[benefits_vars[i] + '_weighted']/(bp['dummy'] + delta)
 
         # rename and save
         bp.columns = [programs[i]+'_benefits', programs[i]+'_taxunits', programs[i]+'_average']
         decile2015 = pd.concat([decile2015, bp], axis=1)
         
-        decile2015.to_csv('decile2015_new.csv', float_format='%.2f', index=False)
+    decile2015.to_csv('decile2015_new.csv', float_format='%.1f', index=False)
 
     decile_old = pd.read_csv('decile2015.csv')
     assert_frame_equal(decile2015.round(1), decile_old)
@@ -80,20 +86,20 @@ def test_aggregates():
         benefits_vars = [x + '_benefits_' + str(year) for x in programs]
         raw_benefits = cps.loc[:,benefits_vars]
         weighted_benefits = raw_benefits.multiply(cps['WT' + str(year)], axis='index')
-        benefit_total = pd.DataFrame(weighted_benefits.sum()/1000000000)
+        benefit_total = pd.DataFrame(weighted_benefits.sum()/billion)
         benefits[year] = benefit_total.values
 
         #participants
         p_vars = [x + '_recipients_'+ str(year) for x in programs]
         raw_participants = cps.loc[:, p_vars]
         weighted_par = raw_participants.multiply(cps['WT' + str(year)], axis='index')
-        participant_total = pd.DataFrame(weighted_par.sum()/1000000)
+        participant_total = pd.DataFrame(weighted_par.sum()/million)
         participants[year] = participant_total.values
 
         # tax units
         dummy = raw_participants.astype(bool)
         weighted_taxunits = dummy.multiply(cps['WT' + str(year)], axis='index')
-        taxunit_total = pd.DataFrame(weighted_taxunits.sum()/1000000)
+        taxunit_total = pd.DataFrame(weighted_taxunits.sum()/million)
         taxunits[year] = taxunit_total.values
 
     pd.options.display.float_format = '{:,.1f}'.format
@@ -132,9 +138,9 @@ def test_tabs():
             program_tab[year] = p_tab(program+"_recipients_"+str(year))
             program_tab = pd.DataFrame(program_tab)
-        
-        tabs[program] = program_tab
+            program_tab.fillna(0, inplace=True)
+        tabs[program] = program_tab.astype(int)
 
     with open('tabs_new.txt', 'w') as file:
-        for key, dfs in tabs.iteritems():
+        for key, dfs in tabs.items():
             file.write(key + '\n')
             file.write(dfs.to_string() + '\n\n')
@@ -149,10 +155,11 @@ def test_tabs():
         start_row = (tabs_old.index[tabs_old['index']==program] + 1).values[0]
         end_row = start_row + unitmax
     
-        participation_new = tabs_old.loc[start_row: end_row]
-        participation_new = participation_new.reset_index().drop(['level_0'], axis=1)
+        participation_old = tabs_old.loc[start_row: end_row]
+        participation_old = participation_old.reset_index().drop(['level_0'], axis=1)
     
-        assert_frame_equal(participation_new.astype(int), tabs[program].reset_index(),
+        assert_frame_equal(participation_old.astype(float),
+                           tabs[program].reset_index().astype(float),
                            check_column_type=False, check_index_type=False)
 
 

From 535f478fc952464d3fde4627e92facaff7c352a1 Mon Sep 17 00:00:00 2001
From: Amy-Xu <amy.xu@aei.org>
Date: Thu, 7 Dec 2017 13:51:25 -0500
Subject: [PATCH 3/6] summary files

---
 cps_stage3/aggregates.txt | 27 ++++++++++++++++
 cps_stage3/decile2015.csv | 11 +++++++
 cps_stage3/tabs.txt       | 68 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+)
 create mode 100644 cps_stage3/aggregates.txt
 create mode 100644 cps_stage3/decile2015.csv
 create mode 100644 cps_stage3/tabs.txt

diff --git a/cps_stage3/aggregates.txt b/cps_stage3/aggregates.txt
new file mode 100644
index 00000000..c5d7289f
--- /dev/null
+++ b/cps_stage3/aggregates.txt
@@ -0,0 +1,27 @@
+Total benefits (billions)
+programs  2014    2015    2016    2017     2018     2019     2020     2021     2022     2023     2024
+      ss  84.9 9,016.7 9,354.7 9,766.6 10,490.9 11,239.9 12,036.5 12,830.5 13,719.6 14,680.9 15,703.3
+     ssi   5.4   547.7   549.1   552.6    570.3    589.1    608.7    628.4    647.8    669.2    690.8
+medicaid  36.9 4,127.6 3,840.9 3,806.6  3,923.1  3,916.4  3,916.8  3,912.4  3,910.5  3,909.1  3,910.2
+medicare  57.6 6,024.5 6,298.8 6,679.0  7,183.1  7,779.4  8,416.6  9,071.1  9,785.5 10,562.1 11,357.7
+      vb  14.7 1,523.0 1,582.1 1,589.9  1,598.3  1,607.1  1,616.7  1,626.8  1,637.7  1,649.3  1,661.6
+    snap   8.3   825.7   789.5   789.5    789.5    789.5    789.5    789.5    789.5    789.5    789.5
+
+Total participating tax units (millions)
+programs  2014  2015  2016  2017  2018  2019  2020  2021  2022  2023  2024
+      ss   4.5 451.6 458.1 464.6 471.3 478.1 484.9 491.5 498.9 506.1 513.4
+     ssi   0.7  68.4  68.5  68.5  68.6  69.6  69.3  70.3  70.0  70.9  71.0
+medicaid   2.8 297.0 308.2 312.4 316.6 325.0 329.4 333.8 338.3 342.9 347.5
+medicare   3.9 396.1 409.8 421.4 433.4 446.2 461.0 476.0 491.1 505.4 519.3
+      vb   0.5  49.1  45.8  45.8  45.9  46.0  46.1  46.1  46.2  46.3  46.3
+    snap   2.9 279.7 267.7 266.9 265.6 264.4 263.3 262.1 260.9 259.5 258.1
+
+Total participants (millions)
+programs  2014  2015  2016  2017  2018  2019  2020  2021  2022  2023  2024
+      ss   5.7 577.7 593.8 609.1 624.6 639.9 655.4 664.2 690.1 709.4 728.7
+     ssi   0.8  75.5  75.4  75.2  75.2  76.2  76.0  77.0  76.7  77.7  77.8
+medicaid   5.2 563.6 581.4 589.2 597.2 611.9 620.2 628.6 637.0 645.6 654.4
+medicare   5.0 506.6 523.5 538.5 554.1 570.3 587.2 604.3 621.6 638.5 654.9
+      vb   0.5  49.9  46.4  46.5  46.6  46.7  46.7  46.8  46.9  46.9  47.0
+    snap   4.3 421.9 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6
+
diff --git a/cps_stage3/decile2015.csv b/cps_stage3/decile2015.csv
new file mode 100644
index 00000000..cb14f8da
--- /dev/null
+++ b/cps_stage3/decile2015.csv
@@ -0,0 +1,11 @@
+2015_decile,ss_benefits,ss_taxunits,ss_average,ssi_benefits,ssi_taxunits,ssi_average,medicaid_benefits,medicaid_taxunits,medicaid_average,medicare_benefits,medicare_taxunits,medicare_average,vb_benefits,vb_taxunits,vb_average,snap_benefits,snap_taxunits,snap_average
+1.0,2356481.6,118.3,2.4,154464.2,19.1,0.2,598590.7,42.2,0.6,1725226.7,109.2,1.7,365777.6,12.1,0.4,145995.5,58.4,0.1
+2.0,2411408.0,120.5,2.4,157525.7,19.9,0.2,820662.6,41.7,0.8,1868043.5,109.6,1.9,329554.2,10.3,0.3,138925.4,58.0,0.1
+3.0,1783238.9,91.1,1.8,127787.3,16.2,0.1,687520.4,45.1,0.7,1307569.7,83.0,1.3,248260.4,7.9,0.2,127714.4,51.3,0.1
+4.0,519415.4,25.5,0.5,31510.3,3.9,0.0,497498.9,41.3,0.5,289250.9,21.1,0.3,65792.8,2.2,0.1,155344.1,45.3,0.2
+5.0,406772.9,20.8,0.4,28972.0,3.5,0.0,481824.6,38.9,0.5,196159.7,16.1,0.2,67573.9,2.3,0.1,135604.2,38.2,0.1
+6.0,342122.4,17.9,0.3,23688.3,2.9,0.0,379137.5,29.9,0.4,168967.5,13.7,0.2,72378.6,2.4,0.1,74535.5,18.1,0.1
+7.0,340631.7,16.9,0.3,17511.1,2.2,0.0,267361.9,22.2,0.3,154586.2,12.6,0.2,80965.4,2.7,0.1,32853.3,7.4,0.0
+8.0,335573.5,16.7,0.3,6008.1,0.7,0.0,180877.7,15.8,0.2,148036.4,12.5,0.1,100201.4,3.3,0.1,12601.8,2.5,0.0
+9.0,274467.2,13.1,0.3,187.1,0.0,0.0,129812.1,11.5,0.1,90364.1,10.0,0.1,103412.4,3.2,0.1,2097.1,0.4,0.0
+10.0,246626.0,10.8,0.2,0.0,0.0,0.0,84352.4,8.4,0.1,76339.9,8.5,0.1,89057.8,2.8,0.1,30.1,0.0,0.0
diff --git a/cps_stage3/tabs.txt b/cps_stage3/tabs.txt
new file mode 100644
index 00000000..4a473f83
--- /dev/null
+++ b/cps_stage3/tabs.txt
@@ -0,0 +1,68 @@
+vb
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  440860  440860  441980  442081  442213  442327  442487  442570  442685  442755  442864
+1.0   15394   15394   14322   14222   14091   13978   13822   13741   13627   13557   13450
+2.0     210     210     162     161     160     159     156     154     153     153     151
+3.0       1       1       1       1       1       1       0       0       0       0       0
+
+ss
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  328317  326525  326111  325606  325403  325331  325242  325242  324970  324803  324728
+1.0   83781   84533   84328   84234   84211   84192   84185   84185   84139   84113   84097
+2.0   44108   44607   44462   44418   44394   44391   44389   44389   44366   44361   44340
+3.0     258     473     472     474     475     474     473     473     473     473     473
+4.0       1      68      68      68      69      69      69      69      66      65      65
+
+medicaid
+        2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0   392005  388485  387573  387573  387573  386957  386957  386957  386957  386957  386957
+1.0    32353   34347   34984   34984   34984   35390   35390   35390   35390   35390   35390
+2.0    16027   16313   16503   16503   16503   16602   16602   16602   16602   16602   16602
+3.0     7810    8183    8240    8240    8240    8271    8271    8271    8271    8271    8271
+4.0     4910    5337    5358    5358    5358    5408    5408    5408    5408    5408    5408
+5.0     2085    2346    2352    2352    2352    2371    2371    2371    2371    2371    2371
+6.0      775     871     872     872     872     877     877     877     877     877     877
+7.0      351     419     419     419     419     425     425     425     425     425     425
+8.0       86      97      97      97      97      96      96      96      96      96      96
+9.0       30      33      33      33      33      34      34      34      34      34      34
+11.0      20      21      21      21      21      21      21      21      21      21      21
+10.0      10       9       9       9       9       9       9       9       9       9       9
+12.0       2       3       3       3       3       3       3       3       3       3       3
+14.0       1       1       1       1       1       1       1       1       1       1       1
+
+medicare
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  344758  342573  339832  337965  336186  334385  332111  329739  327392  325338  323332
+1.0   71106   73109   75094   76324   77319   78511   80682   82909   85152   87003   88823
+2.0   40148   40310   41060   41693   42471   43073   43172   43311   43412   43605   43784
+3.0     349     369     359     363     369     376     380     386     388     395     402
+4.0      49      49      65      64      64      64      64      64      65      67      67
+7.0      31      31      31      31      31      31      31      31      31      32      32
+5.0      16      16      16      17      17      16      16      16      16      16      16
+6.0       7       7       7       7       7       8       8       8       8       8       8
+8.0       1       1       1       1       1       1       1       1       1       1       1
+
+snap
+        2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0   408953  410079  412791  413483  414187  414915  415626  416229  416892  417594  418304
+1.0    34020   33272   31160   30597   29967   29322   28736   28205   27631   26976   26322
+2.0     6932    6675    6251    6168    6113    6054    5952    5903    5853    5830    5796
+3.0     2811    2719    2608    2575    2563    2550    2534    2518    2503    2493    2477
+4.0     1919    1901    1859    1852    1848    1840    1836    1830    1810    1802    1798
+5.0     1083    1076    1056    1051    1050    1047    1044    1044    1042    1039    1037
+6.0      479     475     472     472     471     471     471     470     469     467     467
+7.0      175     175     175     174     174     174     174     174     173     172     172
+8.0       58      58      58      58      57      57      57      57      57      57      57
+9.0       26      26      26      26      26      26      26      26      26      26      26
+11.0       5       5       5       5       5       5       5       5       5       5       5
+10.0       3       3       3       3       3       3       3       3       3       3       3
+12.0       1       1       1       1       1       1       1       1       1       1       1
+
+ssi
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  443520  443520  443771  443995  444219  444219  444463  444463  444705  444705  444851
+1.0   11643   11643   11443   11262   11080   11080   10876   10876   10646   10646   10507
+2.0    1206    1206    1164    1134    1099    1099    1059    1059    1048    1048    1042
+3.0      89      89      81      69      62      62      62      62      61      61      60
+4.0       7       7       6       5       5       5       5       5       5       5       5
+

From 441b9f49f00466da31fcf480d98d3b0d7de70c63 Mon Sep 17 00:00:00 2001
From: Amy-Xu <amy.xu@aei.org>
Date: Thu, 7 Dec 2017 14:02:20 -0500
Subject: [PATCH 4/6] fixed delta and updated decile table

---
 cps_stage3/decile2015.csv       | 20 ++++++++++----------
 cps_stage3/test_cps_benefits.py |  9 +++------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/cps_stage3/decile2015.csv b/cps_stage3/decile2015.csv
index cb14f8da..9b827f26 100644
--- a/cps_stage3/decile2015.csv
+++ b/cps_stage3/decile2015.csv
@@ -1,11 +1,11 @@
 2015_decile,ss_benefits,ss_taxunits,ss_average,ssi_benefits,ssi_taxunits,ssi_average,medicaid_benefits,medicaid_taxunits,medicaid_average,medicare_benefits,medicare_taxunits,medicare_average,vb_benefits,vb_taxunits,vb_average,snap_benefits,snap_taxunits,snap_average
-1.0,2356481.6,118.3,2.4,154464.2,19.1,0.2,598590.7,42.2,0.6,1725226.7,109.2,1.7,365777.6,12.1,0.4,145995.5,58.4,0.1
-2.0,2411408.0,120.5,2.4,157525.7,19.9,0.2,820662.6,41.7,0.8,1868043.5,109.6,1.9,329554.2,10.3,0.3,138925.4,58.0,0.1
-3.0,1783238.9,91.1,1.8,127787.3,16.2,0.1,687520.4,45.1,0.7,1307569.7,83.0,1.3,248260.4,7.9,0.2,127714.4,51.3,0.1
-4.0,519415.4,25.5,0.5,31510.3,3.9,0.0,497498.9,41.3,0.5,289250.9,21.1,0.3,65792.8,2.2,0.1,155344.1,45.3,0.2
-5.0,406772.9,20.8,0.4,28972.0,3.5,0.0,481824.6,38.9,0.5,196159.7,16.1,0.2,67573.9,2.3,0.1,135604.2,38.2,0.1
-6.0,342122.4,17.9,0.3,23688.3,2.9,0.0,379137.5,29.9,0.4,168967.5,13.7,0.2,72378.6,2.4,0.1,74535.5,18.1,0.1
-7.0,340631.7,16.9,0.3,17511.1,2.2,0.0,267361.9,22.2,0.3,154586.2,12.6,0.2,80965.4,2.7,0.1,32853.3,7.4,0.0
-8.0,335573.5,16.7,0.3,6008.1,0.7,0.0,180877.7,15.8,0.2,148036.4,12.5,0.1,100201.4,3.3,0.1,12601.8,2.5,0.0
-9.0,274467.2,13.1,0.3,187.1,0.0,0.0,129812.1,11.5,0.1,90364.1,10.0,0.1,103412.4,3.2,0.1,2097.1,0.4,0.0
-10.0,246626.0,10.8,0.2,0.0,0.0,0.0,84352.4,8.4,0.1,76339.9,8.5,0.1,89057.8,2.8,0.1,30.1,0.0,0.0
+1.0,2356481.6,118.3,19921.4,154464.2,19.1,8107.0,598590.7,42.2,14176.1,1725226.7,109.2,15795.9,365777.6,12.1,30144.8,145995.5,58.4,2497.8
+2.0,2411408.0,120.5,20015.0,157525.7,19.9,7897.6,820662.6,41.7,19685.8,1868043.5,109.6,17045.8,329554.2,10.3,32144.8,138925.4,58.0,2393.2
+3.0,1783238.9,91.1,19583.3,127787.3,16.2,7887.8,687520.4,45.1,15243.9,1307569.7,83.0,15761.9,248260.4,7.9,31511.8,127714.4,51.3,2490.8
+4.0,519415.4,25.5,20342.1,31510.3,3.9,8036.8,497498.9,41.3,12045.7,289250.9,21.1,13732.1,65792.8,2.2,30099.9,155344.1,45.3,3427.0
+5.0,406772.9,20.8,19530.2,28972.0,3.5,8313.7,481824.6,38.9,12390.0,196159.7,16.1,12192.1,67573.9,2.3,29786.4,135604.2,38.2,3547.3
+6.0,342122.4,17.9,19104.6,23688.3,2.9,8174.8,379137.5,29.9,12682.0,168967.5,13.7,12358.4,72378.6,2.4,30033.3,74535.5,18.1,4120.2
+7.0,340631.7,16.9,20112.0,17511.1,2.2,8117.0,267361.9,22.2,12041.1,154586.2,12.6,12305.4,80965.4,2.7,30167.1,32853.3,7.4,4434.1
+8.0,335573.5,16.7,20101.1,6008.1,0.7,8645.3,180877.7,15.8,11424.3,148036.4,12.5,11869.2,100201.4,3.3,30366.0,12601.8,2.5,5091.7
+9.0,274467.2,13.1,21001.4,187.1,0.0,9282.4,129812.1,11.5,11289.6,90364.1,10.0,9069.6,103412.4,3.2,32213.4,2097.1,0.4,5714.2
+10.0,246626.0,10.8,22787.3,0.0,0.0,0.0,84352.4,8.4,10073.5,76339.9,8.5,8941.8,89057.8,2.8,32209.2,30.1,0.0,1910.3
diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py
index adb45ca2..c199c730 100644
--- a/cps_stage3/test_cps_benefits.py
+++ b/cps_stage3/test_cps_benefits.py
@@ -6,6 +6,7 @@
 programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap']
 billion = 10e9
 million = 10e6
+delta = 1e-06
 
 def read_files():
     ''' import weights, benefit, and raw cps file'''
@@ -35,18 +36,17 @@ def read_files():
 
     return cps
 
+cps = read_files()
+
 def test_decile_dist():
     
     ''' total participation, total benefits and average benefits
         by decile
     '''
-    cps = read_files()
     benefits_vars = [x + '_benefits_2015' for x in programs]
     p_vars = [x + '_recipients_2015' for x in programs]
     
-    
     decile2015 = pd.DataFrame(np.linspace(1,10, num=10), columns=['2015_decile'])
-    delta = 1e06
 
     for i in range(6):
 
@@ -74,8 +74,6 @@ def test_decile_dist():
 def test_aggregates():
     
-    '''total individual & taxunit participation, total benefits from 2014-2026'''
+    '''total individual & taxunit participation, total benefits from 2014-2024'''
-
-    cps = read_files()
     
     benefits = pd.DataFrame(programs, columns=['programs'])
     taxunits = pd.DataFrame(programs, columns=['programs'])
@@ -127,7 +125,6 @@ def test_tabs():
-    ''' tabulation of number of participants per tax unit from 2014 to 2026'''
+    ''' tabulation of number of participants per tax unit from 2014 to 2024'''
     
     tabs = {}
-    cps = read_files()
     
     # inline function to create single year program tabulation
     p_tab = lambda program: cps[program].value_counts()

From ebc9478c495b77c59ea0f400b2ae9a017d05178b Mon Sep 17 00:00:00 2001
From: Amy-Xu <amy.xu@aei.org>
Date: Thu, 7 Dec 2017 15:08:44 -0500
Subject: [PATCH 5/6] fixed global typo & divide weight by 100

---
 cps_stage3/aggregates.txt       | 38 ++++++++++++++++-----------------
 cps_stage3/test_cps_benefits.py | 12 ++++++-----
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/cps_stage3/aggregates.txt b/cps_stage3/aggregates.txt
index c5d7289f..30e4a7a0 100644
--- a/cps_stage3/aggregates.txt
+++ b/cps_stage3/aggregates.txt
@@ -1,27 +1,27 @@
 Total benefits (billions)
-programs  2014    2015    2016    2017     2018     2019     2020     2021     2022     2023     2024
-      ss  84.9 9,016.7 9,354.7 9,766.6 10,490.9 11,239.9 12,036.5 12,830.5 13,719.6 14,680.9 15,703.3
-     ssi   5.4   547.7   549.1   552.6    570.3    589.1    608.7    628.4    647.8    669.2    690.8
-medicaid  36.9 4,127.6 3,840.9 3,806.6  3,923.1  3,916.4  3,916.8  3,912.4  3,910.5  3,909.1  3,910.2
-medicare  57.6 6,024.5 6,298.8 6,679.0  7,183.1  7,779.4  8,416.6  9,071.1  9,785.5 10,562.1 11,357.7
-      vb  14.7 1,523.0 1,582.1 1,589.9  1,598.3  1,607.1  1,616.7  1,626.8  1,637.7  1,649.3  1,661.6
-    snap   8.3   825.7   789.5   789.5    789.5    789.5    789.5    789.5    789.5    789.5    789.5
+programs  2014  2015  2016  2017    2018    2019    2020    2021    2022    2023    2024
+      ss 849.2 901.7 935.5 976.7 1,049.1 1,124.0 1,203.6 1,283.0 1,372.0 1,468.1 1,570.3
+     ssi  54.1  54.8  54.9  55.3    57.0    58.9    60.9    62.8    64.8    66.9    69.1
+medicaid 368.6 412.8 384.1 380.7   392.3   391.6   391.7   391.2   391.1   390.9   391.0
+medicare 576.1 602.5 629.9 667.9   718.3   777.9   841.7   907.1   978.5 1,056.2 1,135.8
+      vb 146.8 152.3 158.2 159.0   159.8   160.7   161.7   162.7   163.8   164.9   166.2
+    snap  83.0  82.6  79.0  79.0    79.0    79.0    79.0    79.0    79.0    79.0    79.0
 
 Total participating tax units (millions)
 programs  2014  2015  2016  2017  2018  2019  2020  2021  2022  2023  2024
-      ss   4.5 451.6 458.1 464.6 471.3 478.1 484.9 491.5 498.9 506.1 513.4
-     ssi   0.7  68.4  68.5  68.5  68.6  69.6  69.3  70.3  70.0  70.9  71.0
-medicaid   2.8 297.0 308.2 312.4 316.6 325.0 329.4 333.8 338.3 342.9 347.5
-medicare   3.9 396.1 409.8 421.4 433.4 446.2 461.0 476.0 491.1 505.4 519.3
-      vb   0.5  49.1  45.8  45.8  45.9  46.0  46.1  46.1  46.2  46.3  46.3
-    snap   2.9 279.7 267.7 266.9 265.6 264.4 263.3 262.1 260.9 259.5 258.1
+      ss  44.6  45.2  45.8  46.5  47.1  47.8  48.5  49.1  49.9  50.6  51.3
+     ssi   6.8   6.8   6.8   6.9   6.9   7.0   6.9   7.0   7.0   7.1   7.1
+medicaid  27.9  29.7  30.8  31.2  31.7  32.5  32.9  33.4  33.8  34.3  34.8
+medicare  38.6  39.6  41.0  42.1  43.3  44.6  46.1  47.6  49.1  50.5  51.9
+      vb   4.9   4.9   4.6   4.6   4.6   4.6   4.6   4.6   4.6   4.6   4.6
+    snap  28.5  28.0  26.8  26.7  26.6  26.4  26.3  26.2  26.1  26.0  25.8
 
 Total participants (millions)
 programs  2014  2015  2016  2017  2018  2019  2020  2021  2022  2023  2024
-      ss   5.7 577.7 593.8 609.1 624.6 639.9 655.4 664.2 690.1 709.4 728.7
-     ssi   0.8  75.5  75.4  75.2  75.2  76.2  76.0  77.0  76.7  77.7  77.8
-medicaid   5.2 563.6 581.4 589.2 597.2 611.9 620.2 628.6 637.0 645.6 654.4
-medicare   5.0 506.6 523.5 538.5 554.1 570.3 587.2 604.3 621.6 638.5 654.9
-      vb   0.5  49.9  46.4  46.5  46.6  46.7  46.7  46.8  46.9  46.9  47.0
-    snap   4.3 421.9 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6 407.6
+      ss  56.5  57.8  59.4  60.9  62.5  64.0  65.5  66.4  69.0  70.9  72.9
+     ssi   7.6   7.6   7.5   7.5   7.5   7.6   7.6   7.7   7.7   7.8   7.8
+medicaid  52.4  56.4  58.1  58.9  59.7  61.2  62.0  62.9  63.7  64.6  65.4
+medicare  49.6  50.7  52.4  53.8  55.4  57.0  58.7  60.4  62.2  63.8  65.5
+      vb   5.0   5.0   4.6   4.7   4.7   4.7   4.7   4.7   4.7   4.7   4.7
+    snap  43.0  42.2  40.8  40.8  40.8  40.8  40.8  40.8  40.8  40.8  40.8
 
diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py
index c199c730..7605e1fc 100644
--- a/cps_stage3/test_cps_benefits.py
+++ b/cps_stage3/test_cps_benefits.py
@@ -4,8 +4,8 @@
 from pandas.util.testing import assert_frame_equal
 
 programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap']
-billion = 10e9
-million = 10e6
+billion = 1e09
+million = 1e06
 delta = 1e-06
 
 def read_files():
@@ -24,7 +24,7 @@ def read_files():
     # merge all essential variables
     cps = cps_income.merge(cps_benefit, on='RECID', how='left')
     cps.fillna(0, inplace=True)
-    cps = cps.join(weights)
+    cps = cps.join(weights/100)
     
     # rename to facilitate for loops
     cps.rename(columns={'s006': 'WT2014'}, inplace=True)
@@ -57,8 +57,10 @@ def test_decile_dist():
         cps['dummy'] = np.where(cps[p_vars[i]]!=0, cps['WT2015'], 0)
         
         # calculate total benefits, participation (# tax units), and average per decile
-        bp = cps[[benefits_vars[i] + '_weighted', 'dummy']].groupby(cps.WT2015_decile,
-                                                                    as_index=False).sum()/million
+        variables = [benefits_vars[i] + '_weighted', 'dummy']
+        bp = cps[variables].groupby(cps.WT2015_decile, as_index=False).sum()
+        
+        
         bp['average'] = bp[benefits_vars[i] + '_weighted']/(bp['dummy'] + delta)
 
         # rename and save

From 9e7949f5573bb3b7c259a09c526e2971fd75bcc0 Mon Sep 17 00:00:00 2001
From: Amy-Xu <amy.xu@aei.org>
Date: Thu, 7 Dec 2017 16:30:41 -0500
Subject: [PATCH 6/6] added docs

---
 cps_stage3/decile2015.csv       | 20 ++++++++---------
 cps_stage3/test_cps_benefits.py | 40 +++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/cps_stage3/decile2015.csv b/cps_stage3/decile2015.csv
index 9b827f26..6d10e0bd 100644
--- a/cps_stage3/decile2015.csv
+++ b/cps_stage3/decile2015.csv
@@ -1,11 +1,11 @@
 2015_decile,ss_benefits,ss_taxunits,ss_average,ssi_benefits,ssi_taxunits,ssi_average,medicaid_benefits,medicaid_taxunits,medicaid_average,medicare_benefits,medicare_taxunits,medicare_average,vb_benefits,vb_taxunits,vb_average,snap_benefits,snap_taxunits,snap_average
-1.0,2356481.6,118.3,19921.4,154464.2,19.1,8107.0,598590.7,42.2,14176.1,1725226.7,109.2,15795.9,365777.6,12.1,30144.8,145995.5,58.4,2497.8
-2.0,2411408.0,120.5,20015.0,157525.7,19.9,7897.6,820662.6,41.7,19685.8,1868043.5,109.6,17045.8,329554.2,10.3,32144.8,138925.4,58.0,2393.2
-3.0,1783238.9,91.1,19583.3,127787.3,16.2,7887.8,687520.4,45.1,15243.9,1307569.7,83.0,15761.9,248260.4,7.9,31511.8,127714.4,51.3,2490.8
-4.0,519415.4,25.5,20342.1,31510.3,3.9,8036.8,497498.9,41.3,12045.7,289250.9,21.1,13732.1,65792.8,2.2,30099.9,155344.1,45.3,3427.0
-5.0,406772.9,20.8,19530.2,28972.0,3.5,8313.7,481824.6,38.9,12390.0,196159.7,16.1,12192.1,67573.9,2.3,29786.4,135604.2,38.2,3547.3
-6.0,342122.4,17.9,19104.6,23688.3,2.9,8174.8,379137.5,29.9,12682.0,168967.5,13.7,12358.4,72378.6,2.4,30033.3,74535.5,18.1,4120.2
-7.0,340631.7,16.9,20112.0,17511.1,2.2,8117.0,267361.9,22.2,12041.1,154586.2,12.6,12305.4,80965.4,2.7,30167.1,32853.3,7.4,4434.1
-8.0,335573.5,16.7,20101.1,6008.1,0.7,8645.3,180877.7,15.8,11424.3,148036.4,12.5,11869.2,100201.4,3.3,30366.0,12601.8,2.5,5091.7
-9.0,274467.2,13.1,21001.4,187.1,0.0,9282.4,129812.1,11.5,11289.6,90364.1,10.0,9069.6,103412.4,3.2,32213.4,2097.1,0.4,5714.2
-10.0,246626.0,10.8,22787.3,0.0,0.0,0.0,84352.4,8.4,10073.5,76339.9,8.5,8941.8,89057.8,2.8,32209.2,30.1,0.0,1910.3
+1.0,235648164574.3,11828872.7,19921.4,15446420820.6,1905326.1,8107.0,59859073657.8,4222520.3,14176.1,172522666333.8,10921985.5,15795.9,36577757385.0,1213400.6,30144.8,14599547226.0,5844984.0,2497.8
+2.0,241140803199.8,12047991.5,20015.0,15752573846.5,1994613.0,7897.6,82066261681.9,4168809.1,19685.8,186804345030.4,10958997.0,17045.8,32955416319.7,1025216.7,32144.8,13892541220.2,5804971.9,2393.2
+3.0,178323885867.7,9105893.6,19583.3,12778730438.7,1620057.0,7887.8,68752039985.8,4510126.1,15243.9,130756967800.6,8295777.6,15761.9,24826044904.9,787832.6,31511.8,12771442403.3,5127523.1,2490.8
+4.0,51941541201.6,2553402.2,20342.1,3151031056.1,392073.0,8036.8,49749892872.5,4130106.4,12045.7,28925090583.7,2106380.3,13732.1,6579280519.5,218581.0,30100.0,15534412690.7,4532945.8,3427.0
+5.0,40677288594.5,2082786.3,19530.2,2897197915.0,348485.8,8313.7,48182460457.7,3888821.7,12390.0,19615969305.1,1608914.5,12192.1,6757385855.2,226861.1,29786.4,13560422068.9,3822778.9,3547.3
+6.0,34212236748.2,1790789.2,19104.6,2368829570.4,289770.5,8174.8,37913747915.4,2989568.3,12682.0,16896754076.9,1367227.1,12358.4,7237855735.1,240994.4,30033.3,7453550856.0,1809022.0,4120.2
+7.0,34063165570.0,1693676.4,20112.0,1751105764.4,215732.5,8117.0,26736187395.1,2220411.7,12041.1,15458622689.6,1256250.2,12305.4,8096538973.5,268389.7,30167.1,3285332871.7,740924.7,4434.1
+8.0,33557347102.4,1669427.0,20101.1,600805574.6,69494.9,8645.3,18087766855.8,1583274.5,11424.3,14803643416.6,1247236.2,11869.2,10020136903.8,329978.9,30366.0,1260181184.6,247498.8,5091.7
+9.0,27446719012.6,1306899.4,21001.4,18709682.5,2015.5,9282.9,12981209061.1,1149838.7,11289.6,9036414388.8,996337.3,9069.6,10341242490.4,321022.8,32213.4,209709592.0,36699.4,5714.2
+10.0,24662603933.5,1082297.0,22787.3,0.0,0.0,0.0,8435236248.0,837370.0,10073.5,7633994376.3,853738.3,8941.8,8905775130.9,276498.0,32209.2,3008985.8,1575.0,1910.5
diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py
index 7605e1fc..7c783b6f 100644
--- a/cps_stage3/test_cps_benefits.py
+++ b/cps_stage3/test_cps_benefits.py
@@ -3,6 +3,46 @@
 import numpy as np
 from pandas.util.testing import assert_frame_equal
 
+
+'''
+The tests in this script check distribution and aggregates for the benefit data.
+Whenever the benefit data is updated, the tests create new statistics
+and compare them with the previous version.
+
+This file needs three inputs: CPS weights (cps_weights.csv.gz), CPS tax unit
+database (cps.csv.gz), and CPS benefit (cps_benefits_extrap_full.csv.gz).
+The first two input files are in their corresponding folders as indicated
+in the code, but the third (benefit) file differs from the current version
+in that it also includes recipient information for each tax unit. Because this
+per-tax-unit recipient information is not used in the tax-calculator, we have
+dropped those variables to save space. However, the recipient information is
+essential for checking aggregates and distributions, so anyone who wants to run
+the tests needs to recreate the full version of the benefit data.
+
+It is relatively simple to generate the full version:
+
+1. Find extrapolation.py in the current folder and comment out the six
+lines of code (lines 310-315) that drop all recipient variables
+2. Rename the output to cps_benefits_extrap_full.csv.gz
+3. Run the extrapolation script with 'python extrapolation.py'
+
+The tests create three new files of summary statistics:
+
+1. decile2015_new.csv: participation, total benefit and average benefit
+by wage decile
+2. aggregates_new.csv: total participation and benefits for each program
+from 2014 to 2026
+3. tabs_new.csv: tabulations of tax unit participation for each program
+from 2014 to 2026
+
+If all three files are exactly the same as the previous versions, the tests
+will pass. If the tests fail, compare the new versions with the previous ones
+carefully, and replace the previous versions with the new ones if the
+differences are reasonable.
+
+'''
+
+
 programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap']
 billion = 1e09
 million = 1e06