@@ -35,16 +35,9 @@ function make_pov( df :: DataFrame, incf::Symbol, growth=0.02 )::Tuple
35
35
povstats, povline
36
36
end
37
37
38
- # Raw FRS
39
- hhold = CSV. File ( " /mnt/data/frs/2022/tab/househol.tab" ; missingstring= [" " , " " ])|> DataFrame
40
- rename! ( hhold, lowercase .(names (hhold)))
41
- hhold_scot = @view hhold[hhold. gvtregn .== 299999999 ,:]
42
-
43
38
# Script-level state used while a model run reports progress.
# Reading aid: in this diff rendering `-` lines are removed by the change, `+` lines are
# added, and the bare integers are old/new line-number residue.
# one run of scotben 24 sys
44
- sys = STBParameters. get_default_system_for_fin_year ( 2025 )
45
- settings = Settings ()
46
39
# Top-level counter; both the observer callback and `onerun` declare `global tot`.
tot = 0
47
- obs = Observable ( Progress (settings . uuid ," " ,0 ,0 ,0 ,0 ))
40
# The added line builds the Progress payload from a fixed UUID literal instead of
# `settings.uuid`, so `obs` no longer depends on a `Settings` instance existing here.
# NOTE(review): the UUID literal is rendered with padding spaces inside the quotes;
# presumably an artefact of the diff rendering -- confirm against the real file.
+ obs = Observable ( Progress (Base . UUID ( " c2ae9c83-d24a-431c-b04f-74662d2ba07e " ) ," " ,0 ,0 ,0 ,0 ))
48
41
# NOTE(review): the next line is a bare expression that constructs another Observable and
# does not bind it to any name; it looks like pasted REPL output -- confirm it can be removed.
Observable (Progress (Base. UUID (" c2ae9c83-d24a-431c-b04f-74662d2ba07e" ), " " , 0 , 0 , 0 , 0 ))
49
42
of = on (obs) do p
50
43
global tot
@@ -54,10 +47,13 @@ of = on(obs) do p
54
47
end
55
48
56
49
function onerun ( ;
50
+ settings:: Settings ,
57
51
weighting_relative_to_ons_weights :: Bool ,
58
52
to_y:: Int ,
59
53
to_q :: Int )
60
- settings. included_data_years = [2019 ,2021 ,2022 , 2023 ] # same as 3 year HBAI
54
+ global tot
55
+ tot = 0
56
+ sys = STBParameters. get_default_system_for_fin_year ( 2025 )
61
57
settings. requested_threads = 4
62
58
settings. to_y= to_y # match hbai, kinda sorta
63
59
settings. to_q= to_q
@@ -76,21 +72,36 @@ function onerun( ;
76
72
return res, results_hhs, results_indiv
77
73
end
78
74
79
# load_model_data: read the household and person tables from the dataset artifact,
# keep only rows whose `data_year` is in `settings.included_data_years`, and return them.
#
# Arguments (new signature):
#   settings :: Settings -- supplies the artifact location (via `get_data_artifact`) and the
#                           `included_data_years` filter.
# Returns (new version): the 2-tuple `(model_people, model_hhs)` -- people first, households second.
#   The removed version returned `(jhhs, model_people, model_hhs)`; callers must unpack two values now.
# Side effects (new version):
#   * overwrites `settings.num_households` and `settings.num_people` with the filtered row counts;
#   * prints `settings.included_data_years` and the whole `settings` object via `@show`.
# The removed version took no arguments and read `settings` and `results_hhs` from global scope.
- function load_model_data ()
75
+ function load_model_data (settings :: Settings ) :: Tuple
80
76
# overwrite raw data with uprated/matched versions
81
77
dataset_artifact = get_data_artifact ( settings )
82
78
model_hhs = HouseholdFromFrame. read_hh (
83
79
joinpath ( dataset_artifact, " households.tab" )) # CSV.File( ds.hhlds ) |> DataFrame
84
80
model_people = HouseholdFromFrame. read_pers (
85
81
joinpath ( dataset_artifact, " people.tab" ))
82
# Debug output of the year filter about to be applied.
+ @show settings. included_data_years
86
83
# Wrapping the year list in a 1-tuple stops `.∈` from broadcasting over the list itself,
# so each row's `data_year` is tested for membership in the whole list.
model_hhs = model_hhs[ model_hhs. data_year .∈ ( settings. included_data_years, ) , :]
87
84
model_people = model_people[ model_people. data_year .∈ ( settings. included_data_years, ) , :]
85
# Record the post-filter sizes on `settings`; the value is used by `overwrite_raw!` just below.
+ settings. num_households = size ( model_hhs )[1 ]
86
+ settings. num_people = size ( model_people )[1 ]
87
# Debug dump of the full settings object after the counts were updated.
+ @show settings
88
88
# presumably updates `model_hhs` / `model_people` in place (per the `!` suffix) -- TODO confirm
DataSummariser. overwrite_raw! ( model_hhs, model_people, settings. num_households )
89
- jhhs = leftjoin (results_hhs, model_hhs, on= [:hid ,:data_year ], makeunique= true )
90
- return jhhs, model_people, model_hhs
89
# The join against run results is disabled in the new version (kept only as a comment).
+ # jhhs = leftjoin(results_hhs, model_hhs, on=[:hid,:data_year], makeunique=true )
90
+ return model_people, model_hhs
91
+ end
92
+
93
# make_compare: build a household-level comparison table between model output and HBAI
# for the single data year 2021.
#
# Arguments:
#   results_hhs :: DataFrame -- model household results; the columns :hid, :data_year,
#                               :grossing_factor, :bhc_net_income, :eq_scale_bhc are read.
#   hbai_s      :: DataFrame -- HBAI subset; the columns :sernum, :data_year, :grossing_factor,
#                               :bhc_net_income, :before_hc_eqscale, :ahcpubdef, :ahcyrdef are read.
# Returns: the inner join of the two 2021 subsets with an extra Bool-valued column `eqdif`.
+ function make_compare (results_hhs:: DataFrame , hbai_s:: DataFrame )
94
# Row + column indexing selects the 2021 records and the columns needed for the comparison.
+ sbsub = results_hhs[results_hhs. data_year.== 2021 ,[:hid ,:data_year ,:grossing_factor ,:bhc_net_income ,:eq_scale_bhc ]]
95
+ hbsub = hbai_s[hbai_s. data_year.== 2021 ,[:sernum ,:data_year , :grossing_factor ,:bhc_net_income ,:before_hc_eqscale ,:ahcpubdef ,:ahcyrdef ]]
96
# In-place division of the subset's grossing factor by 3.
# NOTE(review): presumably rescales weights because the HBAI extract pools three years -- confirm.
# Assumes `hbsub` is a copy (DataFrames.jl `df[rows, cols]` indexing), so `hbai_s` is untouched -- TODO confirm.
+ hbsub. grossing_factor ./= 3
97
# Match model :hid to HBAI :sernum within the same :data_year; `makeunique=true` lets the
# column names present on both sides (:grossing_factor, :bhc_net_income) coexist in the result.
+ compset = innerjoin ( sbsub, hbsub, on= [:hid => :sernum , :data_year ], makeunique= true )
98
# eqdif is true where the two equivalence scales are NOT approximately equal (`≈` with default tolerances).
+ compset. eqdif = .! (compset. eq_scale_bhc .≈ compset. before_hc_eqscale )
99
+ return compset
91
100
end
92
101
93
- function get_hbai ()
102
+
103
+
104
+ function get_hbai (settings:: Settings )
94
105
hbai = CSV. File ( " /mnt/data/hbai/2024-ed/UKDA-5828-tab/main/20224.csv" ; delim= ' ,' , missingstring= [" " ," -9" ," A" ]) |> DataFrame
95
106
rename! (lowercase, hbai)
96
107
hbai = hbai[( .! ismissing .( hbai. s_oe_bhc .+ hbai. s_oe_ahc .+ hbai. eahchh)), :]
@@ -120,10 +131,14 @@ function get_hbai()
120
131
hbai, hbai_s, hb23_s, hb23_heads
121
132
end
122
133
134
# Top-level setup: create the shared `settings`, load HBAI, initialise the FRS household
# store, then load the model input tables. `-` lines are the removed zero-argument calls.
+ settings = Settings ()
135
# NOTE(review): four years are listed while the trailing comment says "3 year" -- confirm intended.
+ settings. included_data_years = [2019 ,2021 ,2022 , 2023 ] # same as 3 year HBAI
136
+ hbai, hbai_s, hb23_s, hb23_heads = get_hbai (settings)
137
# The first two returned values are stored on `settings`; the third is kept as `nhhs2`.
# `load_model_data` (next statement) assigns `num_households` / `num_people` again from its
# filtered tables, so the values set here are replaced before the runs start.
+ settings. num_households, settings. num_people, nhhs2 =
138
+ FRSHouseholdGetter. initialise ( settings; reset= true )
139
# The new `load_model_data` returns two values (people, households); `jhhs` is no longer produced.
+ model_people, model_hhs = load_model_data (settings)
123
140
124
- hbai, hbai_s, hb23_s, hb23_heads = get_hbai ()
125
- jhhs, model_people, model_hhs = load_model_data ()
126
- n= 16 * 4
141
# Number of rows preallocated in the results table `df` built next (same value as the old 16*4).
+ n= 64
127
142
df = DataFrame (
128
143
uprated = fill (" " ,n),
129
144
gross_type_relative_to = fill (" " ,n),
@@ -132,7 +147,8 @@ df = DataFrame(
132
147
inc_measure = fill (" " ,n),
133
148
scotben_hh = zeros (n), # [sb_h_mean_grossed, sb_h_mean_ungrossed,sb_h_median_grossed, sb_h_median_ungrossed ],
134
149
scotben_indiv = zeros (n), # [sb_i_mean_grossed, sb_i_mean_ungrossed,sb_i_median_grossed, sb_i_median_ungrossed ],
135
- hbai = zeros (n)) # [hbai_mean_grossed, hbai_mean_ungrossed, hbai_median_grossed, hbai_median_ungrossed])
150
+ hbai_21_23 = zeros (n),
151
+ hbai_23 = zeros (n)) # [hbai_mean_grossed, hbai_mean_ungrossed, hbai_median_grossed, hbai_median_ungrossed])
136
152
137
153
r = 0
138
154
@@ -144,6 +160,7 @@ for uprate in ["current", "y2024"]
144
160
end
145
161
for weighting_relative_to_ons_weights in [false ,true ]
146
162
results, results_hhs, results_indiv = onerun (
163
+ settings = settings,
147
164
weighting_relative_to_ons_weights = weighting_relative_to_ons_weights,
148
165
to_y = to_y,
149
166
to_q = to_q)
@@ -162,66 +179,32 @@ for uprate in ["current", "y2024"]
162
179
row. grossed = grossed ? " Grossed" : " Ungrossed"
163
180
row. inc_measure = pretty (string (inc))
164
181
row. stat = string (f)
165
- hhs_weights, indiv_weights, hbai_weights = if grossed
182
+ hhs_weights, indiv_weights, hbai_weights, hb23_weights = if grossed
166
183
results_hhs. grossing_factor,
167
184
results_indiv. grossing_factor,
168
- hbai_s. grossing_factor
185
+ hbai_s. grossing_factor,
186
+ hb23_s. grossing_factor
169
187
else
170
188
Weights ( results_hhs. num_people ),
171
189
Weights ( ones ( size ( results_indiv)[1 ])),
172
- Weights ( ones ( size ( hbai_s)[1 ]))
190
+ Weights ( ones ( size ( hbai_s)[1 ])),
191
+ Weights ( ones ( size ( hb23_s)[1 ]))
173
192
end
174
193
row. scotben_hh = f (results_hhs[! ,inc], hhs_weights )
175
194
row. scotben_indiv = f ( results_indiv[! ,inc], indiv_weights )
176
- row. hbai = f ( hbai_s[! ,inc], hbai_weights )
195
+ row. hbai_21_23 = f ( hbai_s[! ,inc], hbai_weights )
196
+ row. hbai_23 = f ( hb23_s[! ,inc], hb23_weights )
177
197
end # func
178
198
end # gross
179
199
end # incs
180
200
end
181
201
end # uprating
182
202
203
# Persist the comparison table `df` as a delimited text file in the current working directory.
# NOTE(review): the delimiter is rendered with padding spaces around `\t`; presumably a
# diff-rendering artefact and the real file uses a single tab character -- confirm.
+ CSV. write ( " hbai-scotben-compares.tab" , df; delim= ' \t ' )
183
204
184
- sbmedian_frs_weights = median ( jhhs. bhc_net_income, Weights ( jhhs. weight_1 ./ 3 ) )
185
- # select summary hbai
186
- hbai_s[! ,[:sernum ,:grossing_factor ,:ahc_net_income ,:before_hc_eqscale ,:data_year ,:ahcpubdef ,:ahcyrdef ]]
187
-
188
- summarystats ( results_hhs. bhc_net_income )
189
- summarystats ( hbai_s. bhc_net_income )
190
-
191
- # 1. is it my weights?
192
- # Problem: my mean income is >100 higher than SPI mean income.
193
- #
194
- # join hbai and my hh data
195
- # read CSV version??
196
- # uprate mine to HBAI target
197
- # use HBAI weights/my weights
198
- #
199
-
200
-
201
- median (hbai. eq_ahc_net_income,Weights (hbai. grossing_factor))
202
- median (hb23. eq_ahc_net_income,Weights (hb23. grossing_factor))
203
- # should match ... these:
204
- unique (hbai. mdoeahc)
205
- # should match ... these:
206
- unique (hbai. mdoebhc)
207
-
208
- # test of weighting relative to exis
209
-
210
- household_total,
211
- targets, # no institutional,
212
- initialise_target_dataframe,
213
- make_target_row! = Weighting. get_targets ( settings )
214
- popsum = sum ( jhhs. weight )
215
- wscale = household_total/ popsum
216
- initial_weights = jhhs. weight .* wscale
217
-
218
- @time weightsp, data = generate_weights (
219
- settings. num_households;
220
- weight_type = settings. weight_type,
221
- lower_multiple = settings. lower_multiple,
222
- upper_multiple = settings. upper_multiple,
223
- household_total = household_total,
224
- targets = targets, # no institutional,
225
- initialise_target_dataframe = initialise_target_dataframe,
226
- make_target_row! = make_target_row!,
227
- initial_weights= initial_weights )
205
# One further model run with fixed options (ONS-relative weighting on, target period 2024 Q2),
# after which its household results are joined to the HBAI subset for record-level comparison.
# This rebinds `results`, `results_hhs` and `results_indiv`, which the loop above also assigns.
+ results, results_hhs, results_indiv = onerun (
206
+ settings = settings,
207
+ weighting_relative_to_ons_weights = true ,
208
+ to_y = 2024 ,
209
+ to_q = 2 )
210
# `make_compare` restricts both inputs to data_year 2021 before joining.
+ compdata = make_compare ( results_hhs, hbai_s )
0 commit comments