Skip to content

Commit f17b5f7

Browse files
committed
EQ OECD scale now matches HBAI.
1 parent 9487729 commit f17b5f7

File tree

4 files changed

+58
-75
lines changed

4 files changed

+58
-75
lines changed

scripts/hbai-scotben-compares.jl

Lines changed: 48 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,9 @@ function make_pov( df :: DataFrame, incf::Symbol, growth=0.02 )::Tuple
3535
povstats, povline
3636
end
3737

38-
# Raw FRS
39-
hhold = CSV.File( "/mnt/data/frs/2022/tab/househol.tab"; missingstring=[" ", ""])|>DataFrame
40-
rename!( hhold, lowercase.(names(hhold)))
41-
hhold_scot = @view hhold[hhold.gvtregn .== 299999999,:]
42-
4338
# one run of scotben 24 sys
44-
sys = STBParameters.get_default_system_for_fin_year( 2025 )
45-
settings = Settings()
4639
tot = 0
47-
obs = Observable( Progress(settings.uuid,"",0,0,0,0))
40+
obs = Observable( Progress(Base.UUID("c2ae9c83-d24a-431c-b04f-74662d2ba07e"),"",0,0,0,0))
4841
Observable(Progress(Base.UUID("c2ae9c83-d24a-431c-b04f-74662d2ba07e"), "", 0, 0, 0, 0))
4942
of = on(obs) do p
5043
global tot
@@ -54,10 +47,13 @@ of = on(obs) do p
5447
end
5548

5649
function onerun( ;
50+
settings::Settings,
5751
weighting_relative_to_ons_weights :: Bool,
5852
to_y::Int,
5953
to_q :: Int )
60-
settings.included_data_years = [2019,2021,2022, 2023] # same as 3 year HBAI
54+
global tot
55+
tot = 0
56+
sys = STBParameters.get_default_system_for_fin_year( 2025 )
6157
settings.requested_threads = 4
6258
settings.to_y=to_y #match hbai, kinda sorta
6359
settings.to_q=to_q
@@ -76,21 +72,36 @@ function onerun( ;
7672
return res, results_hhs, results_indiv
7773
end
7874

79-
function load_model_data()
75+
function load_model_data(settings::Settings)::Tuple
8076
# overwrite raw data with uprated/matched versions
8177
dataset_artifact = get_data_artifact( settings )
8278
model_hhs = HouseholdFromFrame.read_hh(
8379
joinpath( dataset_artifact, "households.tab")) # CSV.File( ds.hhlds ) |> DataFrame
8480
model_people = HouseholdFromFrame.read_pers(
8581
joinpath( dataset_artifact, "people.tab"))
82+
@show settings.included_data_years
8683
model_hhs = model_hhs[ model_hhs.data_year .∈ ( settings.included_data_years, ) , :]
8784
model_people = model_people[ model_people.data_year .∈ ( settings.included_data_years, ) , :]
85+
settings.num_households = size( model_hhs )[1]
86+
settings.num_people = size( model_people )[1]
87+
@show settings
8888
DataSummariser.overwrite_raw!( model_hhs, model_people, settings.num_households )
89-
jhhs = leftjoin(results_hhs, model_hhs, on=[:hid,:data_year], makeunique=true )
90-
return jhhs, model_people, model_hhs
89+
# jhhs = leftjoin(results_hhs, model_hhs, on=[:hid,:data_year], makeunique=true )
90+
return model_people, model_hhs
91+
end
92+
93+
function make_compare(results_hhs::DataFrame , hbai_s::DataFrame)
94+
sbsub = results_hhs[results_hhs.data_year.==2021,[:hid,:data_year,:grossing_factor,:bhc_net_income,:eq_scale_bhc ]]
95+
hbsub = hbai_s[hbai_s.data_year.==2021,[:sernum,:data_year, :grossing_factor,:bhc_net_income,:before_hc_eqscale,:ahcpubdef,:ahcyrdef]]
96+
hbsub.grossing_factor ./= 3
97+
compset = innerjoin( sbsub, hbsub, on=[:hid=>:sernum, :data_year], makeunique=true)
98+
compset.eqdif = .! (compset.eq_scale_bhc .≈ compset.before_hc_eqscale )
99+
return compset
91100
end
92101

93-
function get_hbai()
102+
103+
104+
function get_hbai(settings::Settings)
94105
hbai = CSV.File( "/mnt/data/hbai/2024-ed/UKDA-5828-tab/main/20224.csv"; delim=',', missingstring=["","-9","A"]) |> DataFrame
95106
rename!(lowercase, hbai)
96107
hbai = hbai[( .! ismissing.( hbai.s_oe_bhc .+ hbai.s_oe_ahc .+ hbai.eahchh)), :]
@@ -120,10 +131,14 @@ function get_hbai()
120131
hbai, hbai_s, hb23_s, hb23_heads
121132
end
122133

134+
settings = Settings()
135+
settings.included_data_years = [2019,2021,2022, 2023] # same as 3 year HBAI
136+
hbai, hbai_s, hb23_s, hb23_heads = get_hbai(settings)
137+
settings.num_households, settings.num_people, nhhs2 =
138+
FRSHouseholdGetter.initialise( settings; reset=true )
139+
model_people, model_hhs = load_model_data(settings)
123140

124-
hbai, hbai_s, hb23_s, hb23_heads = get_hbai()
125-
jhhs, model_people, model_hhs = load_model_data()
126-
n=16*4
141+
n=64
127142
df = DataFrame(
128143
uprated = fill("",n),
129144
gross_type_relative_to = fill("",n),
@@ -132,7 +147,8 @@ df = DataFrame(
132147
inc_measure = fill("",n),
133148
scotben_hh = zeros(n), # [sb_h_mean_grossed, sb_h_mean_ungrossed,sb_h_median_grossed, sb_h_median_ungrossed ],
134149
scotben_indiv = zeros(n), #[sb_i_mean_grossed, sb_i_mean_ungrossed,sb_i_median_grossed, sb_i_median_ungrossed ],
135-
hbai = zeros(n)) #[hbai_mean_grossed, hbai_mean_ungrossed, hbai_median_grossed, hbai_median_ungrossed])
150+
hbai_21_23 = zeros(n),
151+
hbai_23 = zeros(n)) #[hbai_mean_grossed, hbai_mean_ungrossed, hbai_median_grossed, hbai_median_ungrossed])
136152

137153
r = 0
138154

@@ -144,6 +160,7 @@ for uprate in ["current", "y2024"]
144160
end
145161
for weighting_relative_to_ons_weights in [false,true]
146162
results, results_hhs, results_indiv = onerun(
163+
settings = settings,
147164
weighting_relative_to_ons_weights = weighting_relative_to_ons_weights,
148165
to_y = to_y,
149166
to_q = to_q)
@@ -162,66 +179,32 @@ for uprate in ["current", "y2024"]
162179
row.grossed = grossed ? "Grossed" : "Ungrossed"
163180
row.inc_measure = pretty(string(inc))
164181
row.stat = string(f)
165-
hhs_weights, indiv_weights, hbai_weights = if grossed
182+
hhs_weights, indiv_weights, hbai_weights, hb23_weights = if grossed
166183
results_hhs.grossing_factor,
167184
results_indiv.grossing_factor,
168-
hbai_s.grossing_factor
185+
hbai_s.grossing_factor,
186+
hb23_s.grossing_factor
169187
else
170188
Weights( results_hhs.num_people ),
171189
Weights( ones( size( results_indiv)[1])),
172-
Weights( ones( size( hbai_s)[1]))
190+
Weights( ones( size( hbai_s)[1])),
191+
Weights( ones( size( hb23_s)[1]))
173192
end
174193
row.scotben_hh = f(results_hhs[!,inc], hhs_weights )
175194
row.scotben_indiv = f( results_indiv[!,inc], indiv_weights )
176-
row.hbai = f( hbai_s[!,inc], hbai_weights )
195+
row.hbai_21_23 = f( hbai_s[!,inc], hbai_weights )
196+
row.hbai_23 = f( hb23_s[!,inc], hb23_weights )
177197
end # func
178198
end # gross
179199
end # incs
180200
end
181201
end # uprating
182202

203+
CSV.write( "hbai-scotben-compares.tab", df; delim='\t')
183204

184-
sbmedian_frs_weights = median( jhhs.bhc_net_income, Weights( jhhs.weight_1 ./ 3) )
185-
# select summary hbai
186-
hbai_s[!,[:sernum,:grossing_factor,:ahc_net_income,:before_hc_eqscale,:data_year,:ahcpubdef,:ahcyrdef]]
187-
188-
summarystats( results_hhs.bhc_net_income )
189-
summarystats( hbai_s.bhc_net_income )
190-
191-
#1. is it my weights?
192-
# Problem: my mean income is >100 higher than SPI mean income.
193-
#
194-
# join hbai and my hh data
195-
# read CSV version??
196-
# uprate mine to HBAI target
197-
# use HBAI weights/my weights
198-
#
199-
200-
201-
median(hbai.eq_ahc_net_income,Weights(hbai.grossing_factor))
202-
median(hb23.eq_ahc_net_income,Weights(hb23.grossing_factor))
203-
# should match ... these:
204-
unique(hbai.mdoeahc)
205-
# should match ... these:
206-
unique(hbai.mdoebhc)
207-
208-
# test of weighting relative to exis
209-
210-
household_total,
211-
targets, # no institutional,
212-
initialise_target_dataframe,
213-
make_target_row! = Weighting.get_targets( settings )
214-
popsum = sum( jhhs.weight )
215-
wscale = household_total/popsum
216-
initial_weights = jhhs.weight .* wscale
217-
218-
@time weightsp, data = generate_weights(
219-
settings.num_households;
220-
weight_type = settings.weight_type,
221-
lower_multiple = settings.lower_multiple,
222-
upper_multiple = settings.upper_multiple,
223-
household_total = household_total,
224-
targets = targets, # no institutional,
225-
initialise_target_dataframe = initialise_target_dataframe,
226-
make_target_row! = make_target_row!,
227-
initial_weights=initial_weights )
205+
results, results_hhs, results_indiv = onerun(
206+
settings = settings,
207+
weighting_relative_to_ons_weights = true,
208+
to_y = 2024,
209+
to_q = 2)
210+
compdata = make_compare( results_hhs, hbai_s )

src/EquivalenceScales.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ module EquivalenceScales
6565
eq += 1
6666
println( "only 1 person; non head rel=$rel $(p.hid)")
6767
else
68-
if get_age(p) <= 14
68+
if get_age(p) < 14
6969
add = if scale == oxford
7070
0.5
7171
elseif scale == oecd

src/ModelHousehold.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,11 +273,11 @@ function eq_rel_to_hoh( p :: Person ) :: EQ_P_Type
273273
return eq_head
274274
elseif p.is_standard_child
275275
return eq_dependent_child
276-
elseif p.relationship_to_hoh in [Spouse,Cohabitee]
276+
elseif (p.relationship_to_hoh in [Spouse,Cohabitee]) || (p.default_benefit_unit == 1)
277277
return eq_spouse_of_head
278278
# hack for 2nd bu adults always being heads
279-
# elseif (p.default_benefit_unit > 1)
280-
# return eq_head
279+
# elseif (p.default_benefit_unit > 1) && ( ! p.is_standard_child )
280+
# return eq_head
281281
else
282282
return eq_other_adult
283283
end

src/STBOutput.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -874,20 +874,20 @@ function incomes_to_hist(
874874
maxr=1500.0,
875875
bandwidth=10 )::NamedTuple
876876
incs = deepcopy(hh[:,income_measure])
877+
# constrain the graph as in HBAI
878+
incs = max.( incs, minr)
879+
incs = min.( incs, maxr)
877880
maxinc = maximum(incs)
878881
mininc = minimum(incs)
879882
medinc = median( incs, Weights(hh.weighted_people))
880883
meaninc = mean( incs, Weights(hh.weighted_people))
881884
@show medinc meaninc
882-
# constrain the graph as in HBAI
883-
incs = max.( incs, minr)
884-
incs = min.( incs, maxr)
885885
ranges = collect( minr:bandwidth:maxr )
886886
push!( ranges,Inf)
887887
hist = fit( Histogram, incs, Weights( hh.weighted_people ), ranges, closed=:left )
888888
# check I've understood fit(Hist correctly ..
889-
# @assert hist.weights[1] ≈ sum( hh.weighted_people[ incs .<= minr ]) "$(hist.weights[1]) ≈ $(sum( hh.weighted_people[ incs .<= minr ])) $hist"
890-
# @assert hist.weights[end] ≈ sum( hh.weighted_people[ incs .>= maxr ]) "$(hist.weights[end]) ≈ $(sum( hh.weighted_people[ incs .>= maxr ])) $hist"
889+
@assert hist.weights[1] ≈ sum( hh.weighted_people[ incs .< hist.edges[1][2] ]) "$(hist.weights[1]) ≈ $(sum( hh.weighted_people[ incs .<= minr ])) $hist"
890+
@assert hist.weights[end] ≈ sum( hh.weighted_people[ incs .>= maxr ]) "$(hist.weights[end]) ≈ $(sum( hh.weighted_people[ incs .>= maxr ])) $hist"
891891
return ( max=maxinc, min=mininc, median=medinc, mean=meaninc, hist=hist )
892892
end
893893

@@ -897,7 +897,7 @@ Dump out histogram, means, etc. as 2-col delimited data.
897897
"""
898898
function write_hist( filename::String, incs::NamedTuple; delim='\t')
899899
d = DataFrame(
900-
edges_lower_limit=incs.hist.edges[1][1:end-1],
900+
edges_upper_limit=incs.hist.edges[1][2:end],
901901
population=incs.hist.weights )
902902
# add stats at bottom
903903
push!(d, ["mean", incs.mean]; promote=true ) # since col1 is float only otherwise

0 commit comments

Comments
 (0)