@@ -43,13 +43,28 @@ function do_pers_idiot_checks( pers :: AbstractDataFrame )
43
43
for bu in hbus
44
44
nbusps += size ( bu )[1 ]
45
45
numheads = sum ( bu[:,:is_bu_head ])
46
- @assert numheads == 1 " 1 head for each bu hh.hid=$(first. hid) "
46
+ @assert numheads == 1 " 1 head for each bu hh.hid=$(first. hid) numheads= $numheads bu = $(bu[ 1 , :default_benefit_unit ]) "
47
47
end
48
48
@assert nbusps == size (hp)[1 ] " size mismatch for hh.hid=$(hp. hid) "
49
- @assert sum ( hp[:,:is_hrp ]) == 1 " 1 head for each hh hh.hid=$(hp. hid) "
49
+ @assert sum ( hp[:,:is_hrp ]) == 1 " 1 head for each hh hh.hid=$(hp. hid) was $( sum ( hp[:, :is_hrp ]) ) "
50
50
end
51
51
end
52
52
53
"""
    delete_irredemably_bad_hhs(hh::DataFrame, pers::DataFrame)

Remove, in place, every household made up entirely of children, together
with the matching rows of `pers`.

A household is dropped when every `pers` row sharing its `hid` has `age`
below 16 and the `from_child_record` values of those rows sum to the number
of rows. Households with no matching `pers` rows are left untouched.

Both `hh` and `pers` are mutated. The return value is `pers`, as returned by
the final `deleteat!`.
"""
function delete_irredemably_bad_hhs(hh::DataFrame, pers::DataFrame)
    # Typed after the `hid` column rather than an untyped `Any[]`.
    kills = eltype(hh.hid)[]
    for h in eachrow(hh)
        p = pers[pers.hid .== h.hid, :]
        n = size(p)[1]
        # `maximum` throws on an empty column, so a household with no person
        # rows is skipped instead of aborting the whole clean-up.
        n == 0 && continue
        # all children - killem all
        if (maximum(p[!, :age]) < 16) && (sum(p[!, :from_child_record]) == n)
            println("want to kill $(h.hid)")
            push!(kills, h.hid)
        end
    end
    println("killing $(kills)")
    # Wrapping `kills` in a 1-tuple makes `.∈` test each hid against the
    # whole vector instead of broadcasting element against element.
    deleteat!(hh, hh.hid .∈ (kills,))
    return deleteat!(pers, pers.hid .∈ (kills,))
end
53
68
54
69
"""
55
70
reassign over 21s not in education to non child
@@ -86,12 +101,30 @@ function fixup_child_status!( pers :: DataFrameRow )::Int
86
101
return oc == pers. from_child_record ? 0 : 1 # count of changes made
87
102
end
88
103
104
+
105
"""
    zeropos(p::DataFrameRow)

Return the index `i` (1 to 15) of the first `relationship_<i>` field of `p`
whose value is 0.

Returns 9999 when a `missing` field is reached before any zero, or when none
of the 15 fields is zero.
"""
function zeropos(p::DataFrameRow)::Int
    notfound = 9999
    slot = 1
    while slot <= 15
        value = p[Symbol("relationship_", slot)]
        ismissing(value) && return notfound
        value == 0 && return slot
        slot += 1
    end
    return notfound
end
116
+
117
+
89
118
"""
90
119
91
120
"""
92
121
function fixup_relationships! ( hp :: AbstractDataFrame ):: Int
122
+ # sort the hh people in order of the zero (itself) relationship field
123
+ # that
124
+ hp. zpos .= zeropos .( eachrow (hp) )
125
+ sort! ( hp, :zpos )
93
126
num_people = size (hp)[1 ] #
94
- println ( " num people $num_people " )
127
+ # println( "num people $num_people")
95
128
@assert size ( hp[hp. is_hrp.== 1 ,:])[1 ] == 1 # exactly 1 hrp
96
129
nfixes = 0
97
130
for p in eachrow (hp) # for each person .. (only need 1st 1/2?)
@@ -107,67 +140,29 @@ function fixup_relationships!( hp :: AbstractDataFrame )::Int
107
140
# change the other person's relationship to match this one, if needed.
108
141
if j != p. pno
109
142
k = Symbol ( " relationship_$(j) " )
110
- relationship = Relationship (p[k]) # relationship of this person to person j
143
+ relationship = if ismissing ( p[k] ) # relationship of this person to person j
144
+ Missing_Relationship
145
+ else
146
+ Relationship (p[k])
147
+ end
148
+ if relationship == This_Person # can't be this person if pno != j
149
+ relationship = Missing_Relationship
150
+ end
111
151
oper = hp[j,:] # look up the other person
112
152
recip_relationship = Relationship (oper[ok])
113
- println (" hh $(p. hid) : checking $(p. pno) =>$(oper. pno) relationships $(relationship) =>$(recip_relationship) " )
114
- if is_partner ( relationship )
115
- if ! is_partner ( recip_relationship )
116
- nfixes += 1
117
- oper[ok] = Int ( relationship )
118
- end
119
- elseif is_dependent_child ( relationship )
120
- if ! is_parent ( recip_relationship )
121
- nfixes += 1
122
- r = if relationship == Son_or_daughter_incl_adopted
123
- Parent
124
- elseif relationship == Foster_child
125
- Foster_parent
126
- elseif relationship == Step_son_or_daughter
127
- Step_parent
128
- end
129
- oper[ok] = Int ( r )
130
- end
131
- elseif is_parent ( relationship )
132
- if ! is_dependent_child ( recip_relationship )
133
- nfixes += 1
134
- r = if relationship == Parent
135
- Son_or_daughter_incl_adopted
136
- elseif relationship == Foster_parent
137
- Foster_child
138
- elseif relationship == Step_parent
139
- Step_son_or_daughter
140
- end
141
- oper[ok] = Int ( r )
142
- end
143
- elseif is_sibling ( relationship )
144
- if ! is_sibling ( recip_relationship )
145
- nfixes += 1
146
- oper[ok] = Int ( relationship )
147
- end
148
- elseif is_other_relative ( relationship )
149
- if ! is_other_relative ( recip_relationship )
150
- nfixes += 1
151
- r = if relationship == Parent_in_law
152
- Son_in_law_or_daughter_in_law
153
- elseif relationship == Son_in_law_or_daughter_in_law
154
- Parent_in_law
155
- elseif relationship == Grand_child
156
- Grand_parent
157
- elseif relationship == Grand_parent
158
- Grand_child
159
- elseif relationship == Other_relative
160
- Other_relative
161
- end
162
- oper[ok] = Int ( r )
163
- end
164
- elseif is_non_relative ( relationship )
165
- if ! is_non_relative ( recip_relationship )
166
- nfixes += 1
167
- oper[ok] = Int ( Other_non_relative )
168
- end
169
- end # check end
170
- println (" final relationships: $(relationship) =>$(Relationship (oper[ok])) " )
153
+ if (relationship == Missing_Relationship) # lookup other way around if missing
154
+ relationship = reciprocal_relationship ( recip_relationship )
155
+ end
156
+ shouldbe_rel = reciprocal_relationship ( relationship )
157
+ if recip_relationship != shouldbe_rel
158
+ # println("hh $(p.hid): changing for $(p.pno)=>$(oper.pno) relationships $(relationship)=>$(recip_relationship)")
159
+ nfixes += 1
160
+ oper[ok] = Int (shouldbe_rel)
161
+ # println("final relationships: $(relationship)=>$(Relationship(oper[ok]))")
162
+ end
163
+ if relationship == This_Person # can't be this person if pno != j
164
+ oper[ok] = Int (Other_non_relative)
165
+ end
171
166
end # other people
172
167
end # each relationship of this person
173
168
# clear out the rest
@@ -179,6 +174,8 @@ function fixup_relationships!( hp :: AbstractDataFrame )::Int
179
174
end
180
175
end # clearout unneeded relationships
181
176
end # each person
177
+ # clear out zero sort marker
178
+ # select!( pers, Not( :zpos ))
182
179
return nfixes
183
180
end # function
184
181
@@ -201,6 +198,7 @@ function assign_hrp!( hp :: AbstractDataFrame; target::Symbol )
201
198
else # .. or oldest if no income
202
199
hrpp = findmax (hp. age)[2 ]
203
200
end
201
+ println ( " setting $hrpp $target to 1" )
204
202
hp[hrpp,target] = 1 ;
205
203
end
206
204
@@ -211,28 +209,60 @@ if bus numbers are 1,3,9 replace with 1,2,3
211
209
function fixup_bus!(hp::AbstractDataFrame; target::Symbol)
    # Renumbers column `target` of `hp` in place so its values run
    # contiguously from 1 in their existing order (1,3,9 -> 1,2,3).
    # Returns `nothing`.
    #
    # Sorted distinct values currently in use: a value's position in this
    # vector is its new number.
    buos = sort(unique(hp[:, target]))
    for p in eachrow(hp)
        # Every value was taken from the column itself, so the first sorted
        # position found is exactly that value's rank.
        p[target] = searchsortedfirst(buos, p[target])
    end
    return nothing
end
220
+
221
+ """
222
+ Allocate anyone, say, in Grand_parent relationship in a bu with a head to a new bu.
223
+ FIXME won't work for couples
224
+ """
225
function add_lonely_bus!(hp::AbstractDataFrame)
    # Mutates `hp` in place and returns `nothing`.
    #
    # Highest benefit unit number in use; new units are numbered upwards
    # from here.
    nbus = maximum(hp[:, :default_benefit_unit])
    # `hp[mask, :]` is a copy, so the heads examined are fixed before anyone
    # below is promoted to head of a new unit.
    buheads = hp[hp.is_bu_head .== 1, :]
    for b in eachrow(buheads)
        # Column holding each person's relationship to this head.
        k = Symbol("relationship_$(b.pno)")
        for p in eachrow(hp)
            p.pno == b.pno && continue
            # only people nominally in this head's bu
            p.default_benefit_unit == b.default_benefit_unit || continue
            if is_not_immediate_family(Relationship(p[k])) && (p.age >= 16)
                # Increment first so the message reports the unit actually
                # being created, not the previous maximum.
                nbus += 1
                println("adding $nbus for hh $(p.hid) age $(p.age) pno $(p.pno)")
                p.default_benefit_unit = nbus
                # 1 rather than `true`, matching the 0/1 flags used for
                # `is_bu_head` elsewhere in this file.
                p.is_bu_head = 1
            end
        end
    end
    return nothing
end
222
244
223
"""
    get_relationships(hp::AbstractDataFrame)

Collect the `relationship_<i>` columns for the people in `hp` into a 15×15
matrix of `Relationship` values.

Entry `[j, i]` is built from row `j`'s value in column `relationship_i`.
Cells beyond the number of rows in `hp`, and cells whose source value is
`missing`, hold `Missing_Relationship`.
"""
function get_relationships(hp::AbstractDataFrame)::Matrix{Relationship}
    num_people = size(hp)[1]
    v = fill(Missing_Relationship, 15, 15)
    for i in 1:num_people
        k = Symbol("relationship_$i")
        for j in 1:num_people
            r = hp[j, k]
            # Same guard as `fixup_relationships!`: a missing cell keeps the
            # fill value instead of being passed to `Relationship`.
            if !ismissing(r)
                v[j, i] = Relationship(r)
            end
        end
    end
    return v
end
234
256
235
257
258
"""
    print_relationships(m::Matrix{Relationship})

Pretty-print the leading `n`×`n` corner of `m`, with an extra final column
holding the row numbers `1:n`.

`n` is one less than the position of the first `Missing_Relationship` in the
first row of `m`, or the full row length when the first row contains none.
"""
function print_relationships(m::Matrix{Relationship})
    firstgap = findfirst(isequal(Missing_Relationship), m[1, :])
    # `findfirst` returns `nothing` when the first row has no gap (a full
    # household); fall back to one past the last column so that n covers
    # the whole row instead of failing on `nothing - 1`.
    n = something(firstgap, size(m, 2) + 1) - 1
    hc = hcat(m[1:n, 1:n], collect(1:n))
    return pretty_table(hc)
end
263
+
264
+
265
+
236
266
"""
    put_relationships!(hp::AbstractDataFrame, rels::AbstractMatrix)

Unimplemented stub: the body is empty, so `hp` is left untouched and
`nothing` is returned.

NOTE(review): presumably intended as the inverse of `get_relationships`,
writing `rels` back into the `relationship_<i>` columns; confirm before
implementing. `rels` accepts any matrix so that both the earlier
`Matrix{Int}` form and the `Matrix{Relationship}` now returned by
`get_relationships` can be passed.
"""
function put_relationships!(hp::AbstractDataFrame, rels::AbstractMatrix)
    # TODO: not implemented yet.
    return nothing
end
@@ -303,6 +333,14 @@ function do_initial_fixes!(hh::DataFrame, pers::DataFrame )
303
333
if ! ismissing ( p. highest_qualification ) && (p. highest_qualification == 0 ) # missing is -1 here, not zero
304
334
p. highest_qualification = - 1
305
335
end
336
+ if (p. age < 16 ) || ((p. from_child_record== 1 )&& (p. age < 20 ))
337
+ p. is_hrp = 0
338
+ if (! ismissing (p. is_bu_head)) && (p. is_bu_head == 1 )
339
+ println ( " removing bu head for $(p. pno) aged $(p. age) hid=$(p. hid) " )
340
+ p. is_bu_head = 0
341
+ p. default_benefit_unit = 1 # FIXME wild guess
342
+ end
343
+ end
306
344
p. is_hrp = coalesce ( p. is_hrp, 0 )
307
345
# FIXME fixup all the relationships
308
346
if p. is_hrp == 1
@@ -312,8 +350,8 @@ function do_initial_fixes!(hh::DataFrame, pers::DataFrame )
312
350
#
313
351
# Data in order - just makes inspection easier.
314
352
#
315
- sort! ( hh, [:data_year , : hid ] )
316
- sort! ( pers, [:data_year , : hid ,:pno ,:default_benefit_unit ,:age ])
353
+ sort! ( hh, [:hid ] )
354
+ sort! ( pers, [:hid ,:pno ,:default_benefit_unit ,:age ])
317
355
#
318
356
# Kill a few annoying missings.
319
357
#
@@ -370,7 +408,7 @@ function do_main_fixes!(hh::DataFrame,pers::DataFrame)
370
408
@assert sum ( hp[:,:is_hrp ]) == 1 " !=1 hrp for $(thishh. hid) "
371
409
# Fixup non-contigious default BU allocations.
372
410
if length (bus) != = maximum (bus)
373
- println ( " non contig $(bus) $(thishh. hid) " )
411
+ # println( "non contig $(bus) $(thishh.hid)" )
374
412
fixup_bus! ( hp, target= :default_benefit_unit )
375
413
end
376
414
# For each of these now nicely numbered bus, ensure 1 bu head.
@@ -380,23 +418,43 @@ function do_main_fixes!(hh::DataFrame,pers::DataFrame)
380
418
nbusps += size ( bu )[1 ]
381
419
numheads = sum ( bu[:,:is_bu_head ])
382
420
if numheads != = 1
383
- println ( " numheads $numheads " )
421
+ # println( "numheads $numheads")
384
422
assign_hrp! ( bu; target= :is_bu_head )
385
423
end
386
424
end
387
425
# this is very unfinished
388
- n_relationships_changed += fixup_relationships! (hp)
389
426
@assert nbusps == size (hp)[1 ] " size mismatch for $(hp. hid) "
390
- end
427
+ n_relationships_changed += fixup_relationships! (hp)
428
+ for p in eachrow ( hp )
429
+ if (p. age < 16 ) || ((p. from_child_record== 1 )&& (p. age < 20 ))
430
+ p. is_hrp = 0
431
+ if (! ismissing (p. is_bu_head)) && (p. is_bu_head == 1 )
432
+ println ( " #2 removing bu head for $(p. pno) aged $(p. age) hid=$(p. hid) " )
433
+ p. is_bu_head = 0
434
+ p. default_benefit_unit = 1 # FIXME wild guess
435
+ end
436
+ end
437
+ end
438
+ add_lonely_bus! ( hp )
439
+ # endlessly repeated FIXME
440
+ nbusps = 0
441
+ # regroup bus
442
+ hbus = groupby ( hp, :default_benefit_unit )
443
+ for bu in hbus
444
+ nbusps += size ( bu )[1 ]
445
+ numheads = sum ( bu[:,:is_bu_head ])
446
+ if numheads != = 1
447
+ println ( " numheads wrong for $numheads " )
448
+ assign_hrp! ( bu; target= :is_bu_head )
449
+ end
450
+ end
451
+ end # hh loop
452
+ delete_irredemably_bad_hhs ( hh, pers )
391
453
end
392
454
455
+ """
393
456
394
- # # TODO FIXUP relationship_x fields
395
-
396
- #
397
- # Load synthetic datasets using default settings.
398
- #
399
-
457
+ """
400
458
function fixall! ( hh:: DataFrame , pers:: DataFrame )
401
459
settings = Settings ()
402
460
settings. dataset_type = synthetic_data
@@ -406,11 +464,9 @@ function fixall!( hh::DataFrame, pers::DataFrame)
406
464
do_pers_idiot_checks ( pers )
407
465
# Delete working columns with the mostly.ai string primary keys - we've replaced them
408
466
# with BigInts as in the actual data.
409
- #=
410
467
select! ( hh, Not (:uhidstr ) )
411
468
select! ( pers, Not ( :pidstr ))
412
469
select! ( pers, Not ( :uhidstr ))
413
- =#
414
470
# write synth files to default locations.
415
471
ds = main_datasets ( settings )
416
472
CSV. write ( ds. hhlds, hh; delim= ' \t ' )
0 commit comments