@@ -94,10 +94,13 @@ def get_base_Percent(data):
 train_data = get_base_FVC(train_data)
 
 
+
+
 # tabular feature generation
 
 def get_tab(df):
-    vector = [(df.Age.values[0] - train.Age.values.mean()) / train.Age.values.std()]  # df.Age.values[0].mean(), df.Age.values[0].std()
+    # print(df)
+    vector = [(df.Age.values[0] - train_data.Age.values.mean()) / train_data.Age.values.std()]  # df.Age.values[0].mean(), df.Age.values[0].std()
 
     if df.Sex.values[0] == 'Male':
         vector.append(0)
@@ -113,7 +116,12 @@ def get_tab(df):
     else:
         vector.extend([1, 0])  # this is useless
 
-    vector.append((df.Volume.values[0] - train.Volume.values.mean()) / train.Volume.values.std())
+    vector.append((df.Volume.values[0] - train_data.Volume.values.mean()) / train_data.Volume.values.std())
+
+    vector.append((df.baseline_week.values[0] - train_data.baseline_week.values.mean()) / train_data.baseline_week.values.std())
+
+    vector.append((df.base_FVC.values[0] - train_data.base_FVC.values.mean()) / train_data.base_FVC.values.std())
+
     return np.array(vector)
 
 
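Every feature in this hunk follows the same z-score pattern: subtract the training-set mean and divide by the training-set standard deviation, so each value is normalized against train_data statistics rather than the single-row df. A minimal sketch of that pattern (the zscore helper and the demo values are hypothetical, not part of the diff):

    import pandas as pd

    def zscore(row_df, train_df, col):
        # Normalize one row's value against the training-set statistics,
        # mirroring the pattern used in get_tab above.
        return (row_df[col].values[0] - train_df[col].values.mean()) / train_df[col].values.std()

    train_demo = pd.DataFrame({'base_FVC': [2000.0, 2500.0, 3000.0]})
    print(zscore(train_demo.iloc[[0]], train_demo, 'base_FVC'))  # -> about -1.22
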
@@ -148,11 +156,13 @@ def __getitem__(self, idx):
         pid = all_features[0]
         fvc = []
         fvc.append(all_features[2])
+        # print(self.train_df.iloc[[idx]])
+        feature_set = get_tab(self.train_df.iloc[[idx]])
         try:
             i = np.random.choice(self.train_data[pid], size=1)[0]
             img = get_img(f'{root_path}/train/{pid}/{i}')
             x.append(img)
-            tab.append(all_features[1:5])
+            tab.append(feature_set)
         except Exception as e:
             print(e)
             print('error')
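Note the double brackets in self.train_df.iloc[[idx]]: they return a one-row DataFrame rather than a Series, which is what get_tab expects since it reads columns as df.Age.values[0]. A quick illustration with made-up values:

    import pandas as pd

    toy = pd.DataFrame({'Age': [60, 70], 'Sex': ['Male', 'Female']})
    row = toy.iloc[[0]]        # one-row DataFrame, columns intact
    print(row.Age.values[0])   # 60 -- the access pattern get_tab relies on
    print(type(toy.iloc[0]))   # single brackets give a Series instead
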
@@ -405,11 +415,12 @@ def hyb_loss(outputs,target,l):
 # need to edit from here
 
 # cut data
-train_data = train_data.iloc[range(100)]
+if hyp.dummy_training:
+    train_data = train_data.iloc[range(hyp.dummy_train_rows)]
 
 for model in train_models:
     log = open(f"{result_dir}/{model}.txt", "a+")
-    kfold = KFold(n_splits = nfold)
+    kfold = KFold(n_splits=nfold)
 
     ifold = 0
     for train_index, test_index in kfold.split(train_data):
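The fold loop uses the standard scikit-learn pattern: KFold.split yields positional train/test index arrays over the rows of train_data, one pair per fold. A minimal sketch, assuming nfold is a plain integer (names mirror the diff; the 100-row stand-in data is made up):

    import numpy as np
    from sklearn.model_selection import KFold

    nfold = 5
    rows = np.arange(100)              # stand-in for train_data rows
    kfold = KFold(n_splits=nfold)
    for train_index, test_index in kfold.split(rows):
        # each fold holds out a contiguous block; pass shuffle=True to randomize
        print(len(train_index), len(test_index))  # 80 20 on every fold
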
@@ -582,4 +593,5 @@ def hyb_loss(outputs,target,l):
 
 
 # ref: https://www.kaggle.com/miklgr500/linear-decay-based-on-resnet-cnn
+# https://www.kaggle.com/furcifer/q-regression-with-ct-tabular-features-pytorch
 # https://pytorch.org/docs/stable/index.html