forked from UK-Digital-Heart-Project/4Dsurvival
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo_validate.py
97 lines (71 loc) · 3.56 KB
/
demo_validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
@author: gbello
"""
import os, sys, pickle
import optunity, lifelines
from lifelines.utils import concordance_index
import numpy as np
sys.path.insert(0, '../code')
from CoxReg_Single_run import *
from hypersearch import *
# Load input data: element 0 = input features per patient (volumetric
# measures of RV function), element 1 = censoring status and survival times.
with open('../data/inputdata_conv.pkl', 'rb') as f:
    bundle = pickle.load(f)
x_full, y_full = bundle[0], bundle[1]
del bundle

# Containers for per-bootstrap results: predictions on the full sample,
# in-bag row indices, and per-replicate optimism estimates.
preds_bootfull = []
inds_inbag = []
Cb_opts = []
#STEP 1
#(1a) Find optimal hyperparameters for the Cox model via particle-swarm
#     search over the (log10) penalty exponent in [-2, 1].
opars, osummary = hypersearch_cox(x_data=x_full, y_data=y_full, method='particle swarm', nfolds=6, nevals=50, penalty_range=[-2,1])

#(1b) Using optimal hyperparameters, train a model on the full sample.
omod = coxreg_single_run(xtr=x_full, ytr=y_full, penalty=10**opars['penalty'])

#(1c) Compute Harrell's concordance index on the full sample.
#     This is the "apparent" performance (optimistically biased, since the
#     model is evaluated on its own training data). Hazards are negated so
#     higher predicted risk pairs with shorter survival.
predfull = omod.predict_partial_hazard(x_full)
C_app = concordance_index(y_full[:,1], -predfull, y_full[:,0])

print('\n\n==================================================')
print('Apparent concordance index = {0:.4f}'.format(C_app))
print('==================================================\n\n')

#BOOTSTRAP SAMPLING
#Define useful variables: sample size, candidate row indices, replicate count.
nsmp = len(x_full)
rowids = list(range(nsmp))  # idiomatic form of [_ for _ in range(nsmp)]
B = 100
for b in range(B):
    print('\n-------------------------------------')
    print('Current bootstrap sample:', b, 'of', B-1)
    print('-------------------------------------')

    # STEP 2: Generate a bootstrap sample by doing n random selections with
    # replacement (where n is the sample size).
    b_inds = np.random.choice(rowids, size=nsmp, replace=True)
    xboot = x_full[b_inds]
    yboot = y_full[b_inds]

    # (2a) Find optimal hyperparameters on the bootstrap sample.
    bpars, bsummary = hypersearch_cox(x_data=xboot, y_data=yboot, method='particle swarm', nfolds=6, nevals=50, penalty_range=[-2,1])

    # (2b) Using optimal hyperparameters, train a model on the bootstrap sample.
    bmod = coxreg_single_run(xtr=xboot, ytr=yboot, penalty=10**bpars['penalty'])

    # (2c[i]) Using the bootstrap-trained model, compute predictions on the
    # bootstrap sample and evaluate Harrell's concordance index (in-bag C).
    predboot = bmod.predict_partial_hazard(xboot)
    Cb_boot = concordance_index(yboot[:,1], -predboot, yboot[:,0])

    # (2c[ii]) Using the bootstrap-trained model, compute predictions on the
    # FULL sample and evaluate Harrell's concordance index.
    predbootfull = bmod.predict_partial_hazard(x_full)
    Cb_full = concordance_index(y_full[:,1], -predbootfull, y_full[:,0])

    # STEP 3: Optimism for the b-th bootstrap replicate is the difference
    # between in-bag performance (2c[i]) and full-sample performance (2c[ii]).
    Cb_opt = Cb_boot - Cb_full

    # Store per-replicate data (predictions, in-bag indices, optimism).
    preds_bootfull.append(predbootfull)
    inds_inbag.append(b_inds)
    Cb_opts.append(Cb_opt)

    # Release the fitted model and hyperparameters before the next replicate.
    del bpars, bmod
# STEP 5
# Bootstrap-estimated optimism: mean of the optimism estimates across the
# B bootstrap replicates.
C_opt = np.mean(Cb_opts)

# Correct the apparent C-index by the bootstrap-estimated optimism.
C_adj = C_app - C_opt

# 95% percentile interval for the optimism-adjusted concordance index.
adjusted_draws = [C_app - o for o in Cb_opts]
C_opt_95confint = np.percentile(adjusted_draws, q=[2.5, 97.5])

print('\n\n=========================SUMMARY=========================')
print('Apparent concordance index = {0:.4f}'.format(C_app))
print('Optimism bootstrap estimate = {0:.4f}'.format(C_opt))
print('Optimism-adjusted concordance index = {0:.4f}, and 95% CI = {1}'.format(C_adj, C_opt_95confint))