screening.py
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from sklearn.model_selection import KFold
from scipy.optimize import minimize
rng = np.random.default_rng(66)
# Helper modules from this repository
import fun
from conquer import high_dim
# opt holds the default optimization parameters: the quadratic (inverse step-size) parameter phi,
# the backtracking inflation factor gamma, the iteration cap, and the convergence tolerance
opt = {'phi': 0.1, 'gamma': 1.25, 'max_iter': 5e2, 'tol': 1e-2}
# select_fun is the screening function for a single sparsity level k_index
# @njit
def select_fun(X, Y, m, n, h, tau, beta_initial, kernel, k_index):
    # Local gradients ∇L_i(β) at beta_initial and their machine-average ∇L_N(β)
    deriv_first_initial = fun.deriv_i(X, Y, m, n, h, tau, beta_initial, kernel)
    deriv_total_initial = (1 / m) * np.sum(deriv_first_initial, axis=0)
    beta0 = beta_initial
    phi = np.linalg.norm(X[0], ord=np.inf)**2 / n
    # phi = opt['phi']
    r0 = 1.0  # norm of the latest proximal update; drives the stopping rule
    count = 0
    while count <= opt['max_iter'] and r0 > opt['tol']:
        gradbeta0 = fun.deriv_obj_fun(X, Y, m, n, h, tau, beta0, kernel, deriv_first_initial, deriv_total_initial)
        loss_eval0 = fun.obj_fun(X, Y, m, n, h, tau, beta0, kernel, deriv_first_initial, deriv_total_initial)
        # Proximal gradient step with hard thresholding at sparsity level k_index
        beta1 = fun.hard_thresholding(beta0 - gradbeta0 / phi, k_index)
        r0 = np.linalg.norm(beta1 - beta0)
        loss_proxy = loss_eval0 + (beta1 - beta0) @ gradbeta0 + 0.5 * phi * r0**2
        loss_eval1 = fun.obj_fun(X, Y, m, n, h, tau, beta1, kernel, deriv_first_initial, deriv_total_initial)
        # Backtracking: inflate phi (i.e. shrink the step size) until the quadratic
        # majorizer dominates the objective at beta1
        while loss_proxy < loss_eval1:
            phi *= opt['gamma']
            beta1 = fun.hard_thresholding(beta0 - gradbeta0 / phi, k_index)
            r0 = np.linalg.norm(beta1 - beta0)
            loss_proxy = loss_eval0 + (beta1 - beta0) @ gradbeta0 + 0.5 * phi * r0**2
            loss_eval1 = fun.obj_fun(X, Y, m, n, h, tau, beta1, kernel, deriv_first_initial, deriv_total_initial)
        # Accept the update and reset phi before the next iteration
        beta0, phi = beta1, np.linalg.norm(X[0], ord=np.inf)**2 / n
        count += 1
    return {'beta': beta1, 'count': count, 'loss': loss_eval1, 'mse': r0}
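# Illustrative call to select_fun, kept commented out in the style of this file. The shapes,
# bandwidth, and kernel name below are placeholders for demonstration, not values taken from
# this repository or from fun.py:
# m_, n_, p_ = 10, 200, 500                     # machines, samples per machine, covariates
# X_ = rng.standard_normal((m_, n_, p_))
# Y_ = rng.standard_normal((m_, n_))
# out = select_fun(X_, Y_, m_, n_, h=0.5, tau=0.5,
#                  beta_initial=np.zeros(p_), kernel='Gaussian', k_index=10)
# out['beta'] is the screened estimate; out['mse'] is the size of the final proximal update.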
# def final_select(X, Y, m, n, p, h, tau, beta_initial, kernel, k_index):
# # sqr = high_dim(X[0], Y[0], intercept=False)
# # l1_model = sqr.l1(tau=tau)
# #
# # # beta_initial is the initial value of beta, lasso is the default penalty
# # beta_initial = l1_model['beta']
#
# # deriv_loss_initial is the derivative of the loss function ∇L_i(β)
# # deriv_loss_initial = fun.deriv_i(X, Y, m, n, h, tau, beta_initial, kernel)
#
# # deriv_loss_N is the summation of the derivative of the loss function: ∇L_N(β)
# # deriv_loss_N = (1 / m) * np.sum(deriv_loss_initial, axis=0)
#
# ff = [0] * k_index
# dev = np.zeros(k_index)
# for i in range(k_index):
# ff[i] = select_fun(X, Y, m, n, h, tau, beta_initial, kernel, i+1)
# dev[i] = np.log(fun.cqr_check_sum(X, Y, m, tau, ff[i]['beta'])) + (i + 1) * np.log(np.log(m * n)) * np.log(p) / (n * m)
# # dev[i] = np.log( fun.obj_fun(X, Y, m, n, h, tau, ff[i]['beta'], kernel, deriv_loss_initial, deriv_loss_N) ) + (i + 1) * np.log(np.log(m * n)) * np.log(p) / (n * m)
#
# dev_index = np.argmin(dev)
# index = np.where(ff[dev_index]['beta'].flatten() != 0)[0]
# beta = ff[dev_index]['beta'].flatten()
# return {'index': index, 'beta_value': beta}
# final_select runs the screening over model sizes 1..k_index and keeps the one
# minimizing an information criterion (log check loss plus a model-size penalty)
def final_select(X, Y, m, n, p, h, tau, beta_initial, kernel, k_index):
    # Track the smallest criterion value and the beta that achieves it
    min_dev = float('inf')   # start at infinity so the first candidate always becomes the minimum
    dev_index = -1           # index of the current best model size
    best_beta = None
    # Loop over candidate model sizes and compare their criterion values
    for i in range(k_index):
        # Screened estimate at sparsity level i + 1 from select_fun
        ff = select_fun(X, Y, m, n, h, tau, beta_initial, kernel, i + 1)
        # Information criterion: log check loss plus a penalty growing with the model size
        current_dev = np.log(fun.cqr_check_sum(X, Y, m, tau, ff['beta'])) + (i + 1) * np.log(np.log(m * n)) * np.log(p) / (n * m)
        # Keep the minimum criterion value and its corresponding estimate
        if current_dev < min_dev:
            min_dev = current_dev
            dev_index = i
            best_beta = ff['beta'].flatten()
    # Positions of the non-zero coefficients in the selected estimate
    index = np.where(best_beta != 0)[0]
    # Return the selected support and the corresponding beta values
    return {'index': index, 'beta_value': best_beta}
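# Illustrative use of final_select, commented out; it reuses the hypothetical X_, Y_, m_, n_, p_
# from the select_fun example above, which are placeholders rather than project data:
# sel = final_select(X_, Y_, m_, n_, p_, h=0.5, tau=0.5,
#                    beta_initial=np.zeros(p_), kernel='Gaussian', k_index=10)
# sel['index'] lists the screened covariates; sel['beta_value'] holds their coefficients.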
# def compute_dev(X, Y, m, n, p, h, tau, beta_initial, kernel, k):
# ff_k = select_fun(X, Y, m, n, h, tau, beta_initial, kernel, k + 1)
# dev_value = np.log(fun.cqr_check_sum(X, Y, m, tau, ff_k['beta'])) + \
# (k + 1) * np.log(np.log(m * n)) * np.log(p) / (n * m)
# return dev_value, ff_k
#
# def final_select(X, Y, m, n, p, h, tau, beta_initial, kernel, k_index):
# results = []
#
# # Parallel computation of dev values
# with ThreadPoolExecutor() as executor:
# futures = [executor.submit(compute_dev, X, Y, m, n, p, h, tau, beta_initial, kernel, k) for k in range(k_index)]
# for future in futures:
# results.append(future.result())
#
# # Find the minimum dev and its corresponding beta
# dev_values = [result[0] for result in results]
# ff_results = [result[1] for result in results]
# min_index = np.argmin(dev_values)
#
# best_beta = ff_results[min_index]['beta'].flatten()
# index = np.where(best_beta != 0)[0]
#
# return {'index': index, 'beta_value': best_beta}
# Define the check function (quantile loss)
def quantile_loss(y_true, y_pred, tau):
residuals = y_true - y_pred
return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals))
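# Quick sanity check of the check loss (commented out; the numbers are illustrative only).
# With tau = 0.5 the pinball loss is half the mean absolute error, e.g.
# quantile_loss(np.array([1.0, 2.0]), np.array([0.0, 0.0]), 0.5) returns 0.75.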
# Define the objective function (quantile loss + LASSO penalty)
def objective_function(w, X, y, tau, alpha):
"""Objective function: quantile loss + LASSO penalty."""
y_pred = X @ w
return quantile_loss(y, y_pred, tau) + alpha * np.sum(np.abs(w))
# Cross-validation to find the optimal alpha (lambda)
def quantile_lasso_cv(X, y, tau, alphas, n_folds=5):
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)
alpha_scores = []
for alpha in alphas:
fold_scores = []
for train_idx, val_idx in kf.split(X):
X_train, X_val = X[train_idx], X[val_idx]
y_train, y_val = y[train_idx], y[val_idx]
# Initial guess for weights
initial_w = np.zeros(X.shape[1])
# Optimize the objective function
result = minimize(
fun=objective_function,
x0=initial_w,
args=(X_train, y_train, tau, alpha),
method='L-BFGS-B',
options={'maxiter': 1000}
)
# Evaluate on the validation set
y_pred_val = X_val @ result.x
val_score = quantile_loss(y_val, y_pred_val, tau)
fold_scores.append(val_score)
# Average score across folds for this alpha
alpha_scores.append(np.mean(fold_scores))
# Find the alpha with the lowest score
optimal_alpha = alphas[np.argmin(alpha_scores)]
return optimal_alpha
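# Illustrative cross-validation run, commented out. The synthetic data below are placeholders
# chosen for demonstration; only the numpy/sklearn/scipy calls already used above are needed:
# X_demo = rng.standard_normal((100, 10))
# w_true = np.zeros(10); w_true[:3] = 1.0
# y_demo = X_demo @ w_true + rng.standard_normal(100)
# best_alpha = quantile_lasso_cv(X_demo, y_demo, tau=0.5, alphas=np.logspace(-4, 0, 20))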
# Define a range of alpha (lambda) values to test
alphas = np.logspace(-4, 0, 20)
# CQR is the LASSO estimator adopted from:
# Tan, Battey, and Zhou (2022). Communication-constrained distributed quantile regression with optimal statistical guarantees. Journal of Machine Learning Research.
# @njit
def CQR(X, Y, m, n, N, tau, s, p, beta_initial, kernel):
    # Two smoothing bandwidths: 'b' is used in the objective and gradient evaluations below,
    # while 'h' only enters the averaged gradient deriv_total_initial
    bandwidth = {'b': 0.75 * s ** (1 / 2) * (np.log(p) / n) ** (1 / 4), 'h': 0.75 * (s * np.log(p) / N) ** (1 / 4)}
    deriv_first_initial = fun.deriv_i(X, Y, m, n, bandwidth['b'], tau, beta_initial, kernel)
    deriv_first_initial_2 = fun.deriv_i(X, Y, m, n, bandwidth['h'], tau, beta_initial, kernel)
    deriv_total_initial = (1 / m) * np.sum(deriv_first_initial_2, axis=0)
# lambda_sim = np.array([max(abs(X[0].T.dot(tau - (rng.uniform(0,1, n) <= tau))))
# for b in range(200)]) / n
# lambda_value = 0.75 * np.quantile(lambda_sim, 0.9)
# lambda_max = np.linalg.norm( fun.deriv_obj_fun(X, Y, m, n, bandwidth['b'], tau, np.zeros(p), kernel, deriv_first_initial, deriv_total_initial), ord=np.inf )
# lambda_value = 0.005 * lambda_max
    # Choose the penalty level by cross-validation on the first machine's data
    lambda_value = quantile_lasso_cv(X[0], Y[0], tau, alphas)
    beta0 = beta_initial
    phi = np.linalg.norm(X[0], ord=np.inf)**2 / n
    # phi = opt['phi']
    r0 = 1.0  # norm of the latest proximal update; drives the stopping rule
    count = 0
    while count <= opt['max_iter'] and r0 > opt['tol']:
        gradbeta0 = fun.deriv_obj_fun(X, Y, m, n, bandwidth['b'], tau, beta0, kernel, deriv_first_initial, deriv_total_initial)
        loss_eval0 = fun.obj_fun(X, Y, m, n, bandwidth['b'], tau, beta0, kernel, deriv_first_initial, deriv_total_initial)
        # Proximal gradient step with soft thresholding at the penalty level lambda_value
        beta1 = fun.soft_thresholding(beta0 - gradbeta0 / phi, lambda_value)
        r0 = np.linalg.norm(beta1 - beta0)
        loss_proxy = loss_eval0 + (beta1 - beta0) @ gradbeta0 + 0.5 * phi * r0**2
        loss_eval1 = fun.obj_fun(X, Y, m, n, bandwidth['b'], tau, beta1, kernel, deriv_first_initial, deriv_total_initial)
        # Backtracking: inflate phi (i.e. shrink the step size) until the quadratic
        # majorizer dominates the objective at beta1
        while loss_proxy < loss_eval1:
            phi *= opt['gamma']
            beta1 = fun.soft_thresholding(beta0 - gradbeta0 / phi, lambda_value)
            r0 = np.linalg.norm(beta1 - beta0)
            loss_proxy = loss_eval0 + (beta1 - beta0) @ gradbeta0 + 0.5 * phi * r0**2
            loss_eval1 = fun.obj_fun(X, Y, m, n, bandwidth['b'], tau, beta1, kernel, deriv_first_initial, deriv_total_initial)
        # Accept the update and reset phi before the next iteration
        beta0, phi = beta1, np.linalg.norm(X[0], ord=np.inf)**2 / n
        count += 1
    # Support of the penalized estimate
    index = np.where(beta1 != 0)[0]
    return {'index': index, 'beta_value': beta1, 'count': count, 'loss': loss_eval1, 'mse': r0}
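# Illustrative call to CQR, commented out. It reuses the hypothetical X_, Y_, m_, n_, p_ from the
# select_fun example above; the sparsity guess s and the kernel name are placeholders as well:
# N_ = m_ * n_
# res = CQR(X_, Y_, m_, n_, N_, tau=0.5, s=10, p=p_,
#           beta_initial=np.zeros(p_), kernel='Gaussian')
# res['index'] gives the support selected by the soft-thresholded (lasso) estimate.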