Skip to content

Commit beca755

Browse files
committed
Updating the way we compute weights.
1 parent e6f804b commit beca755

File tree

3 files changed

+74
-63
lines changed

3 files changed

+74
-63
lines changed

gamma_glm_model.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,16 +299,15 @@ def fit_ldscore_model(ld_df, ld_col_names, w_col_name=None,
299299

300300

301301
def get_model_lrt(coef, est_intercept,
302-
ld_df, ld_col_names, w_col_name,
302+
ld_df, ld_col_names, ld_weights,
303303
chisq_col='CHISQ',
304304
null_fit_intercept=False):
305305

306306
X = ld_df.loc[:, ['N'] + ld_col_names].values
307307
X[:, 0] = 1.
308308

309309
y = np.fmax(1e-6, ld_df[chisq_col])
310-
311-
u = np.maximum(ld_df[w_col_name].values, 1.)
310+
u = 1./ld_weights
312311

313312
fitted_tau = np.append([est_intercept], coef)
314313
fitted_ll = gamma_loglik(fitted_tau, X, y, u=u)

perform_regression.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,11 @@ def perform_ldsc_regression(ld_scores,
382382
'Coefficients': list(zip(ld_score_names, reg.coef))
383383
}
384384

385+
ld_weighs = np.sqrt(np.maximum(nss_df[ldc['WeightCol']].values, 1.))
386+
ld_weighs /= float(np.sum(ld_weighs))
387+
385388
ldc['Regression']['LRT'] = get_model_lrt(reg.coef, reg.intercept,
386-
nss_df, ld_score_names, ldc['WeightCol'])
389+
nss_df, ld_score_names, ld_weighs)
387390
ldc['Regression']['LRT_se'] = 0.0
388391

389392
pred_chi2 = predict_chi2(reg.coef, reg.intercept,
@@ -392,7 +395,7 @@ def perform_ldsc_regression(ld_scores,
392395
ldc['Regression']['Predictive Performance'] = {
393396
'Overall': compute_prediction_metrics(pred_chi2,
394397
nss_df['CHISQ'].values,
395-
nss_df[ldc['WeightCol']].values),
398+
1./ld_weighs),
396399
'Per MAF bin': {}
397400
}
398401

@@ -401,7 +404,7 @@ def perform_ldsc_regression(ld_scores,
401404
ldc['Regression']['Predictive Performance']['Per MAF bin'][i] = compute_prediction_metrics(
402405
pred_chi2[maf_subset],
403406
nss_df.loc[maf_subset, 'CHISQ'].values,
404-
nss_df.loc[maf_subset, ldc['WeightCol']].values
407+
1./ld_weighs[maf_subset]
405408
)
406409

407410
if ldc['Annotation']:
@@ -473,7 +476,7 @@ def perform_ldsc_regression(ld_scores,
473476
ldc['Regression']['Annotations']['Predictive Performance'][an] = {
474477
'Overall': compute_prediction_metrics(pred_chi2[ann_subset],
475478
nss_df.loc[ann_subset, 'CHISQ'].values,
476-
nss_df.loc[ann_subset, ldc['WeightCol']].values),
479+
1. / ld_weighs[ann_subset]),
477480
'Per MAF bin': {}
478481
}
479482

@@ -482,7 +485,7 @@ def perform_ldsc_regression(ld_scores,
482485
ldc['Regression']['Annotations']['Predictive Performance'][an]['Per MAF bin'][i] = compute_prediction_metrics(
483486
pred_chi2[ann_subset & maf_subset],
484487
nss_df.loc[ann_subset & maf_subset, 'CHISQ'].values,
485-
nss_df.loc[ann_subset & maf_subset, ldc['WeightCol']].values
488+
1. / ld_weighs[ann_subset & maf_subset]
486489
)
487490

488491
write_pbz2(os.path.join(output_dir, f"{ldc['Name']}.pbz2"),

predictive_performance_analysis.py

Lines changed: 64 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@
3131
'S-R2_1.0': '#F28E2B'
3232
}
3333

34-
methods = ['S-D2_0.0', 'S-D2_0.25', 'S-D2_0.5', 'S-D2_0.75', 'S-D2_1.0']
34+
partitioned = False
35+
methods = ['R2_0.0', 'R2_0.25', 'R2_0.5', 'R2_0.75', 'R2_1.0']
36+
37+
if partitioned:
38+
methods = ['S-' + m for m in methods]
39+
3540
metrics = [
3641
'Mean Difference',
3742
'Weighted Mean Difference',
@@ -44,7 +49,7 @@
4449

4550
annot_res = []
4651
global_res = []
47-
avg_chi2 = []
52+
all_snps_chi2 = []
4853

4954
for trait_file in glob.glob("results/regression/EUR/M_5_50_chi2filt/*/*.pbz2"):
5055
trait_res = read_pbz2(trait_file)
@@ -63,31 +68,34 @@
6368
'Method': m
6469
})
6570

66-
avg_chi2.append({
67-
'Trait': trait_name,
68-
'Score': trait_res['Predictive Performance']['Overall']['Mean Predicted Chisq'],
69-
'Method': m
70-
})
71-
72-
for ann, ann_res in trait_res['Annotations']['Predictive Performance'].items():
73-
for mbin, mbin_res in ann_res['Per MAF bin'].items():
74-
for metric in metrics:
75-
76-
annot_res.append({
77-
'Annotation': ann,
78-
'Trait': trait_name,
79-
'MAFbin': mbin,
80-
'Metric': metric,
81-
'Score': mbin_res[metric],
82-
'Method': m
83-
})
71+
for metric in metrics + ['Mean Predicted Chisq']:
72+
all_snps_chi2.append({
73+
'Trait': trait_name,
74+
'Metric': metric,
75+
'Score': trait_res['Predictive Performance']['Overall'][metric],
76+
'Method': m
77+
})
78+
79+
if partitioned:
80+
for ann, ann_res in trait_res['Annotations']['Predictive Performance'].items():
81+
for mbin, mbin_res in ann_res['Per MAF bin'].items():
82+
for metric in metrics:
83+
84+
annot_res.append({
85+
'Annotation': ann,
86+
'Trait': trait_name,
87+
'MAFbin': mbin,
88+
'Metric': metric,
89+
'Score': mbin_res[metric],
90+
'Method': m
91+
})
8492

8593
annot_res = pd.DataFrame(annot_res)
8694
global_res = pd.DataFrame(global_res)
87-
avg_chi2 = pd.DataFrame(avg_chi2)
95+
all_snps_chi2 = pd.DataFrame(all_snps_chi2)
8896

8997
print(f'Average {metric} across all traits and SNP categories:')
90-
print(avg_chi2.groupby('Method').mean())
98+
print(all_snps_chi2.groupby(['Method', 'Metric']).mean())
9199

92100
print('= = = = = = =')
93101

@@ -107,36 +115,37 @@
107115
plt.savefig(f"figures/analysis/global/{metric}{fig_format}")
108116
plt.close()
109117

110-
plt.subplots(figsize=(10, 8))
111-
sns.barplot(x='MAFbin', y='Score', hue='Method',
112-
data=annot_res.loc[annot_res['Metric'] == metric], ci=None,
113-
hue_order=methods,
114-
palette=ld_scores_colors)
115-
plt.xlabel('MAF Decile bin')
116-
plt.ylabel(metric)
117-
plt.savefig(f"figures/analysis/annotation/{metric}{fig_format}")
118-
plt.close()
119-
120-
highly_enriched_cats = [
121-
'Coding_UCSC',
122-
'Conserved_LindbladToh',
123-
'GERP.RSsup4',
124-
'synonymous',
125-
'Conserved_Vertebrate_phastCons46way',
126-
'Conserved_Mammal_phastCons46way',
127-
'Conserved_Primate_phastCons46way',
128-
'BivFlnk',
129-
'Ancient_Sequence_Age_Human_Promoter',
130-
'Human_Promoter_Villar_ExAC'
131-
]
132-
133-
plt.subplots(figsize=(10, 8))
134-
sns.barplot(x='MAFbin', y='Score', hue='Method',
135-
hue_order=methods,
136-
data=annot_res.loc[annot_res['Annotation'].isin(highly_enriched_cats) &
137-
(annot_res['Metric'] == metric)], ci=None,
138-
palette=ld_scores_colors)
139-
plt.xlabel('MAF Decile bin')
140-
plt.ylabel(metric)
141-
plt.savefig(f"figures/analysis/highly_enriched_annotation/{metric}{fig_format}")
142-
plt.close()
118+
if partitioned:
119+
plt.subplots(figsize=(10, 8))
120+
sns.barplot(x='MAFbin', y='Score', hue='Method',
121+
data=annot_res.loc[annot_res['Metric'] == metric], ci=None,
122+
hue_order=methods,
123+
palette=ld_scores_colors)
124+
plt.xlabel('MAF Decile bin')
125+
plt.ylabel(metric)
126+
plt.savefig(f"figures/analysis/annotation/{metric}{fig_format}")
127+
plt.close()
128+
129+
highly_enriched_cats = [
130+
'Coding_UCSC',
131+
'Conserved_LindbladToh',
132+
'GERP.RSsup4',
133+
'synonymous',
134+
'Conserved_Vertebrate_phastCons46way',
135+
'Conserved_Mammal_phastCons46way',
136+
'Conserved_Primate_phastCons46way',
137+
'BivFlnk',
138+
'Ancient_Sequence_Age_Human_Promoter',
139+
'Human_Promoter_Villar_ExAC'
140+
]
141+
142+
plt.subplots(figsize=(10, 8))
143+
sns.barplot(x='MAFbin', y='Score', hue='Method',
144+
hue_order=methods,
145+
data=annot_res.loc[annot_res['Annotation'].isin(highly_enriched_cats) &
146+
(annot_res['Metric'] == metric)], ci=None,
147+
palette=ld_scores_colors)
148+
plt.xlabel('MAF Decile bin')
149+
plt.ylabel(metric)
150+
plt.savefig(f"figures/analysis/highly_enriched_annotation/{metric}{fig_format}")
151+
plt.close()

0 commit comments

Comments
 (0)