-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathadj.py
67 lines (54 loc) · 3.39 KB
/
adj.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def fdr(p_vals):
from scipy.stats import rankdata
ranked_p_values = rankdata(p_vals)
fdr = p_vals * len(p_vals) / ranked_p_values
fdr[fdr > 1] = 1
return fdr
# Given a set of p-values, returns p-values adjusted using one of several methods
def pval_corrected(self, method=None):
'''p-values corrected for multiple testing problem
This uses the default p-value correction of the instance stored in
``self.multitest_method`` if method is None.
'''
import statsmodels.stats.multitest as smt
if method is None:
method = self.multitest_method
#TODO: breaks with method=None
return smt.multipletests(self.pvals_raw, method=method)
def correct_pvalues_for_multiple_testing(pvalues, correction_type = "Benjamini-Hochberg"):
"""
consistent with R - print correct_pvalues_for_multiple_testing([0.0, 0.01, 0.029, 0.03, 0.031, 0.05, 0.069, 0.07, 0.071, 0.09, 0.1])
"""
from numpy import array, empty
pvalues = array(pvalues)
n = float(pvalues.shape[0])
new_pvalues = empty(n)
if correction_type == "Bonferroni":
new_pvalues = n * pvalues
elif correction_type == "Bonferroni-Holm":
values = [ (pvalue, i) for i, pvalue in enumerate(pvalues) ]
values.sort()
for rank, vals in enumerate(values):
pvalue, i = vals
new_pvalues[i] = (n-rank) * pvalue
elif correction_type == "Benjamini-Hochberg":
values = [ (pvalue, i) for i, pvalue in enumerate(pvalues) ]
values.sort()
values.reverse()
new_values = []
for i, vals in enumerate(values):
rank = n - i
pvalue, index = vals
new_values.append((n/rank) * pvalue)
for i in xrange(0, int(n)-1):
if new_values[i] < new_values[i+1]:
new_values[i+1] = new_values[i]
for i, vals in enumerate(values):
pvalue, index = vals
new_pvalues[index] = new_values[i]
return new_pvalues
# Benjamini and Hochberg FDR at alpha=0.01
from statsmodels.stats.multitest import multipletests
AdP = multipletests(pvals=Pval, alpha=0.01, method="fdr_bh", is_sorted=False, returnsorted=False)
#print(AdP[1])
#print(len(AdP[1][np.where(AdP[1]<0.01)])) # AdP[1] returns corrected P-vals (array)