Skip to content

Commit

Permalink
Merge pull request #104 from clemente-lab/influence-unittests
Browse files Browse the repository at this point in the history
Influence unittests
  • Loading branch information
kbpi314 authored Aug 5, 2020
2 parents e00b8a9 + 30aa006 commit 0f6a9bd
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 127 deletions.
6 changes: 3 additions & 3 deletions cutie/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def resample1_cutie_pc(var1_index, var2_index, samp_var1, samp_var2, **kwargs):
def cookd(var1_index, var2_index, samp_var1, samp_var2, **kwargs):
"""
Takes a given var1 and var2 by indices and recomputes Cook's D for each i-th
sample. (UT in test_pointwise_metrics)
sample. Not intended to be used with nan data. (UT in test_pointwise_metrics)
----------------------------------------------------------------------------
INPUTS
var1_index - Integer. Index for variable from file 1 in pairwise correlation.
Expand Down Expand Up @@ -310,7 +310,7 @@ def cookd(var1_index, var2_index, samp_var1, samp_var2, **kwargs):
def dffits(var1_index, var2_index, samp_var1, samp_var2, **kwargs):
"""
Takes a given var1 and var2 by indices and recomputes DFFITS for each i-th
sample. (UT in test_pointwise_metrics)
sample. Not intended to be used with nan data. (UT in test_pointwise_metrics)
----------------------------------------------------------------------------
INPUTS
var1_index - Integer. Index for variable from file 1 in pairwise correlation.
Expand Down Expand Up @@ -365,7 +365,7 @@ def dffits(var1_index, var2_index, samp_var1, samp_var2, **kwargs):
def dsr(var1_index, var2_index, samp_var1, samp_var2, **kwargs):
"""
Takes a given var1 and var2 by indices and recomputes DFFITS for each i-th
sample. (UT in test_pointwise_metrics)
sample. Not intended to be used with nan data. (UT in test_pointwise_metrics)
----------------------------------------------------------------------------
INPUTS
var1_index - Integer. Index for variable from file 1 in pairwise correlation.
Expand Down
173 changes: 49 additions & 124 deletions tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,42 +186,6 @@ def setUp(self):
# specific to pearson, paired var1 = 1 var2 = 2 with bonferonni
self.var1, self.var2 = 1, 2
self.sign = np.sign(self.correlations[self.var1][self.var2])
self.pointwise_results = {
'p': {
'cutie_1pc': (np.array([1., 1., 1., 1., 1.]),
np.array([0., 0., 0., 0., 0.]),
np.array([-0.58387421, -0.65710223, -0.6882472 , -0.54772256, -0.51298918]),
np.array([0.41612579, 0.34289777, 0.3117528 , 0.45227744, 0.48701082])),
'cookd': (np.array([0., 0., 0., 0., 0.]),
np.array([0., 0., 0., 0., 1.]),
np.array([5.27697119e-02, 1.44618414e-01, 4.84502508e-01, 7.29729730e-03, 8.51351351e+00]),
np.array([0.94945921, 0.87104214, 0.65714228, 0.99274683, 0.05797719])),
'dffits': (np.array([0., 0., 0., 0., 0.]),
np.array([0., 0., 1., 0., 1.]),
np.array([-0.27560574, -0.55227095, 2.28912231, -0.09944903, 3.72104204]),
np.array([1.26491106, 1.26491106, 1.26491106, 1.26491106, 1.26491106])),
'dsr': (np.array([0., 0., 0., 0., 0.]),
np.array([0., 0., 1., 0., 0.]),
np.array([-0.39893758, -1.07867081, 3.77134948, -0.18156826, 0.66299354]),
np.array([-0.39893758, -1.07867081, 3.77134948, -0.18156826, 0.66299354]))},
'r': {
'cutie_1pc': (np.array([1., 1., 1., 1., 1.]),
np.array([0., 0., 0., 0., 0.]),
np.array([-0.58387421, -0.65710223, -0.6882472 , -0.54772256, -0.51298918]),
np.array([0.41612579, 0.34289777, 0.3117528 , 0.45227744, 0.48701082])),
'cookd': (np.array([0., 0., 0., 0., 0.]),
np.array([0., 0., 0., 0., 1.]),
np.array([5.27697119e-02, 1.44618414e-01, 4.84502508e-01, 7.29729730e-03, 8.51351351e+00]),
np.array([0.94945921, 0.87104214, 0.65714228, 0.99274683, 0.05797719])),
'dffits': (np.array([0., 0., 0., 0., 0.]),
np.array([0., 0., 1., 0., 1.]),
np.array([-0.27560574, -0.55227095, 2.28912231, -0.09944903, 3.72104204]),
np.array([1.26491106, 1.26491106, 1.26491106, 1.26491106, 1.26491106])),
'dsr': (np.array([0., 0., 0., 0., 0.]),
np.array([0., 0., 1., 0., 0.]),
np.array([-0.39893758, -1.07867081, 3.77134948, -0.18156826, 0.66299354]),
np.array([-0.39893758, -1.07867081, 3.77134948, -0.18156826, 0.66299354]))}
}

self.infln_metrics = ['cutie_1pc', 'cookd', 'dffits', 'dsr']
self.infln_mapping = {
Expand Down Expand Up @@ -397,62 +361,11 @@ def setUp(self):
self.test_dir = os.path.abspath(os.path.dirname(__file__))
self.work_dir = os.path.join(self.test_dir, 'test_data/')

self.pointwise_results = {}
for p in ['p', 'r']:
self.pointwise_results[p] = {}
for f in self.infln_mapping:
x_old = self.samp_var1[:, self.var1]
y_old = self.samp_var2[:, self.var2]

# remove nan for influence calculation
var1_values, var2_values = utils.remove_nans(x_old, y_old)
influence = statistics.return_influence(var1_values, var2_values)

#assert_almost_equal(self.pointwise_results[p][f],
arr_0, arr_1, arr_2, arr_3 = self.infln_mapping[f](var1_index=self.var1,
var2_index=self.var2, samp_var1=self.samp_var1,
samp_var2=self.samp_var2, influence=influence,
threshold=self.threshold[p], fold=self.fold,
fold_value=self.fold_value[p], param=p)

fp = self.work_dir + '_'.join(['normal', p, f, '.npz'])
np.savez(fp, arr_0, arr_1, arr_2, arr_3)
results = []
for key, value in np.load(fp).iteritems():
results.append(value)
np.savetxt(self.work_dir + '_'.join['normal', p, f, key + '.txt', value)
self.pointwise_results[p][f] = results

# setup pointwise results with intermediate files for negnan
tuples = [(0,0),(0,1)]

self.nan_neg_pointwise_results = {}
for t in tuples:
t1, t2, = t
self.nan_neg_pointwise_results[str(t)] = {}
for p in ['p', 'r']:
self.nan_neg_pointwise_results[str(t)][p] = {}
for f in self.infln_mapping:
var1_values = self.nanneg_samp_var1[:, t1]
var2_values = self.nanneg_samp_var2[:, t2]
tuplesP = list(itertools.permutations(list(range(len(self.samp_var1[0,:]))), 2))
tuplesC = list(itertools.combinations_with_replacement(list(range(len(self.samp_var2[0,:]))), 2))
self.tuples = sorted(list(set(tuplesP) | set(tuplesC)))

# remove nan for influence calculation
# var1_values, var2_values = utils.remove_nans(x_old, y_old)
influence = statistics.return_influence(var1_values, var2_values)

arr_0, arr_1, arr_2, arr_3 = self.infln_mapping[f](var1_index=t1,
var2_index=t2, samp_var1=self.nanneg_samp_var1,
samp_var2=self.nanneg_samp_var2, influence=influence,
threshold=self.threshold[p], fold=self.fold,
fold_value=self.fold_value[p], param=p)

fp = self.work_dir + '_'.join(['nanneg', str(t), p, f, '.npz'])
np.savez(fp, arr_0, arr_1, arr_2, arr_3)
results = []
for key, value in np.load(fp).iteritems():
results.append(value)
np.savetxt(self.work_dir + '_'.join['nanneg', str(t), p, f, key + '.txt', value)
self.nan_neg_pointwise_results[str(t)][p][f] = results

def test_compute_pc(self):
assert_almost_equal((np.nan, np.nan), statistics.compute_pc(self.undef_corr[0],
Expand Down Expand Up @@ -535,43 +448,55 @@ def test_calculate_FP_sets(self):
self.samp_var1, self.samp_var2, self.infln_metrics, self.infln_mapping,
self.threshold[p], True, self.fold_value[p], p)


def test_pointwise_metrics(self):
    """Exercise the pointwise influence metrics (cutie_1pc, cookd, dffits, dsr).

    Phase 1 computes each metric for every variable-index pair in
    self.tuples and both parameter modes ('p' and 'r'), persisting the
    four returned arrays as a compressed .npz plus per-array .txt files
    in self.work_dir.  Phase 2 recomputes the same metrics — including on
    columns mixed with nan and negative values as defined in setUp — and
    asserts the fresh results match the saved ones.

    NOTE(review): phase 1 passes `influence` built from nan-stripped
    columns, while phase 2 builds it from the raw columns; the metrics'
    docstrings say they are not intended for nan data — confirm the raw
    columns here are nan-free for the pairs tested.
    """
    # Phase 1: generate reference results and write intermediate files.
    pointwise_results = {}
    for t in self.tuples:
        t1, t2 = t
        pointwise_results[str(t)] = {}

        for p in ['p', 'r']:
            pointwise_results[str(t)][p] = {}

            for f in self.infln_mapping:
                x_old = self.samp_var1[:, t1]
                y_old = self.samp_var2[:, t2]

                # Strip nan entries before fitting the influence model.
                var1_values, var2_values = utils.remove_nans(x_old, y_old)
                influence = statistics.return_influence(var1_values,
                                                        var2_values)

                arr_0, arr_1, arr_2, arr_3 = self.infln_mapping[f](
                    var1_index=t1, var2_index=t2,
                    samp_var1=self.samp_var1, samp_var2=self.samp_var2,
                    influence=influence, threshold=self.threshold[p],
                    fold=self.fold, fold_value=self.fold_value[p], param=p)

                # Save results to a compressed archive, then mirror each
                # stored array out to a plain-text file.
                fp = self.work_dir + '_'.join([str(t), p, f, '.npz'])
                np.savez(fp, arr_0, arr_1, arr_2, arr_3)
                results = []
                for key, value in np.load(fp).items():
                    results.append(value)
                    np.savetxt(
                        self.work_dir + '_'.join([str(t), p, f,
                                                  key + '.txt']),
                        value)

                pointwise_results[str(t)][p][f] = results

    # Phase 2: recompute cutie, cookd, dffits, dsr for every pair —
    # inputs include columns mixed with nan and neg values (see setUp) —
    # and compare against the saved reference results.
    for t in self.tuples:
        t1, t2 = t
        var1_values = self.samp_var1[:, t1]
        var2_values = self.samp_var2[:, t2]
        influence = statistics.return_influence(var1_values, var2_values)

        for p in ['p', 'r']:
            for f in self.infln_mapping:
                results = self.infln_mapping[f](
                    var1_index=t1, var2_index=t2,
                    samp_var1=self.samp_var1, samp_var2=self.samp_var2,
                    influence=influence, threshold=self.threshold[p],
                    fold=self.fold, fold_value=self.fold_value[p], param=p)
                # assert_almost_equal compares to 7 decimal places by
                # default, which is what we rely on here.
                assert_almost_equal(pointwise_results[str(t)][p][f],
                                    results)

def test_pointwise_comparison(self):
for p in ['p', 'r']:
Expand Down

0 comments on commit 0f6a9bd

Please sign in to comment.