# BradleyTerry.py -- 89 lines (74 loc), 2.93 KB
# NOTE: the lines that stood here were residue from the repository web viewer
# (notification banner, fork counter and the line-number gutter 1-86); they
# were not part of the module and have been reduced to this comment.
import numpy as np
import choix
class Feature:
    """One named item together with its fitted strength.

    Attributes:
        featureName: Name supplied at construction.
        itemKey: Key supplied at construction.
        surveyName: Survey-side name supplied at construction.
        strength: Starts at -1 and is meant to be overwritten once a fit
            has been run.
    """

    def __init__(self, featureName=None, itemKey=None, surveyName=None):
        """Store the three identifiers and mark the strength as not yet set."""
        self.featureName, self.itemKey, self.surveyName = (
            featureName,
            itemKey,
            surveyName,
        )
        self.strength = -1

    def __str__(self):
        """Render as ``featureName/surveyName - itemKey : strength``."""
        shown = (self.featureName, self.surveyName, self.itemKey, self.strength)
        return '{}/{} - {} : {}'.format(*shown)
class FeatureSet:
    """Named features whose strengths are fitted from pairwise survey data.

    Each feature gets an integer ``itemKey`` equal to its position in the
    name list that remains after exclusions; ``fit`` passes those keys to
    ``choix`` as item indices.
    """

    def __init__(self, filename, exclude=None):
        """Build the feature list from a CSV header row or a list of names.

        Args:
            filename: Either a path whose first line is a comma-separated
                header, or a ``list`` of feature names. A list argument is
                copied, never modified.
            exclude: Features to leave out. ``None`` keeps everything; an
                ``int`` is a position in the unfiltered name list; a ``str``
                is a feature name; a ``list`` may mix both. Any other value
                is ignored.

        Raises:
            IndexError: If an integer exclusion is out of range.
            ValueError: If a named exclusion is not present.
        """
        if isinstance(filename, list):
            # Copy so the caller's list is left untouched by the exclusions.
            names = list(filename)
        else:
            with open(filename, 'r') as f:
                names = f.readline().strip().split(',')

        kept = self._drop_excluded(names, exclude)
        # featureName and surveyName start out identical; see ``rename``.
        self.features = [Feature(name, key, name) for key, name in enumerate(kept)]

    @staticmethod
    def _drop_excluded(names, exclude):
        """Return a new list of ``names`` without the excluded entries.

        Integer entries always refer to positions in the unfiltered ``names``
        list, so earlier removals cannot shift later indices. String entries
        remove the first occurrence that has not been removed already.
        """
        if exclude is None:
            return list(names)

        entries = exclude if isinstance(exclude, list) else [exclude]
        total = len(names)
        dropped = set()
        for entry in entries:
            if isinstance(entry, int):
                if not -total <= entry < total:
                    raise IndexError(
                        'exclude index {} out of range for {} features'.format(entry, total)
                    )
                dropped.add(entry % total)  # normalise negative indices
            elif isinstance(entry, str):
                position = next(
                    (i for i, name in enumerate(names)
                     if name == entry and i not in dropped),
                    None,
                )
                if position is None:
                    raise ValueError(
                        'cannot exclude {!r}: no such feature'.format(entry)
                    )
                dropped.add(position)
            # Entries of any other type are ignored.
        return [name for i, name in enumerate(names) if i not in dropped]

    def __str__(self):
        """Return a newline, then one tab-indented line per feature."""
        return '\n' + ''.join('\t{}\n'.format(f) for f in self.features)

    def get_by_featureName(self, featureName):
        """Return the first feature with this ``featureName``, or ``None``."""
        for feature in self.features:
            if feature.featureName == featureName:
                return feature
        return None

    def get_by_surveyName(self, surveyName):
        """Return the first feature with this ``surveyName``, or ``None``."""
        for feature in self.features:
            if feature.surveyName == surveyName:
                return feature
        return None

    def get_by_itemKey(self, itemKey):
        """Return the first feature with this ``itemKey``, or ``None``."""
        for feature in self.features:
            if feature.itemKey == itemKey:
                return feature
        return None

    def rename(self, featureName, surveyName):
        """Set the survey-side name of the feature called ``featureName``.

        Raises:
            KeyError: If no feature has that ``featureName``.
        """
        feature = self.get_by_featureName(featureName)
        if feature is None:
            raise KeyError('no feature named {!r}'.format(featureName))
        feature.surveyName = surveyName

    def _parse_comparisons(self, lines):
        """Turn ``"A > B"`` lines into ``(itemKey_A, itemKey_B)`` tuples.

        Blank lines are skipped. Only the first two ``>``-separated fields of
        a line are used. Names are matched against ``surveyName``.

        Raises:
            ValueError: If a line has no ``>`` or names an unknown feature.
        """
        comparisons = []
        for lineno, raw in enumerate(lines, start=1):
            if not raw.strip():
                continue
            fields = raw.split('>')
            if len(fields) < 2:
                raise ValueError(
                    "line {}: expected 'A > B', got {!r}".format(lineno, raw.strip())
                )
            pair = []
            for name in (fields[0].strip(), fields[1].strip()):
                feature = self.get_by_surveyName(name)
                if feature is None:
                    raise ValueError(
                        'line {}: unknown survey name {!r}'.format(lineno, name)
                    )
                pair.append(feature.itemKey)
            comparisons.append(tuple(pair))
        return comparisons

    def fit(self, surveyfile):
        """Estimate every feature's strength from a file of pairwise choices.

        Each non-blank line of ``surveyfile`` has the form ``A > B``, where
        both sides are survey names. The pairs are passed to
        ``choix.opt_pairwise`` in that order (choix reads the first item of a
        pair as the preferred one) with ``alpha=0.0001``. The exponential of
        each returned parameter is stored in the matching feature's
        ``strength``.

        Raises:
            ValueError: If a line is malformed or names an unknown feature.
        """
        with open(surveyfile, 'r') as f:
            cdata = self._parse_comparisons(f)

        results = choix.opt_pairwise(len(self.features), cdata, alpha=0.0001)
        for key, strength in enumerate(np.exp(results)):
            self.get_by_itemKey(key).strength = strength
if __name__ == '__main__':
    # Smoke run: load the header of the bundled CSV minus a fixed set of
    # columns, map the 'Age' feature to its survey spelling, fit strengths
    # from the bundled survey file and print the resulting table.
    print('Running dummy test: ')
    excluded_columns = [
        'HasTelephone',
        'CheckingAccountBalance_geq_0',
        'CheckingAccountBalance_geq_200',
        'SavingsAccountBalance_geq_100',
        'SavingsAccountBalance_geq_500',
    ]
    data = FeatureSet('./data/adult.csv', exclude=excluded_columns)
    data.rename('Age', 'age')
    data.fit('./data/adult-survey.txt')
    print(data)