-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmale_female_analyse.py
343 lines (272 loc) · 10.6 KB
/
male_female_analyse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
'''
This file is only used for further investigation of the male/female dimensions of the trained model.
It is bound to a fixed set of files; if it is to be used with other files, it has to be adapted.
'''
import sys
from docopt import docopt
import numpy as np
import mydataloader
from mydataloader import index_to_tag
from sent_representation_classify import load_simple_model, load_data
import torch
from torch.utils.data import DataLoader
import torch.autograd as autograd
import model as m
import matplotlib.pyplot as plt
import random
# All analysis files live in this folder; file names are built from
# FILE_BASE + dataset prefix + category suffix.
FOLDER = './analyses/'
FILE_BASE = 'invert_4m4f_'
# Dataset prefixes (note the trailing '-', relied on when building keys and file names).
FILE_APX_DATA = ['train-', 'dev-']
# (human-readable title, file-name suffix) for each combination of
# normal-model vs inverted-model classification outcome.
FILE_CATEGORIES = [
    ('Initially and inverted correct', 'correct_correct'),
    ('Initially correct and inverted incorrect', 'correct_incorrect'),
    ('Initially incorrect and inverted correct', 'incorrect_correct'),
    ('Initially and inverted incorrect', 'incorrect_incorrect')
]
# Statistics file written by print_sents() and read back by plot_findings().
FILE_OUT_TWIST_COUNTS = FOLDER + FILE_BASE + 'results.txt'
# Distinct bar colors, indexed by label position in the plots.
color_palette = [
    '#e6194b',
    '#3cb44b',
    '#ffe119',
    '#0082c8',
    '#f58231',
    '#911eb4',
    '#46f0f0',
    '#000000',
    '#f032e6',
    '#d2f53c',
    '#fabebe',
    '#008080',
    '#aa6e28',
    '#800000',
    '#808000',
    '#000080',
    '#808080',
    '#e6beff',
    '#aaffc3',
    '#fffac8',
    '#ff0000',
    '#00ff00',
    '#0000ff',
    '#ff00ff',
    '#234567',
    '#00ffff'
]
def init_misclassification_dict(labels):
    """Build a nested counter d[gold][predicted] = 0 for every pair of labels."""
    return {gold: {predicted: 0 for predicted in labels} for gold in labels}
def plot_correct_incorrect_bar(x_labels, misclassification_dict, title, block=True):
    """Plot a grouped bar chart of classification counts per gold label.

    The overall accuracy is appended to the title and each tick label gets the
    per-label accuracy appended.

    :param x_labels: ordered list of label names (keys on both axes of the dict)
    :param misclassification_dict: nested dict d[gold][predicted] -> count
    :param title: plot title
    :param block: whether plt.show blocks until the window is closed
    """
    num_groups = len(x_labels)
    # per predicted label: counts for each gold label, in x_labels order (one bar series each)
    amounts = dict()
    for lbl in x_labels:
        amounts[lbl] = [misclassification_dict[l][lbl] for l in x_labels]
    # per gold label: total number of samples (denominator for accuracy)
    amounts_gold = dict()
    for lbl in x_labels:
        amounts_gold[lbl] = sum([misclassification_dict[lbl][l] for l in x_labels])
    # calculate accuracy (diagonal of the misclassification matrix)
    amount_data = [amounts_gold[lbl] for lbl in x_labels]
    correct = [amounts[lbl][i] for i, lbl in enumerate(x_labels)]
    print(title)
    print(amount_data)
    print(correct)
    accuracy = round(sum(correct) / sum(amount_data) * 100, 2)
    # guard against gold labels without any samples (avoid ZeroDivisionError)
    lbl_accuracies = [round(correct[i] / amount_data[i] * 100, 2) if amount_data[i] else 0.0
                      for i in range(len(x_labels))]
    fig, ax = plt.subplots()
    index = np.arange(num_groups)
    bar_width = .1
    for i, lbl in enumerate(x_labels):
        plt.bar(index + i * bar_width, amounts[lbl], bar_width, color=color_palette[i], label=lbl)
    plt.xlabel('Gold label')
    plt.ylabel('# samples')
    # append each label's accuracy to its tick label
    x_labels = [lbl + ' (' + str(lbl_accuracies[i]) + ')' for i, lbl in enumerate(x_labels)]
    plt.xticks(index + bar_width, x_labels)
    plt.title(title + ' (' + str(accuracy) + ')')
    plt.legend(title='Classified as:', bbox_to_anchor=(0,1.12,1,0.2), loc="lower left",
               mode="expand", borderaxespad=0, ncol=3)
    plt.subplots_adjust(top=0.8)
    plt.show(block=block)
def plot_diff_inversed_normal(x_labels, correct, inverse_model_dict, title, block=True):
    """Plot the standard model's correct counts next to the inverted model's predictions.

    :param x_labels: ordered list of label names
    :param correct: per gold label, number of samples the standard model got right
    :param inverse_model_dict: nested dict d[gold][predicted_inv] -> count
    :param title: plot title
    :param block: whether plt.show blocks until the window is closed
    """
    num_groups = len(x_labels)
    # per predicted label of the inverted model: counts for each gold label
    amounts = dict()
    for lbl in x_labels:
        amounts[lbl] = [inverse_model_dict[l][lbl] for l in x_labels]
    fig, ax = plt.subplots()
    index = np.arange(num_groups)
    bar_width = .1
    # plot normal model (uses the color right after the per-label colors)
    plt.bar(index, correct, bar_width, color=color_palette[num_groups], label='Correct by std model')
    # plot inversed model, one bar series per predicted label, shifted right of the normal bar
    for i, lbl in enumerate(x_labels):
        plt.bar(index + (i+1) * bar_width, amounts[lbl], bar_width, color=color_palette[i], label=lbl)
    plt.xlabel('Gold label')
    plt.ylabel('# samples')
    plt.xticks(index + bar_width * 1.5, x_labels)
    plt.title(title)
    plt.legend(title='Classified as:', bbox_to_anchor=(0,1.12,1,0.2), loc="lower left",
               mode="expand", borderaxespad=0, ncol=2)
    plt.subplots_adjust(top=0.7)
    plt.show(block=block)
def plot_findings(params):
    """Plot classification results of the normal and the inverted model.

    Reads FILE_OUT_TWIST_COUNTS, whose lines look like
    ``<dataset>-<gold>-<predicted>-<predicted_inv> <amount>``, and creates three
    bar charts: normal model, inverted model, and the inverted model restricted
    to samples the normal model classified correctly.

    :param params: dataset to plot ('train' or 'dev'); defaults to 'train'
    """
    if params is None:
        params = 'train'
    with open(FILE_OUT_TWIST_COUNTS) as f_in:
        # split each line only once: (key, amount)
        all_data = []
        for line in f_in:
            parts = line.strip().split(' ')
            all_data.append((parts[0], int(parts[1])))
    # keep only the requested dataset and unpack the key into its three labels
    filtered_data = []
    for key, amount in all_data:
        key_parts = key.split('-')
        if key_parts[0] == params:
            filtered_data.append((key_parts[1], key_parts[2], key_parts[3], amount))
    x_labels = index_to_tag
    # Plot basic results for normal model
    misclassification_dict_normal = init_misclassification_dict(x_labels)
    for gold, predicted, _, amount in filtered_data:
        misclassification_dict_normal[gold][predicted] += amount
    title = 'Classification (normal model) in ' + params
    plot_correct_incorrect_bar(x_labels, misclassification_dict_normal, title, block=False)
    # plot basic results for inversed model
    misclassification_dict_inv = init_misclassification_dict(x_labels)
    for gold, _, predicted_inv, amount in filtered_data:
        misclassification_dict_inv[gold][predicted_inv] += amount
    title = 'Classification (inversed male/female model) in ' + params
    plot_correct_incorrect_bar(x_labels, misclassification_dict_inv, title, block=False)
    # Plot inversed model compared with normal model (only where normal was correct)
    correct_normal = [misclassification_dict_normal[lbl][lbl] for lbl in x_labels]
    misclassification_dict_inv_normal = init_misclassification_dict(x_labels)
    for gold, predicted, predicted_inv, amount in filtered_data:
        if gold == predicted:
            misclassification_dict_inv_normal[gold][predicted_inv] += amount
    title = 'M/F inversed where normal model was correct'
    plot_diff_inversed_normal(x_labels, correct_normal, misclassification_dict_inv_normal, title, block=True)
def print_sents(params):
    """Print randomly sampled sentence pairs per category file and write twist statistics.

    Each category file consists of 9-line records:
    line 0: premise, line 3: hypothesis,
    line 6: three label indices (gold, predicted, predicted-inverted),
    line 7: confidences of the normal model, line 8: confidences of the inverted model.
    Counts per (dataset, gold, predicted, predicted-inv) key are written to
    FILE_OUT_TWIST_COUNTS for later plotting.

    :param params: number of sentences to sample per file (string); defaults to 100
    """
    if params is None:
        num_sents = 100
    else:
        num_sents = int(params)
    counter = dict()
    for data_type in FILE_APX_DATA:
        for category_title, category in FILE_CATEGORIES:
            file = FOLDER + FILE_BASE + data_type + category + '.txt'
            print()
            print('Reading:', file)
            print('Showing sentences of', data_type + 'data:', category_title)
            with open(file) as f_in:
                premise = None
                hypothesis = None
                labels = None
                confidences_normal = None
                lines = f_in.readlines()
                size = len(lines) // 9
                # cap the sample at the number of complete records
                # (random.sample raises ValueError when asked for more)
                sample_cnt = min(num_sents, size)
                # indices of the LAST line of each sampled record; set for O(1) lookup
                random_smpl_indizes = set(idx * 9 + 8 for idx in random.sample(range(size), sample_cnt))
                for line_idx, line in enumerate(lines):
                    # line 0 of a record: premise
                    if line_idx % 9 == 0:
                        premise = line.strip()
                    # line 3: hypothesis
                    if line_idx % 9 == 3:
                        hypothesis = line.strip()
                    # line 6: gold / predicted / predicted-inverted label indices
                    if line_idx % 9 == 6:
                        labels = [int(v) for v in line.strip().split(' ')]
                    # line 7: confidences of the normal model
                    if line_idx % 9 == 7:
                        confidences_normal = line.strip().split(' ')
                    # line 8: confidences of the inverted model; the record is complete here
                    if line_idx % 9 == 8:
                        confidences_inv = line.strip().split(' ')
                        lbl_gold = index_to_tag[labels[0]]
                        lbl_predicted = index_to_tag[labels[1]]
                        lbl_predicted_inv = index_to_tag[labels[2]]
                        conf_norml = confidences_normal[labels[1]]
                        conf_inv = confidences_inv[labels[2]]
                        if line_idx in random_smpl_indizes:
                            print('premise:', premise)
                            print('hypothesis:', hypothesis)
                            print('label-gold:', lbl_gold)
                            print('label-predicted:', lbl_predicted, '(' + conf_norml + ')')
                            print('label-predicted-inv:', lbl_predicted_inv + '(' + conf_inv + ')')
                            print('-----')
                        # count every record (data_type already ends with '-')
                        key = data_type + lbl_gold + '-' + lbl_predicted + '-' + lbl_predicted_inv
                        counter[key] = counter.get(key, 0) + 1
            print('Done.')
    # write out overall statistics for plotting
    print('Write statistics into', FILE_OUT_TWIST_COUNTS)
    with open(FILE_OUT_TWIST_COUNTS, 'w') as f_out:
        for key in counter.keys():
            f_out.write(key + ' ' + str(counter[key]) + '\n')
def plot_stats_at_path(params):
    '''
    plot the accuracies of a model. file must have the following lines:
    <dataset>-<goldlabel>-<predicted label>-<dummy> <amount>
    :params like "<path to file> <dataset>"
    '''
    # split the parameter string only once instead of once per field
    splitted_params = params.split(' ')
    path = splitted_params[0]
    dataset = splitted_params[1]
    print(path)
    print(dataset)
    with open(path) as f_in:
        # split each line only once: (key, amount)
        all_data = []
        for line in f_in:
            parts = line.strip().split(' ')
            all_data.append((parts[0], int(parts[1])))
    # keep only the requested dataset and unpack the key into its three labels
    filtered_data = []
    for key, amount in all_data:
        key_parts = key.split('-')
        if key_parts[0] == dataset:
            filtered_data.append((key_parts[1], key_parts[2], key_parts[3], amount))
    x_labels = index_to_tag
    # Plot basic results for normal model
    misclassification_dict_normal = init_misclassification_dict(x_labels)
    for gold, predicted, _, amount in filtered_data:
        misclassification_dict_normal[gold][predicted] += amount
    title = 'Classification (normal model) in ' + dataset
    plot_correct_incorrect_bar(x_labels, misclassification_dict_normal, title)
def print_stats_simple_model(params):
    '''
    prints the statistics needed to plot details of the model
    :params need to be "path dims=dim1,dim2,dim3"
    '''
    fields = params.split(' ')
    model_path = fields[0]
    # "dims=1,2,3" -> [1, 2, 3]
    dimensions = [int(d) for d in fields[1].split('=')[1].split(',')]
    classifier = load_simple_model(model_path, len(dimensions))
    # load train and dev data restricted to the selected dimensions
    loaders = [
        ('train', DataLoader(load_data(FOLDER, 'train', dimensions), drop_last=False, batch_size=1, shuffle=False)),
        ('dev', DataLoader(load_data(FOLDER, 'dev', dimensions), drop_last=False, batch_size=1, shuffle=False))
    ]
    # go through both datasets and count (gold, predicted) pairs
    for name, loader in loaders:
        misclassification_dict = init_misclassification_dict(list(range(len(index_to_tag))))
        for p, h, lbl in loader:
            var_p = m.cuda_wrap(autograd.Variable(p))
            var_h = m.cuda_wrap(autograd.Variable(h))
            var_lbl = m.cuda_wrap(autograd.Variable(lbl))
            prediction = classifier(var_p, var_h)
            _, predicted_idx = torch.max(prediction, dim=1)
            predicted_idx = predicted_idx.data[0]
            lbl = lbl[0]
            misclassification_dict[lbl][predicted_idx] += 1
        # print the counts; the predicted label is duplicated as a dummy so the
        # line format matches the one produced for inverted dimensions
        for lbl_idx_gold in range(len(index_to_tag)):
            for lbl_idx_predicted in range(len(index_to_tag)):
                print(name + '-' + index_to_tag[lbl_idx_gold] + '-' + index_to_tag[lbl_idx_predicted] + '-' + index_to_tag[lbl_idx_predicted] + ' ' + str(misclassification_dict[lbl_idx_gold][lbl_idx_predicted]))
# Dispatch table: CLI command name -> handler function.
mapper = {
    'sents': print_sents,
    'plot': plot_findings,
    'print_stats_simple_model': print_stats_simple_model,
    'plot_simple_model': plot_stats_at_path
}
def main():
    """CLI entry point: parse the command with docopt and dispatch via mapper."""
    args = docopt("""Analyse male/female effects

    Usage:
        male_female_analyse.py sents [<params>]
        male_female_analyse.py plot [<params>]
        male_female_analyse.py print_stats_simple_model <params>
        male_female_analyse.py plot_simple_model <params>
    """)
    # exactly one command flag is True; identity check instead of '== True' (PEP 8 / E712)
    fn = [k for k in args if args[k] is True][0]
    mapper[fn](args['<params>'])


if __name__ == '__main__':
    main()