#!/usr/bin/python3
# Perceptron.py
# Author: Deepak Pandita
# Date created: 26 Jan 2018
import numpy as np
import argparse
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
# Optional command-line parameters:
#   --iterations N   number of passes over the training data
#   --nodev          skip the evaluation on the dev set
parser = argparse.ArgumentParser()
parser.add_argument('--iterations', action="store", help = "No. of iterations to run", type = int)
parser.add_argument('--nodev', help = "Not use the dev set", action = "store_true")
args = parser.parse_args()

# File paths
train_file = '/data/adult/a7a.train'
dev_file = '/data/adult/a7a.dev'
test_file = '/data/adult/a7a.test'

# Number of iterations defaults to 10. Compare against None rather than
# relying on truthiness so that an explicit "--iterations 0" is honoured
# instead of silently falling back to the default.
iterations = 10 if args.iterations is None else args.iterations

# The dev set is used unless --nodev was passed.
dev = not args.nodev
# Read train file; the context manager closes it even if reading fails.
print('Reading file: '+train_file)
with open(train_file) as f:
    train_examples = f.readlines()

# Weights (there are 123 features in the data + bias, stored last)
w = np.zeros(124)
# Snapshot of the weights taken after each iteration
Ws = []

# Parse every training line once, up front, instead of re-parsing the whole
# file on each iteration. Each line is "label index:value index:value ...";
# feature indices are 1-based and the last component of x is the constant
# bias input.
train_data = []
for line in train_examples:
    tokens = line.split()  # tolerant of repeated/trailing whitespace
    if not tokens:
        continue  # skip blank lines instead of crashing on float('')
    x = np.zeros(124)
    x[-1] = 1.0
    for token in tokens[1:]:
        feature, _, value = token.partition(":")
        x[int(feature) - 1] = float(value)
    train_data.append((float(tokens[0]), x))

# Perceptron
print('Running Perceptron...')
for epoch in range(1, iterations + 1):
    print('Iteration: ' + str(epoch))
    for y, x in train_data:
        # Misclassified (or on the boundary): move the weights toward y*x.
        # np.dot is the same inner product as sum(w*x) without the
        # Python-level loop over array elements.
        if y * np.dot(w, x) <= 0:
            w += y * x
    # Store a copy as a plain list so later in-place updates to w
    # do not alter earlier snapshots.
    Ws.append(w.tolist())
def predict(examples, W):
    """Return the accuracy of the weight vector W on the given examples.

    Args:
        examples: Iterable of text lines of the form
            "label index:value index:value ...", where feature indices are
            1-based. Blank lines are ignored.
        W: Weight vector (list or NumPy array) whose last component is the
            bias weight. Its length determines the feature dimension.

    Returns:
        The fraction of non-blank examples for which label * (W . x) > 0,
        as a float. Returns 0.0 when there are no examples to score.
    """
    weights = np.asarray(W, dtype=float)
    dim = weights.shape[0]
    correct = 0
    total = 0
    for line in examples:
        tokens = line.split()  # tolerant of repeated/trailing whitespace
        if not tokens:
            continue  # blank line: nothing to score
        total += 1
        y = float(tokens[0])  # label
        x = np.zeros(dim)
        x[-1] = 1.0  # constant input for the bias weight
        for token in tokens[1:]:
            feature, _, value = token.partition(":")
            x[int(feature) - 1] = float(value)
        if y * np.dot(weights, x) > 0:
            correct += 1
    if total == 0:
        # Avoid dividing by zero on an empty example set.
        return 0.0
    return correct / total
# If the dev set is in use, score every weight snapshot stored in Ws on it
# and plot the resulting accuracies.
if dev:
    # Read dev file; the context manager closes it even if reading fails.
    print('Reading file: '+dev_file)
    with open(dev_file) as d:
        dev_examples = d.readlines()

    # One accuracy value per stored weight snapshot, in order.
    accuracies = [predict(dev_examples, W) for W in Ws]
    print("Accuracies on dev set: " + str(accuracies))

    plt.plot(accuracies)
    plt.xlabel("#Iterations")
    plt.ylabel("Accuracy")
    plt.title("Plot of accuracy on dev set")
    plt.show()
# Read test file; the context manager closes it even if reading fails.
print('Reading file: '+test_file)
with open(test_file) as t:
    test_examples = t.readlines()

# Score the final weights on the test set.
accuracy = predict(test_examples,w)
print("Test accuracy: " + str(accuracy))

# Print the learned weights space-separated on one line (bias weight last).
# str.join replaces repeated string concatenation in a loop.
weight_str = " ".join(str(wt) for wt in w)
print("Feature weights (bias last): "+weight_str)