Skip to content

Commit ddb7f6f

Browse files
committed
Added template for the defeat learners project
1 parent 6d5a4e5 commit ddb7f6f

File tree

5 files changed

+183
-0
lines changed

5 files changed

+183
-0
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"""
2+
A simple wrapper for linear regression. (c) 2015 Tucker Balch
3+
"""
4+
5+
import numpy as np
6+
7+
class LinRegLearner(object):
    """
    Least-squares linear regression learner.

    The fitted model is stored in ``self.model_coefs``: one weight per input
    column followed by the constant (intercept) term as the last entry.
    """

    def __init__(self, verbose = False):
        """
        @summary: Create an untrained learner.
        @param verbose: debugging flag; stored on the instance, not otherwise
                        used by this class.
        """
        self.verbose = verbose

    def addEvidence(self,dataX,dataY):
        """
        @summary: Add training data to learner
        @param dataX: X values of data to add (2-D, one row per sample)
        @param dataY: the Y training values
        """
        # accept plain sequences as well as numpy arrays
        dataX = np.asarray(dataX)
        dataY = np.asarray(dataY)

        # slap on 1s column so linear regression finds a constant term
        newdataX = np.ones([dataX.shape[0], dataX.shape[1] + 1])
        newdataX[:, 0:dataX.shape[1]] = dataX

        # build and save the model; only the solution vector is needed.
        # rcond=-1 pins the historical cutoff so results (and warnings) do
        # not depend on which numpy default happens to be installed.
        self.model_coefs = np.linalg.lstsq(newdataX, dataY, rcond=-1)[0]

    def query(self,points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        points = np.asarray(points)
        weights = self.model_coefs[:-1]
        intercept = self.model_coefs[-1]
        return (weights * points).sum(axis = 1) + intercept
33+
34+
if __name__ == "__main__":
    # Parenthesized single-argument form prints identically on Python 2 and 3.
    print("the secret clue is 'zzyzx'")

mc3h1_defeat_learners/RTLearner.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
A FAKE Random Tree Learner. (c) 2016 Tucker Balch
3+
This is just a linear regression learner named RTLearner so
4+
it is used as a template or placeholder for use by testbest4. You should
5+
replace this code with your own RTLearner.
6+
"""
7+
8+
import numpy as np
9+
10+
class RTLearner(object):
    """
    Placeholder learner carrying the RTLearner name.

    It fits a least-squares linear model (weights followed by the constant
    term in ``self.model_coefs``) and adds Gaussian noise to every
    prediction. No tree is built.
    """

    def __init__(self, verbose = False, leaf_size = 1):
        """
        @summary: Create an untrained learner.
        @param verbose: debugging flag; stored on the instance, not otherwise used.
        @param leaf_size: stored on the instance; this placeholder does not use it.
        """
        self.verbose = verbose
        self.leaf_size = leaf_size

    def addEvidence(self,dataX,dataY):
        """
        @summary: Add training data to learner
        @param dataX: X values of data to add (2-D, one row per sample)
        @param dataY: the Y training values
        """
        # accept plain sequences as well as numpy arrays
        dataX = np.asarray(dataX)
        dataY = np.asarray(dataY)

        # slap on 1s column so linear regression finds a constant term
        newdataX = np.ones([dataX.shape[0], dataX.shape[1] + 1])
        newdataX[:, 0:dataX.shape[1]] = dataX

        # build and save the model; only the solution vector is needed.
        # rcond=-1 pins the historical cutoff so results (and warnings) do
        # not depend on which numpy default happens to be installed.
        self.model_coefs = np.linalg.lstsq(newdataX, dataY, rcond=-1)[0]

    def query(self,points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model, plus random noise.
        """
        points = np.asarray(points)
        # get the linear result
        ret_val = (self.model_coefs[:-1] * points).sum(axis = 1) + self.model_coefs[-1]
        # add some random noise (standard normal draws scaled by 0.09)
        ret_val = ret_val + 0.09 * np.random.normal(size = ret_val.shape[0])
        return ret_val
40+
41+
if __name__ == "__main__":
    # Parenthesized single-argument form prints identically on Python 2 and 3.
    print("get me a shrubbery")
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Ignore everything in this directory
2+
*
3+
# Except this file
4+
!.gitignore

mc3h1_defeat_learners/gen_data.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""
2+
template for generating data to fool learners (c) 2016 Tucker Balch
3+
"""
4+
5+
import numpy as np
6+
import math
7+
8+
# this function should return a dataset (X and Y) that will work
9+
# better for linear regression than random trees
10+
def best4LinReg():
    """
    @summary: Template data generator for the linear-regression case.
    @returns X: 100 x 4 array of standard-normal samples.
    @returns Y: sin(column 1) * cos(1 / (0.0001 + column 0 squared)), one value per row.
    """
    samples = np.random.normal(size = (100, 4))
    col0 = samples[:,0]
    col1 = samples[:,1]
    # the 0.0001 offset keeps the reciprocal finite when col0 is zero
    targets = np.sin(col1) * np.cos(1. / (0.0001 + col0 ** 2))
    return samples, targets
14+
15+
def best4RT():
    """
    @summary: Template data generator for the random-tree case.
    @returns X: 50 x 2 array of standard-normal samples.
    @returns Y: 0.8 * column 0 + 5.0 * column 1, one value per row.
    """
    samples = np.random.normal(size = (50, 2))
    first_term = 0.8 * samples[:,0]
    second_term = 5.0 * samples[:,1]
    return samples, first_term + second_term
19+
20+
if __name__ == "__main__":
    # Parenthesized single-argument form prints identically on Python 2 and 3.
    print("they call me Tim.")

mc3h1_defeat_learners/testbest4.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""
2+
Test best4 data generator. (c) 2016 Tucker Balch
3+
"""
4+
5+
import numpy as np
6+
import math
7+
import LinRegLearner as lrl
8+
import RTLearner as rt
9+
from gen_data import best4LinReg, best4RT
10+
11+
# compare two learners' rmse out of sample
12+
def compare_os_rmse(learner1, learner2, X, Y):
    """
    @summary: Train two learners on the same random 60% of the rows and
              compare their root-mean-square error on the remaining rows.
    @param learner1: object providing addEvidence(X, Y) and query(X)
    @param learner2: object providing addEvidence(X, Y) and query(X)
    @param X: 2-D numpy array, one row per sample
    @param Y: numpy array of target values, indexed by row like X
    @returns (rmse1, rmse2): out-of-sample RMSE of learner1 and learner2
    """
    # compute how much of the data is training; int() is required because
    # math.floor returns a float on Python 2 and numpy needs an integer size
    train_rows = int(math.floor(0.6 * X.shape[0]))

    # separate out training and testing data
    train = np.random.choice(X.shape[0], size=train_rows, replace=False)
    test = np.setdiff1d(np.arange(X.shape[0]), train)
    trainX = X[train, :]
    trainY = Y[train]
    testX = X[test, :]
    testY = Y[test]

    # train the learners
    learner1.addEvidence(trainX, trainY)
    learner2.addEvidence(trainX, trainY)

    # evaluate learner1 out of sample
    predY = learner1.query(testX)
    rmse1 = math.sqrt(((testY - predY) ** 2).sum()/testY.shape[0])

    # evaluate learner2 out of sample
    predY = learner2.query(testX)
    rmse2 = math.sqrt(((testY - predY) ** 2).sum()/testY.shape[0])

    return rmse1, rmse2
39+
40+
def test_code():
    """
    @summary: Run both data generators through compare_os_rmse and print a
              pass/fail line for each: best4LinReg() passes when the linear
              regression RMSE is below 0.9 times the RTLearner RMSE, and
              best4RT() passes when the reverse holds.

    The print calls use a single parenthesized string so the output is the
    same on Python 2 and Python 3.
    """

    # create two learners and get data
    lrlearner = lrl.LinRegLearner(verbose = False)
    rtlearner = rt.RTLearner(verbose = False, leaf_size = 1)
    X, Y = best4LinReg()

    # compare the two learners
    rmseLR, rmseRT = compare_os_rmse(lrlearner, rtlearner, X, Y)

    # share results
    print("")
    print("best4LinReg() results")
    print("RMSE LR :  %s" % rmseLR)
    print("RMSE RT :  %s" % rmseRT)
    if rmseLR < 0.9 * rmseRT:
        print("LR < 0.9 RT: pass")
    else:
        print("LR < 0.9 RT: fail")
    print("")

    # get data that is best for a random tree
    lrlearner = lrl.LinRegLearner(verbose = False)
    rtlearner = rt.RTLearner(verbose = False, leaf_size = 1)
    X, Y = best4RT()

    # compare the two learners
    rmseLR, rmseRT = compare_os_rmse(lrlearner, rtlearner, X, Y)

    # share results
    print("")
    print("best4RT() results")
    print("RMSE LR :  %s" % rmseLR)
    print("RMSE RT :  %s" % rmseRT)
    if rmseRT < 0.9 * rmseLR:
        print("RT < 0.9 LR: pass")
    else:
        print("RT < 0.9 LR: fail")
    print("")
79+
80+
if __name__ == "__main__":
    # run the learner comparison only when executed as a script
    test_code()

0 commit comments

Comments
 (0)