# To add a new cell, type '#%%'
# To add a new markdown cell, type '#%% [markdown]'
#%%
from IPython import get_ipython
#%% [markdown]
# <h1> SIT 720 - Python Intro </h1>
#%% [markdown]
# <h2>Model Appraisal in ML</h2>
#
# <h3>Python Packages for Supervised Learning</h3>
#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#%%
#load CSV
data = pd.read_csv('/Users/ragyibrahim/Downloads/foodtruck_profits.txt', delimiter = ',', header = None).values
#Print data shape
print("Data Shape: {}".format(data.shape))
#Print Head
print("Data head: {}".format(data[0:5, :]))
#%% [markdown]
# Create Scatter Plot of Data
#%%
#create plot
plt.scatter(data[:, 0], data[:, 1], color = 'red', marker = 'x')
#Add x label
plt.xlabel("Population of city in 10,000s")
#Add y label
plt.ylabel("Profit of 10,000s")
plt.show()
#%% [markdown]
# Predict profit from Population
#%%
#create a variable for the length
m = len(data)
#Split feature and label variables into separate matrices
X = np.matrix(data[:,0])
Y = np.matrix(data[:,1])
#np.matrix creates a 1 x m row matrix, so transpose to get m x 1 column vectors
X = X.T
Y = Y.T
#Print results
print("Number of Examples: {}".format(m))
print("Shape of Data: {}".format(X.shape))
print("Shape of Label: {}".format(Y.shape))
#%% [markdown]
# <h3>Fitting a regression model</h3>
#
# A regression line takes the form: $y = w_0 + w_1 x_1$ <br>
#
# The closed-form solution is: $W = (X^T X)^{-1} X^T y$ <br>
#
# where $W = [w_0, w_1]$ and $X = [1, x_1]$
#%%
#Add the intercept term to X
X = np.c_[np.ones(m), X]
print("New shape of data: {}".format(X.shape))
#%% [markdown]
# <code>np.linalg.pinv</code> computes the inverse when it exists, and the pseudo-inverse $A^+$ when the determinant is zero and a true inverse is not possible. This is why it is preferred over <code>np.linalg.inv</code>.
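#%% [markdown]
# A quick sketch of the difference, using a made-up singular matrix (not part of the food truck data): <code>np.linalg.inv</code> raises an error, while <code>np.linalg.pinv</code> still returns the Moore-Penrose pseudo-inverse.
#%%
#Hypothetical singular matrix: the second row is a multiple of the first, so the determinant is zero
A = np.array([[1.0, 2.0], [2.0, 4.0]])
try:
    np.linalg.inv(A)
except np.linalg.LinAlgError as err:
    print("inv failed: {}".format(err))
#pinv falls back to the pseudo-inverse instead of failing
print("pinv result:\n{}".format(np.linalg.pinv(A)))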
#%%
#Compute the closed form solution W = (X^T X)^{-1} X^T y, using pinv as noted above
tempW = np.linalg.pinv(np.dot(X.T, X))
tempW2 = np.dot(X.T, Y)
W = np.dot(tempW, tempW2)
print("Weights matrix shape: {}".format(W.shape))
print(W)
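#%% [markdown]
# As a quick cross-check (an addition, not part of the original exercise), <code>np.linalg.lstsq</code> solves the same least-squares problem directly and should return essentially the same weights.
#%%
#Solve the least-squares problem without forming (X^T X)^{-1} explicitly
W_lstsq, residuals, rank, sv = np.linalg.lstsq(np.asarray(X), np.asarray(Y), rcond=None)
print("Weights from lstsq:\n{}".format(W_lstsq))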
#%% [markdown]
# <h3>Plot regression line</h3>
#%%
#Plot data points
plt.scatter(data[:,0], data[:, 1], color = 'red', marker = "x")
#Plot regression line
plt.plot(X[:,1], np.dot(X,W))
#Add x and y labels
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in 10,000s')
#Show plot
plt.show()
#%% [markdown]
# <h3>Use the regression line to make predictions</h3>
#%%
#Prepend the intercept term to the x-value, then take the dot product with the weights
predicted_profit = np.dot([1, 17.5], W)
# Predicted profit is in units of $10,000, so multiply by 10,000 and round to get currency
print("The predicted profit at x = 17.5: {}".format(np.round(predicted_profit * 10000, decimals = 2)))