-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinear_regression_from_scratch(OLS)
49 lines (48 loc) · 1.46 KB
/
linear_regression_from_scratch(OLS)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from math import sqrt
# Some utility functions
def mean(values):
    """Return the arithmetic mean of ``values``.

    ``values`` must support both ``sum()`` and ``len()``. The division is
    carried out in floating point, so the result is a float for numeric
    input. An empty sequence raises ``ZeroDivisionError``.
    """
    total = sum(values)
    count = float(len(values))
    return total / count
# Calculate the variance of a list of numbers
def variance(values, mean):
    """Return the sum of squared deviations of ``values`` from ``mean``.

    Note that, despite the function's name, the total is NOT divided by the
    number of values: it is the un-normalised sum ``sum((v - mean) ** 2)``.
    The ``covariance`` helper in this file omits the same divisor, so the
    ratio of the two is unaffected.

    Args:
        values: Any iterable of numbers (list, tuple, generator, ...).
        mean: The value to measure deviations from. This parameter shadows
            the module-level ``mean`` function inside this body.

    Returns:
        The accumulated sum of squared deviations; ``0`` for empty input.
    """
    total = 0
    # Iterate the values directly instead of indexing by position, so that
    # any iterable works, not only sequences supporting len() and [].
    for value in values:
        total += (value - mean) ** 2
    return total
# Calculate covariance between x and y
def covariance(x, mean_x, y, mean_y):
    """Return the sum of cross-deviations of ``x`` and ``y`` from their means.

    The result is the un-normalised sum
    ``sum((x[i] - mean_x) * (y[i] - mean_y))``; it is not divided by the
    number of observations.

    Args:
        x: Sequence of numbers.
        mean_x: Value to measure deviations of ``x`` from.
        y: Sequence of numbers, the same length as ``x``.
        mean_y: Value to measure deviations of ``y`` from.

    Returns:
        The accumulated sum as a float (``0.0`` for empty input).

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
    """
    # Pairing by index used to ignore surplus y values silently (and fail
    # with a bare IndexError when y was shorter); reject mismatches up front.
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length (got %d and %d)"
            % (len(x), len(y))
        )
    covar = 0.0
    for x_value, y_value in zip(x, y):
        covar += (x_value - mean_x) * (y_value - mean_y)
    return covar
# Calculate coefficients(betas)
def coefficients(dataset):
    """Estimate the intercept and slope of a simple linear regression.

    Each row of ``dataset`` supplies the input in position 0 and the target
    in position 1. The slope is the ratio of ``covariance`` to ``variance``
    of the inputs, and the intercept follows from the two means.

    Returns:
        A two-element list ``[b0, b1]`` (intercept, slope).
    """
    inputs = [sample[0] for sample in dataset]
    targets = [sample[1] for sample in dataset]
    input_mean = mean(inputs)
    target_mean = mean(targets)
    cross_term = covariance(inputs, input_mean, targets, target_mean)
    slope = cross_term / variance(inputs, input_mean)
    intercept = target_mean - slope * input_mean
    return [intercept, slope]
# Simple linear regression algorithm
def simple_linear_regression(train, test):
    """Fit a line on ``train`` and predict a value for every row of ``test``.

    The coefficients come from ``coefficients(train)``; only the first
    element of each test row is read when predicting.

    Returns:
        A list with one prediction per test row, in order.
    """
    intercept, slope = coefficients(train)
    return [intercept + slope * sample[0] for sample in test]
# Evaluate regression algorithm on training dataset
def rmse_metric(actual, predicted):
    """Return the root mean squared error between two equal-length sequences.

    Args:
        actual: Sequence of observed values.
        predicted: Sequence of predicted values, the same length as
            ``actual``.

    Raises:
        ValueError: If the sequences differ in length or are empty.
    """
    if len(actual) != len(predicted):
        raise ValueError(
            "actual and predicted must have the same length (got %d and %d)"
            % (len(actual), len(predicted))
        )
    if not actual:
        raise ValueError("cannot compute RMSE of an empty sequence")
    squared_error = 0.0
    for observed, estimate in zip(actual, predicted):
        squared_error += (estimate - observed) ** 2
    return sqrt(squared_error / float(len(actual)))


def evaluate_algorithm(dataset, algorithm):
    """Run ``algorithm`` on ``dataset`` and score it against the same data.

    A copy of every row is made with its last element replaced by ``None``
    so the algorithm cannot read the target it is asked to predict. The
    algorithm is called as ``algorithm(dataset, test_set)`` and must return
    one prediction per row. The rows of ``dataset`` are not modified.

    Args:
        dataset: Sequence of rows; the last element of each row is the
            target value.
        algorithm: Callable taking ``(train, test)`` and returning a
            sequence of predictions.

    Returns:
        A ``(predicted, rmse)`` tuple: the algorithm's predictions and
        their root mean squared error against the targets in ``dataset``.
    """
    test_set = []
    for row in dataset:
        row_copy = list(row)
        # Hide the target in the copy handed to the algorithm.
        row_copy[-1] = None
        test_set.append(row_copy)
    predicted = algorithm(dataset, test_set)
    actual = [row[-1] for row in dataset]
    rmse = rmse_metric(actual, predicted)
    return predicted, rmse
# Test simple linear regression
# Small hand-made sample: each row is [input, target].
dataset = [
    [1, 1],
    [2, 3],
    [4, 3],
    [3, 2],
    [5, 5],
]
# Fit and score on the same data, then report the error to three decimals.
predicton, rmse = evaluate_algorithm(dataset, simple_linear_regression)
print('RMSE: %.3f' % rmse)