-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMultiple_Regression.py
67 lines (54 loc) · 2.46 KB
/
Multiple_Regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc, font_manager
from sklearn import linear_model
## Plot Setup ##
# Font properties intended for tick labels; not referenced again in the
# visible script.
ticks_font = font_manager.FontProperties(
    family='Times New Roman', style='normal', size=12,
    weight='normal', stretch='normal',
)
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so prefer the new name when this
# installation provides it and fall back to the original name otherwise.
style_name = ('seaborn-v0_8-white'
              if 'seaborn-v0_8-white' in plt.style.available
              else 'seaborn-white')
plt.style.use(style_name)
# Handle to the current axes (plt.gca() creates a figure if none exists yet);
# `ax` is not referenced again in the visible script.
ax = plt.gca()
## Loading Data ##
# Raw string: the Windows path is full of backslashes, and sequences such as
# "\P" or "\e" are invalid escapes in a normal string literal (deprecated, and
# reported as a warning by current Python). The resulting path is unchanged.
df = pd.read_csv(r'D:\Python\edx\Machine Learning\FuelConsumptionCo2.csv')

# Append a preview of the raw data and its summary statistics to the report.
with open('MultipleReg.txt', 'a') as f:
    print(df.head(), file=f)
    print(df.describe(), file=f)

## Data features to be used for regression ##
f_col = [
    'ENGINESIZE',
    'CYLINDERS',
    'FUELCONSUMPTION_CITY',
    'FUELCONSUMPTION_HWY',
    'FUELCONSUMPTION_COMB',
    'CO2EMISSIONS',
]
X = df[f_col]

# Append the first nine rows of the selected columns to the report.
with open('MultipleReg.txt', 'a') as f:
    print(X.head(9), file=f)

# Scatter plot of the full data set: engine size against CO2 emissions.
plt.figure()
plt.scatter(X.ENGINESIZE, X.CO2EMISSIONS, color='blue')
plt.title('Scatter Plot - Engine Size vs Emissions',
          fontname='Times New Roman', fontsize=12)
plt.ylabel('Emissions', fontname='Times New Roman', fontsize=12)
plt.xlabel('Engine Size', fontname='Times New Roman', fontsize=12)
## Train Test Data ##
# Random split: each row lands in the training set with probability 0.8.
mask = np.random.rand(len(df)) < 0.8
train = X[mask]
# The test set must be the complement of the training rows; reusing `mask`
# here would evaluate the model on the very data it was fitted to.
test = X[~mask]

# Scatter plot of the training rows only.
plt.figure()
plt.scatter(train.ENGINESIZE, train.CO2EMISSIONS, color='blue')
plt.title('Train Data Plot - Engine Size vs Emissions',
          fontname='Times New Roman', fontsize=12)
plt.ylabel('Emissions', fontname='Times New Roman', fontsize=12)
plt.xlabel('Engine Size', fontname='Times New Roman', fontsize=12)

## MLR ##
# Multiple linear regression is fitted with Ordinary Least Squares (OLS).
# OLS parameters can be found using one of the following methods:
#   - solving for the model parameters analytically with closed-form equations
#   - using an optimization algorithm (Gradient Descent, Stochastic Gradient
#     Descent, Newton's Method, etc.)
features = ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']
Lreg = linear_model.LinearRegression()
x = np.asanyarray(train[features])
y = np.asanyarray(train[['CO2EMISSIONS']])
Lreg.fit(x, y)
with open('MultipleReg.txt', 'a') as f:
    print('Coefficients: ', Lreg.coef_, file=f)
    print('Intercept: ', Lreg.intercept_, file=f)

## Prediction ##
x1 = np.asanyarray(test[features])
y1 = np.asanyarray(test[['CO2EMISSIONS']])
# Predict from the plain array so the input type matches what fit() received.
y_hat = Lreg.predict(x1)
with open('MultipleReg.txt', 'a') as f:
    # Compare predictions with the held-out targets (y1), not the training
    # targets. NOTE: despite the label, the value written is the mean of the
    # squared residuals rather than their sum.
    print('Residual Sum of squares: %.2f' % np.mean((y_hat - y1) ** 2), file=f)
    # LinearRegression.score reports the coefficient of determination (R^2)
    # of the model on the given test data.
    print('Variance score: %.2f' % Lreg.score(x1, y1), file=f)

## Display Plot ##
plt.show()