Commit
machine learning
Showing 159 changed files with 5,218 additions and 282 deletions.
Locally_Weighted_Linear_Regression/.idea/Locally_Weighted_Linear_Regression.iml (12 additions, 0 deletions): generated file, not rendered.
Locally_Weighted_Linear_Regression/.idea/inspectionProfiles/profiles_settings.xml (7 additions, 0 deletions): generated file, not rendered.
Three more generated files in this commit are not rendered.
@@ -0,0 +1,34 @@
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d  # enables the 3D projection

def plot1(x, y):
    """Visualize a single data set as a scatter plot."""
    plt.plot(x, y, 'o', color='blue', label='y_true')  # scatter plot of the true values
    plt.xlabel("X")  # axis labels
    plt.ylabel("y")
    plt.legend(loc='best')  # place the legend in the best position
    plt.show()  # display the figure

def plot2(x, y, x_test, y_pre):
    """Plot the true values as points and the predictions as a curve."""
    plt.plot(x, y, 'o', color='blue', label='y_true')
    plt.plot(x_test, y_pre, '-', color='red', label="y_pre")  # prediction curve
    plt.xlabel("X")
    plt.ylabel("y")
    plt.legend()
    plt.show()

def plot3(x, y, z):
    """3D visualization for two features."""
    fig = plt.figure()
    ax = plt.axes(projection='3d')
    ax.scatter3D(x, y, z, c=z, cmap='Blues')  # 3D scatter; point color darkens as the label value grows
    ax.set_xlabel("X1")  # axis labels
    ax.set_ylabel("X2")
    ax.set_zlabel("y")
    plt.show()

def plot4(X1, X2, y, x_test1, x_test2, y_pre1):
    """3D scatter of the data plus a 3D curve of the predictions."""
    fig = plt.figure()
    ax = plt.axes(projection='3d')
    ax.scatter3D(X1, X2, y, c=y, cmap='Blues')  # 3D scatter; point color darkens as the label value grows
    ax.plot3D(x_test1, x_test2, y_pre1, 'gray')  # 3D curve of the predictions
    ax.set_xlabel("X1")  # axis labels
    ax.set_ylabel("X2")
    ax.set_zlabel("y")
    plt.show()
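A quick usage sketch for these helpers, not part of the commit; the module name PlotData is assumed from the `import PlotData as PD` statement that appears later in this diff, and the data here is made up for illustration.

import numpy as np
import PlotData as PD

rng = np.random.RandomState(0)
x = np.sort(10 * rng.rand(50))
y = 2 * x - 5 + rng.randn(50)

PD.plot1(x, y)                 # scatter of the raw data
PD.plot2(x, y, x, 2 * x - 5)   # data plus a fitted line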
Binary files added (compiled __pycache__ artifacts):
BIN +688 Bytes Locally_Weighted_Linear_Regression/__pycache__/MSE_RMSE.cpython-37.pyc
BIN +1.43 KB Locally_Weighted_Linear_Regression/__pycache__/PlotData.cpython-37.pyc
BIN +364 Bytes Locally_Weighted_Linear_Regression/__pycache__/comCost_lwlr.cpython-37.pyc
BIN +485 Bytes Locally_Weighted_Linear_Regression/__pycache__/computeCost.cpython-37.pyc
BIN +716 Bytes Locally_Weighted_Linear_Regression/__pycache__/gradientdesent.cpython-37.pyc
BIN +3.45 KB Locally_Weighted_Linear_Regression/__pycache__/hold_out.cpython-37.pyc
BIN +644 Bytes Locally_Weighted_Linear_Regression/__pycache__/normalEqu.cpython-37.pyc
BIN +770 Bytes Locally_Weighted_Linear_Regression/__pycache__/plot_lwlr.cpython-37.pyc
BIN +1.1 KB Locally_Weighted_Linear_Regression/__pycache__/regression_lwlr.cpython-37.pyc
@@ -0,0 +1,6 @@
import numpy as np

def comCost_lwlr(y_test, y_pre):
    """Cost for locally weighted linear regression, computed from predictions."""
    m = len(y_test)                  # number of samples
    cha = y_pre - y_test             # h(x) - y
    J = np.dot(cha.T, cha) / m / 2   # cost: (1/2m) * sum of squared errors
    return J
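A minimal sanity check, not part of the commit, assuming y_test and y_pre are column vectors as in the callers later in this diff; the numbers are made up.

import numpy as np
from comCost_lwlr import comCost_lwlr

# errors of 1 and 3 give J = (1^2 + 3^2) / (2 * 2) = 2.5
y_test = np.array([[1.0], [2.0]])
y_pre = np.array([[2.0], [5.0]])
print(comCost_lwlr(y_test, y_pre))   # [[2.5]]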
@@ -0,0 +1,7 @@
import numpy as np

def computeCost(X, y, theta):
    """Compute the least-squares cost for feature matrix X, label vector y,
    and parameter vector theta; returns the cost value."""
    m = len(y)                       # number of samples
    cha = np.dot(X, theta) - y       # h(x) - y
    J = np.dot(cha.T, cha) / m / 2   # cost: (1/2m) * sum of squared errors
    return J[0]
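A small worked example, not part of the commit, showing the cost J(theta) = (1/2m) * (X theta - y)^T (X theta - y) on a tiny made-up data set.

import numpy as np
from computeCost import computeCost

# two samples, bias column plus one feature
X = np.array([[1.0, 1.0],
              [1.0, 2.0]])
y = np.array([[2.0], [4.0]])
theta = np.array([[0.0], [1.0]])   # predicts y_hat = x
# errors are 1 and 2, so J = (1 + 4) / (2 * 2) = 1.25
print(computeCost(X, y, theta))    # [1.25]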
@@ -0,0 +1,15 @@
import numpy as np
from computeCost import computeCost

def gradientDesent(X, y, theta, alpha, num_iters):
    """Batch gradient descent. X is the feature matrix, y the label vector,
    theta the initial parameter vector, alpha the learning rate, and num_iters
    the number of iterations; returns the fitted theta and the cost history."""
    m = len(y)                               # number of samples
    J_history = np.zeros((num_iters, 1))     # cost history, initialized to zeros
    for iters in range(num_iters):
        cha = np.dot(X, theta) - y           # h(x) - y for all samples
        theta = theta - alpha * (1 / m) * np.dot(X.T, cha)   # gradient descent update
        J_history[iters][0] = computeCost(X, y, theta)       # record the cost after each update
    return theta, J_history                  # fitted parameters and cost history
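A short usage sketch, not part of the commit, fitting a line y ≈ 2x + 1 with the function above; the data and the hyperparameter values are made up for illustration.

import numpy as np
from gradientdesent import gradientDesent

rng = np.random.RandomState(0)
x = rng.rand(100, 1)
X = np.hstack([np.ones((100, 1)), x])       # add the bias column x0 = 1
y = 1 + 2 * x + 0.01 * rng.randn(100, 1)    # nearly noiseless line
theta0 = np.zeros((2, 1))
theta, J_history = gradientDesent(X, y, theta0, alpha=0.5, num_iters=2000)
print(theta.ravel())    # approximately [1. 2.]
print(J_history[-1])    # final cost, close to zero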
@@ -0,0 +1,123 @@
import numpy as np
import pandas as pd
import gradientdesent as gd
import sys
sys.path.append(r"C:\Users\Lenovo\performance_evaluation")
from normalEqu import normalEqu
from MAE import MAE
from MAPE import MAPE
import MSE_RMSE
from R2 import R2
from computeCost import computeCost
from comCost_lwlr import comCost_lwlr
import regression_lwlr as lw
# Remember to split off a test set in the main script.

def random_data(X, y):
    """Randomly reshuffle the data; takes the feature matrix and label vector
    and returns them reshuffled with rows kept aligned."""
    z = np.hstack([X, y])
    m, n = z.shape
    indexlist = list(range(m))
    np.random.shuffle(indexlist)
    v = z[indexlist, :]
    X = v[:, 0:-1]
    y = v[:, -1]
    y = y.reshape(-1, 1)
    return X, y

def hold_out(X, y, percent):
    """Simple hold-out split; note that X and y must be NumPy arrays."""
    m = len(y)
    train_X = []
    train_y = []
    val_X = []
    val_y = []
    X, y = random_data(X, y)
    for j in range(m):
        if j < (m * percent):
            train_X.append(X[j])
            train_y.append(y[j])
        else:
            val_X.append(X[j])
            val_y.append(y[j])
    return train_X, train_y, val_X, val_y

def hold_out2(X, y, percent, num_val):
    """Hold-out evaluation of the normal equation. Inputs: feature matrix X,
    label vector y, fraction of the data used for training (percent), and the
    number of validation rounds (num_val). Prints theta and the evaluation
    metrics and returns theta."""
    m = len(y)
    X1 = X
    y1 = y
    J1 = []           # training-set cost for each round
    J2 = []           # validation-set cost for each round
    J5 = [[0], [0]]   # theta from each round
    mae = 0
    mape = 0
    mse = 0
    rmse = 0
    r2 = 0
    for i in range(num_val):
        X1, y1 = random_data(X, y)
        q = int(m * percent)
        train_X = X1[:q, :]   # split into training and validation sets by percentage
        train_y = y1[:q, :]
        val_X = X1[q:, :]
        val_y = y1[q:, :]
        theta, J_train = normalEqu(train_X, train_y)   # normal equation gives theta and the training cost
        J_val = computeCost(val_X, val_y, theta)       # validation-set cost
        mae += MAE(val_y, np.dot(val_X, theta))        # accumulate MAE
        mape += MAPE(val_y, np.dot(val_X, theta))      # accumulate MAPE
        r2 += R2(val_y, np.dot(val_X, theta))          # accumulate R2
        mse += MSE_RMSE.MSE(val_y, np.dot(val_X, theta))     # accumulate MSE
        rmse += MSE_RMSE.RMSE(val_y, np.dot(val_X, theta))   # accumulate RMSE
        J1.append(J_train)
        J2.append(J_val)
        J5 = np.hstack([J5, theta])
    l, theta = np.hsplit(J5, [1])
    theta = np.mean(theta, axis=1)   # average theta over the rounds
    theta = theta.reshape(2, 1)
    print("theta")
    print(theta)
    J3 = np.mean(J1)                 # average training cost over the rounds
    J4 = np.mean(J2)                 # average validation cost over the rounds
    dr = pd.Series([J3, J4, mae / num_val, mape / num_val, mse / num_val, rmse / num_val, r2 / num_val],
                   index=["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"])  # series of the seven evaluation metrics
    print(dr)
    return theta

def hold_out3(X, y, percent, num_val, k):
    """Hold-out evaluation of locally weighted linear regression. Inputs:
    feature matrix X, label vector y, fraction of the data used for training
    (percent), number of validation rounds (num_val), and kernel bandwidth k.
    Prints the evaluation metrics."""
    m = len(y)
    X1 = X
    y1 = y
    J1 = []   # training-set cost for each round
    J2 = []   # validation-set cost for each round
    mae = 0
    mape = 0
    mse = 0
    rmse = 0
    r2 = 0
    for i in range(num_val):
        X1, y1 = random_data(X, y)
        q = int(m * percent)
        train_X = X1[:q, :]   # split into training and validation sets by percentage
        train_y = y1[:q, :]
        val_X = X1[q:, :]
        val_y = y1[q:, :]
        y_pre1 = lw.lwlrTest(train_X, train_X, train_y, k)   # predictions on the training set
        y_pre2 = lw.lwlrTest(val_X, train_X, train_y, k)     # predictions on the validation set
        J_train = comCost_lwlr(train_y, y_pre1)              # training-set cost
        J_val = comCost_lwlr(val_y, y_pre2)                  # validation-set cost
        mae += MAE(val_y, y_pre2)              # accumulate MAE
        mape += MAPE(val_y, y_pre2)            # accumulate MAPE
        r2 += R2(val_y, y_pre2)                # accumulate R2
        mse += MSE_RMSE.MSE(val_y, y_pre2)     # accumulate MSE
        rmse += MSE_RMSE.RMSE(val_y, y_pre2)   # accumulate RMSE
        J1.append(J_train)
        J2.append(J_val)
    J3 = np.mean(J1)   # average training cost over the rounds
    J4 = np.mean(J2)   # average validation cost over the rounds
    dr = pd.Series([J3, J4, mae / num_val, mape / num_val, mse / num_val, rmse / num_val, r2 / num_val],
                   index=["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"])  # series of the seven evaluation metrics
    print(dr)
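A brief usage sketch for hold_out2 on synthetic linear data, not part of the commit; it assumes the MAE, MAPE, MSE_RMSE, and R2 helpers from the author's performance_evaluation folder are importable, and the data is made up.

import numpy as np
from hold_out import hold_out2

rng = np.random.RandomState(1)
x = 10 * rng.rand(100, 1)
X = np.hstack([np.ones((100, 1)), x])   # bias column x0 = 1, as in the main script
y = 3 * x - 2 + rng.randn(100, 1)
# 80% of the data for training, 5 validation rounds; prints theta and the seven metrics
theta = hold_out2(X, y, 0.8, 5)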
@@ -0,0 +1,38 @@
"""局部加权线性回归""" | ||
import numpy as np | ||
import PlotData as PD | ||
import regression_lwlr as lw | ||
from plot_lwlr import plot_lwlr | ||
from hold_out import hold_out3 | ||
rng=np.random.RandomState(0) | ||
X=10*rng.rand(120) | ||
def model(x): | ||
y=2*x-5+rng.randn(120)+1.8*np.sin(3*x) | ||
return y | ||
y=model(X)#随机产生120个数据 | ||
print(PD.plot1(X,y))#初步数据可视化 | ||
X1=X.copy() | ||
y1=y.copy() | ||
x1_test=X1[80:]#此处数据是作为数据可视化用的,数据可视化要用一维数组 | ||
y1_test=y1[80:] | ||
x1_train=X1[:80] | ||
y1_train=y1[:80] | ||
#print(ya) | ||
X=X.reshape(-1,1) | ||
m=len(y)#获取原特征矩阵的行数 | ||
ones=np.ones(m).reshape(-1,1) | ||
X=np.hstack([ones,X])#特征矩阵中合并一个x0矩阵,x0初始为1 | ||
print(X)#输出特征数组 | ||
y=y.reshape(-1,1) | ||
"""对数据进行分割,暂时分三分之二为训练集,三分之一为测试集,设定多个k值,通过数据可视化查看拟合情况然后选取最后k值 | ||
进行留出集验证评估模型各性能指标""" | ||
X_train=X[:80,:]#训练集和测试集分割 | ||
y_train=y[:80,:] | ||
X_test=X[80:,:] | ||
y_test=y[80:,:] | ||
k=[0.15,0.3,0.45,0.6,0.75,0.9]#设置多个k值 | ||
plot_lwlr(X_test,X_train,y_train,x1_test,x1_train,y1_test,y1_train,k)#拟合情况可视化 | ||
#y_pre=lw.lwlrTest(X_test,X_train,y_train,0.45) | ||
hold_out3(X,y,0.8,10,0.45)#10次留出集验证,训练集占比百分之八十 | ||
#输出训练集和验证集代价函数,MAE,MAPE,MSE,RMSE,R2 | ||
print("一般情况下,J_train比较大,为过拟合,即高偏差情况,若J_test远大于J_train为欠拟合,由于之前已经进行拟合情况可视化并选取k值,所以这个模型的拟合情况是较好的") |
@@ -0,0 +1,14 @@
import numpy as np
from computeCost import computeCost

def normalEqu(X, y):
    """Solve for the theta that minimizes the cost via the normal equation.
    X is the feature matrix and y the label vector; returns theta and the cost J."""
    X = np.array(X)
    y = np.array(y)
    y = y.reshape(-1, 1)                    # ensure y is a column vector
    turn = np.linalg.pinv(np.dot(X.T, X))   # (X^T X)^-1 via the pseudo-inverse
    theta = np.dot(np.dot(turn, X.T), y)    # normal equation theta = (X^T X)^-1 X^T y, i.e. where the cost's gradient is zero
    J = computeCost(X, y, theta)
    return theta, J                         # best-fit parameters and their cost
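A quick check, not part of the commit, that the normal-equation solution matches NumPy's least-squares solver on made-up data.

import numpy as np
from normalEqu import normalEqu

rng = np.random.RandomState(2)
x = rng.rand(50, 1)
X = np.hstack([np.ones((50, 1)), x])    # bias column plus one feature
y = 4 - 3 * x + 0.05 * rng.randn(50, 1)
theta, J = normalEqu(X, y)
theta_np, *_ = np.linalg.lstsq(X, y, rcond=None)
print(theta.ravel())                    # approximately [ 4. -3.]
print(np.allclose(theta, theta_np))     # True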