Skip to content

Commit

Permalink
machine learning
Browse files Browse the repository at this point in the history
machine learning
  • Loading branch information
karagg committed Jun 19, 2019
1 parent 48ff82d commit fcd9da4
Show file tree
Hide file tree
Showing 159 changed files with 5,218 additions and 282 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Locally_Weighted_Linear_Regression/.idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Locally_Weighted_Linear_Regression/.idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

107 changes: 107 additions & 0 deletions Locally_Weighted_Linear_Regression/.idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions Locally_Weighted_Linear_Regression/PlotData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

def plot1(x, y):
    """Show a scatter plot of a single data series.

    ``x`` and ``y`` are passed straight to ``plt.plot`` and drawn as blue
    dots labelled ``y_true``. The figure is displayed with ``plt.show()``;
    nothing is returned.
    """
    observed_style = {'color': 'blue', 'label': 'y_true'}
    plt.plot(x, y, 'o', **observed_style)  # observed values as a scatter
    plt.xlabel("X")
    plt.ylabel("y")
    plt.legend(loc='best')  # let matplotlib choose the legend position
    plt.show()
def plot2(x, y, x_test, y_pre):
    """Overlay a prediction curve on the observed data points.

    ``x``/``y`` are drawn as blue dots labelled ``y_true``; ``x_test``/
    ``y_pre`` are drawn as a red line labelled ``y_pre``. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    layers = (
        ((x, y, 'o'), {'color': 'blue', 'label': 'y_true'}),
        ((x_test, y_pre, '-'), {'color': 'red', 'label': "y_pre"}),  # predicted curve
    )
    for positional, style in layers:
        plt.plot(*positional, **style)
    plt.xlabel("X")
    plt.ylabel("y")
    plt.legend()
    plt.show()
def plot3(x, y, z):
    """Show a 3-D scatter plot for data with two features.

    ``x`` and ``y`` are the two feature axes and ``z`` the target axis.
    Points are coloured by ``z`` using the ``'Blues'`` colormap. The figure
    is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure()
    axes3d = plt.axes(projection='3d')
    axes3d.scatter3D(x, y, z, c=z, cmap='Blues')  # colour follows the target value
    for set_label, text in (
        (axes3d.set_xlabel, "X1"),
        (axes3d.set_ylabel, "X2"),
        (axes3d.set_zlabel, "y"),
    ):
        set_label(text)
    plt.show()
def plot4(X1, X2, y, x_test1, x_test2, y_pre1):
    """Show observed 3-D data together with a predicted 3-D curve.

    ``X1``/``X2``/``y`` are drawn as a scatter coloured by ``y`` with the
    ``'Blues'`` colormap; ``x_test1``/``x_test2``/``y_pre1`` are drawn as a
    gray line. The figure is displayed with ``plt.show()``; nothing is
    returned.
    """
    plt.figure()
    axes3d = plt.axes(projection='3d')
    axes3d.scatter3D(X1, X2, y, c=y, cmap='Blues')  # observed points
    axes3d.plot3D(x_test1, x_test2, y_pre1, 'gray')  # predicted values as a 3-D line
    for set_label, text in (
        (axes3d.set_xlabel, "X1"),
        (axes3d.set_ylabel, "X2"),
        (axes3d.set_zlabel, "y"),
    ):
        set_label(text)
    plt.show()
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
6 changes: 6 additions & 0 deletions Locally_Weighted_Linear_Regression/comCost_lwlr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import numpy as np
def comCost_lwlr(y_test, y_pre):
    """Return the halved mean squared error between predictions and targets.

    Computes ``J = d^T d / m / 2`` with ``d = y_pre - y_test`` and
    ``m = len(y_test)``.

    Args:
        y_test: True target values (array-like).
        y_pre: Predicted values holding the same number of elements.

    Returns:
        The cost ``J``. For ``(m, 1)`` column targets this is a ``(1, 1)``
        array; for 1-D targets it is a scalar.

    Raises:
        ValueError: If ``y_pre`` cannot be reshaped to the shape of
            ``y_test`` (the element counts differ).
    """
    y_test = np.asanyarray(y_test)
    y_pre = np.asanyarray(y_pre)
    if y_pre.shape != y_test.shape:
        # Without this, a 1-D prediction minus an (m, 1) target broadcasts
        # to an (m, m) matrix and the "cost" silently becomes meaningless.
        y_pre = y_pre.reshape(y_test.shape)
    m = len(y_test)  # number of samples
    diff = y_pre - y_test  # h(x) - y
    J = np.dot(diff.T, diff) / m / 2
    return J
7 changes: 7 additions & 0 deletions Locally_Weighted_Linear_Regression/computeCost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import numpy as np
def computeCost(X, y, theta):
    """Evaluate the squared-error cost of a linear model.

    ``X`` is the feature matrix, ``y`` the target array and ``theta`` the
    parameter vector. The residual ``X . theta - y`` is multiplied with its
    own transpose, divided by ``len(y)`` and then by 2, and the first entry
    of that result is returned.
    """
    sample_count = len(y)
    residual = np.dot(X, theta) - y  # h(x) - y
    squared_error = np.dot(residual.T, residual)
    cost = squared_error / sample_count / 2
    return cost[0]
15 changes: 15 additions & 0 deletions Locally_Weighted_Linear_Regression/gradientdesent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import numpy as np
from computeCost import computeCost
def gradientDesent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent for linear regression.

    ``X`` is the feature matrix, ``y`` the target array, ``theta`` the
    starting parameters, ``alpha`` the learning rate and ``num_iters`` the
    number of update steps. After every step the cost reported by
    ``computeCost`` is stored.

    Returns the final ``theta`` and a ``(num_iters, 1)`` array holding the
    cost after each iteration.
    """
    sample_count = len(y)
    cost_log = np.zeros((num_iters, 1))  # one row per iteration
    for step_index in range(num_iters):
        residual = np.dot(X, theta) - y  # h(x) - y
        gradient = np.dot(X.T, residual)
        theta = theta - alpha * (1 / sample_count) * gradient
        # Cost is measured with the freshly updated parameters.
        cost_log[step_index][0] = computeCost(X, y, theta)
    return theta, cost_log



123 changes: 123 additions & 0 deletions Locally_Weighted_Linear_Regression/hold_out.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import numpy as np
import pandas as pd
import gradientdesent as gd
import sys
sys.path.append(r"C:\Users\Lenovo\performance_evaluation")
from normalEqu import normalEqu
from MAE import MAE
from MAPE import MAPE
import MSE_RMSE
from R2 import R2
from computeCost import computeCost
from comCost_lwlr import comCost_lwlr
import regression_lwlr as lw
#记得主函数分测试集
def random_data(X, y):
    """Shuffle the rows of ``X`` and ``y`` together.

    The two inputs are stacked side by side, the rows are reordered with a
    permutation drawn from ``np.random.shuffle``, and the result is split
    again: every column but the last is returned as the features, the last
    column is returned as an ``(m, 1)`` target column.
    """
    stacked = np.hstack([X, y])
    row_count, _ = stacked.shape  # unpacking requires a 2-D stack
    order = list(range(row_count))
    np.random.shuffle(order)
    shuffled = stacked[order, :]
    features = shuffled[:, 0:-1]
    targets = shuffled[:, -1].reshape(-1, 1)
    return features, targets
def hold_out(X, y, percent):
    """Shuffle the data and split it into training and validation lists.

    ``X`` and ``y`` are NumPy arrays. After shuffling with ``random_data``,
    every row whose index is below ``len(y) * percent`` goes to the training
    part and the remaining rows go to the validation part.

    Returns ``(train_X, train_y, val_X, val_y)``, each a Python list of rows.
    """
    sample_count = len(y)
    X, y = random_data(X, y)
    row_ids = range(sample_count)
    train_ids = [j for j in row_ids if j < (sample_count * percent)]
    val_ids = [j for j in row_ids if not j < (sample_count * percent)]
    train_X = [X[j] for j in train_ids]
    train_y = [y[j] for j in train_ids]
    val_X = [X[j] for j in val_ids]
    val_y = [y[j] for j in val_ids]
    return train_X, train_y, val_X, val_y

def hold_out2(X, y, percent, num_val):
    """Evaluate the normal-equation fit with repeated hold-out validation.

    Every round reshuffles the data with ``random_data``, uses the first
    ``int(len(y) * percent)`` rows for training and the rest for validation,
    fits ``theta`` with ``normalEqu`` and scores the validation predictions
    ``val_X . theta``. The averaged ``theta`` and a ``pandas.Series`` with
    the averaged scores (J_train, J_val, MAE, MAPE, MSE, RMSE, R2) are
    printed.

    Args:
        X: Feature matrix (2-D NumPy array), one row per sample.
        y: Target column (2-D NumPy array), one row per sample.
        percent: Fraction of the samples used for training.
        num_val: Number of validation rounds; must be at least 1.

    Returns:
        The mean of the per-round ``theta`` vectors as a column array with
        one row per parameter (any number of parameters is supported).

    Raises:
        ValueError: If ``num_val`` is smaller than 1.
    """
    if num_val < 1:
        raise ValueError("num_val must be at least 1")
    m = len(y)
    q = int(m * percent)  # number of training rows, identical in every round
    train_costs = []  # training cost of each round
    val_costs = []  # validation cost of each round
    thetas = []  # fitted parameter column of each round
    mae = 0
    mape = 0
    mse = 0
    rmse = 0
    r2 = 0
    for _ in range(num_val):
        X1, y1 = random_data(X, y)
        train_X, train_y = X1[:q, :], y1[:q, :]
        val_X, val_y = X1[q:, :], y1[q:, :]
        theta, J_train = normalEqu(train_X, train_y)
        J_val = computeCost(val_X, val_y, theta)
        val_pre = np.dot(val_X, theta)  # predict once, reuse for every metric
        mae += MAE(val_y, val_pre)
        mape += MAPE(val_y, val_pre)
        r2 += R2(val_y, val_pre)
        mse += MSE_RMSE.MSE(val_y, val_pre)
        rmse += MSE_RMSE.RMSE(val_y, val_pre)
        train_costs.append(J_train)
        val_costs.append(J_val)
        thetas.append(theta)
    # Stack the per-round columns side by side and average across rounds;
    # reshape(-1, 1) keeps this independent of the number of parameters.
    theta = np.mean(np.hstack(thetas), axis=1).reshape(-1, 1)
    print("theta")
    print(theta)
    J3 = np.mean(train_costs)  # mean training cost over all rounds
    J4 = np.mean(val_costs)  # mean validation cost over all rounds

    dr = pd.Series(
        [J3, J4, mae/num_val, mape/num_val, mse/num_val, rmse/num_val, r2/num_val],
        index=["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"],
    )
    print(dr)
    return theta


def hold_out3(X, y, percent, num_val, k):
    """Evaluate locally weighted linear regression with repeated hold-out.

    Every round reshuffles the data with ``random_data``, uses the first
    ``int(len(y) * percent)`` rows for training and the rest for validation,
    and predicts both parts with ``lw.lwlrTest`` using the parameter ``k``.
    The averaged training cost, validation cost, MAE, MAPE, MSE, RMSE and R2
    are printed as a ``pandas.Series``. Nothing is returned.

    ``X`` is the feature matrix, ``y`` the target column, ``percent`` the
    training fraction and ``num_val`` the number of rounds.
    """
    sample_count = len(y)
    train_costs = []  # training cost of each round
    val_costs = []  # validation cost of each round
    totals = {"MAE": 0, "MAPE": 0, "MSE": 0, "RMSE": 0, "R2": 0}
    for _round in range(num_val):
        shuffled_X, shuffled_y = random_data(X, y)
        cut = int(sample_count * percent)
        train_X, val_X = shuffled_X[:cut, :], shuffled_X[cut:, :]
        train_y, val_y = shuffled_y[:cut, :], shuffled_y[cut:, :]
        train_pre = lw.lwlrTest(train_X, train_X, train_y, k)  # predictions on the training part
        val_pre = lw.lwlrTest(val_X, train_X, train_y, k)  # predictions on the validation part
        # NOTE(review): assumes lwlrTest returns values shaped like the target column - confirm.
        train_costs.append(comCost_lwlr(train_y, train_pre))
        val_costs.append(comCost_lwlr(val_y, val_pre))
        totals["MAE"] += MAE(val_y, val_pre)
        totals["MAPE"] += MAPE(val_y, val_pre)
        totals["R2"] += R2(val_y, val_pre)
        totals["MSE"] += MSE_RMSE.MSE(val_y, val_pre)
        totals["RMSE"] += MSE_RMSE.RMSE(val_y, val_pre)

    mean_train_cost = np.mean(train_costs)
    mean_val_cost = np.mean(val_costs)

    labels = ["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"]
    averaged = [totals[name] / num_val for name in labels[2:]]
    dr = pd.Series([mean_train_cost, mean_val_cost] + averaged, index=labels)
    print(dr)
38 changes: 38 additions & 0 deletions Locally_Weighted_Linear_Regression/linear_weight_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""局部加权线性回归"""
import numpy as np
import PlotData as PD
import regression_lwlr as lw
from plot_lwlr import plot_lwlr
from hold_out import hold_out3
# Fixed seed so the synthetic data set is identical on every run.
rng=np.random.RandomState(0)
X=10*rng.rand(120)
def model(x):
    """Return noisy targets ``2*x - 5 + noise + 1.8*sin(3*x)`` for ``x``.

    The noise is drawn from the module-level ``rng`` with ``randn`` and has
    the same shape as ``x``, so inputs of any length or shape are accepted
    (for a length-120 input the draw matches the former fixed-size call).

    Args:
        x: Input values (array-like).

    Returns:
        Array of targets with the same shape as ``x``.
    """
    x = np.asanyarray(x)
    noise = rng.randn(*x.shape)  # one standard-normal sample per input value
    y = 2*x - 5 + noise + 1.8*np.sin(3*x)
    return y
y=model(X)  # noisy targets for the generated samples
PD.plot1(X,y)  # first look at the raw data; plot1 only draws, so there is nothing to print
n_train=80  # the first 80 samples are used for training, the rest for testing
# Flat copies kept for plotting, which needs 1-D arrays.
X1=X.copy()
y1=y.copy()
x1_test=X1[n_train:]
y1_test=y1[n_train:]
x1_train=X1[:n_train]
y1_train=y1[:n_train]
X=X.reshape(-1,1)
m=len(y)  # number of samples
ones=np.ones(m).reshape(-1,1)
X=np.hstack([ones,X])  # prepend the bias column x0 = 1 to the feature matrix
print(X)  # show the feature matrix
y=y.reshape(-1,1)
# Split the data (two thirds training, one third test), try several k values,
# inspect the fits visually, then run hold-out validation with the chosen k
# to report the performance metrics.
X_train=X[:n_train,:]
y_train=y[:n_train,:]
X_test=X[n_train:,:]
y_test=y[n_train:,:]
k=[0.15,0.3,0.45,0.6,0.75,0.9]  # candidate k values
plot_lwlr(X_test,X_train,y_train,x1_test,x1_train,y1_test,y1_train,k)  # visualise the fit for each k
hold_out3(X,y,0.8,10,0.45)  # 10 hold-out rounds with 80 percent of the data used for training
# hold_out3 prints the training/validation cost, MAE, MAPE, MSE, RMSE and R2.
# The message below was corrected: a large training cost means under-fitting
# (high bias); a validation cost far above the training cost means
# over-fitting (high variance).
print("一般情况下,J_train比较大,为欠拟合,即高偏差情况,若J_val远大于J_train为过拟合,即高方差情况,由于之前已经进行拟合情况可视化并选取k值,所以这个模型的拟合情况是较好的")
14 changes: 14 additions & 0 deletions Locally_Weighted_Linear_Regression/normalEqu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import numpy as np
from computeCost import computeCost

def normalEqu(X, y):
    """Solve linear least squares in closed form with the normal equation.

    Computes ``theta = pinv(X^T X) X^T y`` and the cost of that solution as
    reported by ``computeCost``.

    Args:
        X: Feature matrix (array-like), one row per sample.
        y: Targets (array-like). A 1-D sequence is treated as a single
            column; 2-D input is used as given.

    Returns:
        A tuple ``(theta, J)``: the fitted parameters and the value
        returned by ``computeCost(X, y, theta)``.
    """
    X = np.array(X)
    y = np.array(y)
    if y.ndim == 1:
        # ndarray.reshape returns a new array; the result has to be assigned,
        # otherwise 1-D targets stay 1-D and the cost computation fails.
        y = y.reshape(-1, 1)
    gram_pinv = np.linalg.pinv(np.dot(X.T, X))  # pseudo-inverse of X^T X
    # Parameters at which the gradient of the cost is zero.
    theta = np.dot(np.dot(gram_pinv, X.T), y)
    J = computeCost(X, y, theta)
    return theta, J


Loading

0 comments on commit fcd9da4

Please sign in to comment.