hold_out.py
import numpy as np
import pandas as pd
from gradientDesent import gradientDesent
from computecost import computecost
from computecost import sigmoid
# Remember to split off a test set in the calling program.
def random_data(X, y):
    """Randomly reshuffle the data: given the feature matrix and label array,
    return them with their rows shuffled in unison."""
    z = np.hstack([X, y])            # join features and labels so rows stay paired
    m = z.shape[0]
    indexlist = list(range(m))
    np.random.shuffle(indexlist)
    v = z[indexlist, :]
    X = v[:, 0:-1]                   # split the features back out
    y = v[:, -1].reshape(-1, 1)      # keep y as a column vector
    return X, y
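# A minimal usage sketch for random_data (an illustration, not part of the
# original file). It assumes X has shape (m, n) and y has shape (m, 1), which
# the np.hstack call above already requires:
#
#     X = np.arange(6).reshape(3, 2)    # three samples, two features
#     y = np.array([[0], [1], [1]])     # matching labels, shape (3, 1)
#     X_s, y_s = random_data(X, y)
#     # Rows are permuted together, so each label still follows its sample.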
def hold_out(X, y, percent, num_val=10):
    """Hold-out evaluation of logistic regression. Inputs: feature matrix X,
    label array y, percent = fraction of the data used for training, and
    num_val = number of validation rounds. Prints the averaged theta and an
    evaluation table, and returns theta."""
    m = len(y)
    q = int(m * percent)              # size of the training split
    J1 = []                           # training-set cost from each round
    J2 = []                           # validation-set cost from each round
    J5 = np.zeros((X.shape[1], 1))    # placeholder column; each round's theta is appended
    for _ in range(num_val):
        X1, y1 = random_data(X, y)
        train_X = X1[:q, :]           # split into training and validation sets by percent
        train_y = y1[:q, :]
        val_X = X1[q:, :]
        val_y = y1[q:, :]
        theta, J_train = gradientDesent(train_X, train_y, 0.3, 1500)  # fit by gradient-descent logistic regression
        s = sigmoid(np.dot(val_X, theta))
        J_val = computecost(s, val_y)  # cost J on the validation set
        J1.append(J_train[-1, -1])
        J2.append(J_val)
        J5 = np.hstack([J5, theta])
    _, theta = np.hsplit(J5, [1])     # drop the placeholder column
    theta = np.mean(theta, axis=1)    # average theta over the rounds
    theta = theta.reshape(-1, 1)
    print("theta")
    print(theta)
    J3 = np.mean(J1)                  # average J_train over the rounds
    J4 = np.mean(J2)                  # average J_val over the rounds
    df = pd.Series([J3, J4], index=['J_train', 'J_val'])
    print(df)
    return theta
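
# A hedged end-to-end sketch (not part of the original script). It fabricates a
# tiny synthetic dataset and relies only on what the calls above already imply:
# gradientDesent(X, y, alpha, iters) returns (theta, cost_history) and
# computecost(predictions, y) returns a scalar cost.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    m = 100
    features = rng.normal(size=(m, 2))               # two raw features
    X_demo = np.hstack([np.ones((m, 1)), features])  # prepend an intercept column
    # Labels follow a known linear rule, so the fit has something to recover.
    y_demo = (features[:, 0] + features[:, 1] > 0).astype(float).reshape(-1, 1)
    hold_out(X_demo, y_demo, 0.7, num_val=10)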