-
Notifications
You must be signed in to change notification settings - Fork 1
/
RandomForest.py
84 lines (39 loc) · 1.48 KB
/
RandomForest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
import pandas as pd
from DataPreprocessing import *
import matplotlib.pyplot as plt
#get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sn
from sklearn.ensemble import RandomForestClassifier as RFC
# **Create and test temporary models with the criteria available in scikit-learn, then finalize the model that has the best train and test scores**
# In[11]:
#crit = ['gini', 'entropy']
#max_features = ['auto' , 'sqrt' , 'log2']
# In[10]:
#print(' Criterion ' + ' max_features '+' Train Score ' + ' Test Score')
#for i in crit:
# for j in max_features:
# temp_rf = RFC(n_estimators = 100, criterion = i, max_features = j)
# temp_rf.fit(x_train,y_train)
# temp_y_pred = temp_rf.predict(x_test)
# train_score=temp_rf.score(x_train,y_train)
# test_score=temp_rf.score(x_test,y_test)
# print(i,j,train_score,test_score)
# > Accuracy is best with Gini Index and sqrt max_features
# #### Train the model with the Gini index and sqrt max_features, then test it
# In[4]:
# Final model: a 100-tree random forest using the Gini criterion and
# max_features='sqrt'.
# NOTE(review): no random_state is passed, so the fitted forest (and the
# accuracy computed below) can differ from run to run -- consider fixing a seed.
gini_rd_frst = RFC(n_estimators=100, criterion='gini', max_features='sqrt')
# In[5]:
# x_train / y_train (and x_test / y_test below) are not defined in this file;
# presumably they are provided by the `from DataPreprocessing import *` import.
gini_rd_frst.fit(x_train, y_train)
# In[6]:
# Predicted labels for the held-out test features.
gini_rd_frst_y_pred = gini_rd_frst.predict(x_test)
# In[7]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
# In[8]:
# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
gini_rd_frst_ac = accuracy_score(y_test, gini_rd_frst_y_pred)
# In[9]:
# A bare expression is only echoed inside a notebook cell; when this file is
# executed as a script, print the score explicitly. The guard keeps imports of
# this module free of extra output.
if __name__ == "__main__":
    print(gini_rd_frst_ac)
# In[ ]: