-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPima.py
32 lines (26 loc) · 1.03 KB
/
Pima.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# import all packages
import pandas as pd
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the dataset and show its first five rows.
# NOTE(review): read_csv treats the first line of the file as a header by
# default; if this CSV has no header row, the first record is consumed as
# column names and header=None should be passed -- confirm against the file.
dataset = pd.read_csv('pima-indians-diabetes.csv')
# A bare `dataset.head()` is silently discarded when run as a script, so the
# preview is printed explicitly.
print(dataset.head())

# Split the columns by position: the first eight go to X (model inputs) and
# the ninth goes to Y (the value the model is trained to predict).
X = dataset.iloc[:, 0:8]
Y = dataset.iloc[:, 8]

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed
)

# Fit an XGBoost classifier (scikit-learn estimator interface) with its
# default hyper-parameters on the training split.
model = XGBClassifier()
model.fit(X_train, y_train)

# predict() on the classifier already yields discrete class labels, so no
# extra rounding pass is needed before scoring.
y_pred = model.predict(X_test)

# Report the fraction of test rows classified correctly, as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))