-
Notifications
You must be signed in to change notification settings - Fork 1
/
trainClassifier.m
103 lines (91 loc) · 7.06 KB
/
trainClassifier.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
function [trainedClassifier, validationAccuracy, validationPredictions, validationScores] = trainClassifier(trainingData, crossValType, classes)
% trainClassifier(trainingData, crossValType)
% returns a trained classifier and its accuracy.
% This code recreates the classification model trained in
% Classification Learner app.
%
% Input:
% trainingData: the training data of same data type as imported
% in the app (table or matrix).
%
% crossValType: the type of cross validation to be performed on
% the training set ('KFold' or 'Leaveout')
%
% Output:
% trainedClassifier: a struct containing the trained classifier.
% The struct contains various fields with information about the
% trained classifier.
%
% trainedClassifier.predictFcn: a function to make predictions
% on new data. It takes an input of the same form as this training
% code (table or matrix) and returns predictions for the response.
% If you supply a matrix, include only the predictors columns (or
% rows).
%
% validationAccuracy: a double containing the accuracy in
% percent. In the app, the History list displays this
% overall accuracy score for each model.
%
% Use the code to train the model with new data.
% To retrain your classifier, call the function from the command line
% with your original data or new data as the input argument trainingData.
%
% For example, to retrain a classifier trained with the original data set
% T, enter:
% [trainedClassifier, validationAccuracy] = trainClassifier(T)
%
% To make predictions with the returned 'trainedClassifier' on new data T,
% use
% yfit = trainedClassifier.predictFcn(T)
%
% To automate training the same classifier with new data, or to learn how
% to programmatically train classifiers, examine the generated code.
% Auto-generated by MATLAB on 08-Mar-2018 15:05:24
% Extract predictors and response
% This code processes the data into the right shape for training the
% classifier.
% Convert input to table
inputTable = array2table(trainingData, 'VariableNames', {'column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6', 'column_7', 'column_8', 'column_9', 'column_10', 'column_11', 'column_12', 'column_13', 'column_14', 'column_15', 'column_16', 'column_17', 'column_18', 'column_19', 'column_20', 'column_21', 'column_22', 'column_23', 'column_24', 'column_25', 'column_26', 'column_27', 'column_28', 'column_29', 'column_30', 'column_31', 'column_32', 'column_33', 'column_34', 'column_35', 'column_36', 'column_37', 'column_38', 'column_39'});
predictorNames = {'column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6', 'column_7', 'column_8', 'column_9', 'column_10', 'column_11', 'column_12', 'column_13', 'column_14', 'column_15', 'column_16', 'column_17', 'column_18', 'column_19', 'column_20', 'column_21', 'column_22', 'column_23', 'column_24', 'column_25', 'column_26', 'column_27', 'column_28', 'column_29', 'column_30', 'column_31', 'column_32', 'column_33', 'column_34', 'column_35', 'column_36', 'column_37', 'column_38'};
predictors = inputTable(:, predictorNames);
response = inputTable.column_39;
isCategoricalPredictor = [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false];
% Train a classifier
% This code specifies all the classifier options and trains the classifier.
classificationKNN = fitcknn(...
predictors, ...
response, ...
'Distance', 'Euclidean', ...
'Exponent', [], ...
'NumNeighbors', 1, ...
'DistanceWeight', 'Equal', ...
'Standardize', true, ...
'ClassNames', classes);
% Create the result struct with predict function
predictorExtractionFcn = @(x) array2table(x, 'VariableNames', predictorNames);
knnPredictFcn = @(x) predict(classificationKNN, x);
trainedClassifier.predictFcn = @(x) knnPredictFcn(predictorExtractionFcn(x));
% Add additional fields to the result struct
trainedClassifier.ClassificationKNN = classificationKNN;
trainedClassifier.About = 'This struct is a trained classifier exported from Classification Learner R2016b.';
trainedClassifier.HowToPredict = sprintf('To make predictions on a new predictor column matrix, X, use: \n yfit = c.predictFcn(X) \nreplacing ''c'' with the name of the variable that is this struct, e.g. ''trainedClassifier''. \n \nX must contain exactly 38 columns because this classifier was trained using 38 predictors. \nX must contain only predictor columns in exactly the same order and format as your training \ndata. Do not include the response column or any columns you did not import into \nClassification Learner. \n \nFor more information, see <a href="matlab:helpview(fullfile(docroot, ''stats'', ''stats.map''), ''appclassification_exportmodeltoworkspace'')">How to predict using an exported model</a>.');
% Extract predictors and response
% This code processes the data into the right shape for training the
% classifier.
% Convert input to table
inputTable = array2table(trainingData, 'VariableNames', {'column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6', 'column_7', 'column_8', 'column_9', 'column_10', 'column_11', 'column_12', 'column_13', 'column_14', 'column_15', 'column_16', 'column_17', 'column_18', 'column_19', 'column_20', 'column_21', 'column_22', 'column_23', 'column_24', 'column_25', 'column_26', 'column_27', 'column_28', 'column_29', 'column_30', 'column_31', 'column_32', 'column_33', 'column_34', 'column_35', 'column_36', 'column_37', 'column_38', 'column_39'});
predictorNames = {'column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6', 'column_7', 'column_8', 'column_9', 'column_10', 'column_11', 'column_12', 'column_13', 'column_14', 'column_15', 'column_16', 'column_17', 'column_18', 'column_19', 'column_20', 'column_21', 'column_22', 'column_23', 'column_24', 'column_25', 'column_26', 'column_27', 'column_28', 'column_29', 'column_30', 'column_31', 'column_32', 'column_33', 'column_34', 'column_35', 'column_36', 'column_37', 'column_38'};
predictors = inputTable(:, predictorNames);
response = inputTable.column_39;
isCategoricalPredictor = [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false];
% Perform cross-validation
switch crossValType
case 'KFold'
partitionedModel = crossval(trainedClassifier.ClassificationKNN, crossValType, 10);
case 'Leaveout'
partitionedModel = crossval(trainedClassifier.ClassificationKNN, crossValType, 'on');
end
% Compute validation accuracy
validationAccuracy = 1 - kfoldLoss(partitionedModel, 'LossFun', 'ClassifError');
% Compute validation predictions and scores
[validationPredictions, validationScores] = kfoldPredict(partitionedModel);