-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathml_algorithm.R
38 lines (30 loc) · 955 Bytes
/
ml_algorithm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
library(foreign)
library(dplyr)
library(tidyverse)
library(caret)
library(randomForest)
library(rmarkdown)
# Load data
# read.arff() (from foreign) parses the ARFF file into a data frame; the path
# is resolved relative to the current working directory.
credit_data <- read.arff("rda/credit-g-dataset.arff")

# Check for missing values
# Count the NA entries in each column and show the per-column totals.
missing_values <- colSums(is.na(credit_data))
print(missing_values)

# Convert categorical variables to factors
# across(where(...)) is the current dplyr idiom replacing the superseded
# mutate_if(); only character columns are converted, all others are untouched.
credit_data <- credit_data %>%
  mutate(across(where(is.character), as.factor))

# Exploratory Data Analysis (EDA)
# Printed explicitly so the summary is also shown when the script is run via
# source(), where top-level values are not auto-printed.
print(summary(credit_data))
# Split the data into training and testing sets
# The seed is fixed so the partition is reproducible. createDataPartition()
# samples row indices based on the `class` column; p = 0.8 sends roughly 80%
# of the rows to training, and list = FALSE returns the indices as a matrix
# rather than a list.
set.seed(123)
train_index <- createDataPartition(
  y = credit_data[["class"]],
  p = 0.8,
  list = FALSE
)
# Rows not selected for training form the test set.
test_data <- credit_data[-train_index, ]
train_data <- credit_data[train_index, ]
# Model Training
# Fit a random forest that predicts `class` from every other column of the
# training data, growing 100 trees.
model <- randomForest(class ~ ., data = train_data, ntree = 100)

# Model Evaluation
# Predict on the test rows and tabulate predictions against the true labels.
# NOTE(review): confusionMatrix() treats the first factor level as the
# "positive" class unless `positive` is given — confirm that is the intended one.
predictions <- predict(model, newdata = test_data)
conf_matrix <- confusionMatrix(data = predictions, reference = test_data$class)
print(conf_matrix)

# Feature Importance
# Stored under a name that does not mask randomForest::importance(), and
# printed so the numeric scores are visible alongside the plot.
var_importance <- importance(model)
print(var_importance)
varImpPlot(model)