library(plyr)
library(readr)
library(dplyr)
library(caret)
#install.packages("randomForest")
library(randomForest)
# Stroke prediction script: loads the dataset, label-encodes the categorical
# columns, splits the rows 70/30 into training/validation sets and fits a
# random forest classifier on the training set.

# Load the data; the CSV is expected in the current working directory.
df <- read.csv(file = "healthcare-dataset-stroke-data.csv")
head(df)
glimpse(df)
str(df)

# Label encoding: gender, ever_married, work_type, Residence_type,
# smoking_status.
# Going through factor() first makes the result independent of whether
# read.csv() returned these columns as character (the default since R 4.0) or
# as factor (the older default). as.numeric() applied directly to character
# labels would produce NA for every row.
# (The earlier `LabelEncoder$fit()` call was removed: no loaded package
# defines `LabelEncoder`, so it stopped the script with an error.)
colunas_categoricas <- c(
  "gender", "ever_married", "work_type", "Residence_type", "smoking_status"
)
for (coluna in colunas_categoricas) {
  df[[coluna]] <- as.numeric(factor(df[[coluna]]))
}

# bmi has to be parsed from its text form. as.character() first prevents
# factor level codes from being mistaken for measurements. Entries that are
# not valid numbers become NA (R emits a coercion warning).
# NOTE(review): presumably the file marks missing bmi with a text placeholder
# such as "N/A" -- confirm against the CSV.
if (!is.numeric(df$bmi)) {
  df$bmi <- as.numeric(as.character(df$bmi))
}

# Classification target: convert once, before splitting, so the training and
# validation sets share the same type and factor levels.
df$stroke <- as.factor(df$stroke)

# Training/validation sets (70% / 30%); the seed keeps the split reproducible.
set.seed(53)
treino <- sample(nrow(df), 0.7 * nrow(df), replace = FALSE)
treinoSet <- df[treino, ]
validacaoSet <- df[-treino, ]

# randomForest() stops on missing predictor values (its default na.action is
# na.fail), so fill missing bmi with the training-set median. The same value
# is applied to the validation set so that no validation information leaks
# into the preprocessing.
bmi_mediana <- median(treinoSet$bmi, na.rm = TRUE)
treinoSet$bmi[is.na(treinoSet$bmi)] <- bmi_mediana
validacaoSet$bmi[is.na(validacaoSet$bmi)] <- bmi_mediana

summary(treinoSet)
summary(validacaoSet)
str(treinoSet) # 3577 obs. of 12 variables
str(validacaoSet) # 1533 obs. of 12 variables

# Model creation (interactive help: ?randomForest).
# NOTE(review): `stroke ~ .` uses every remaining column as a predictor; if df
# carries a row-identifier column it should be dropped first -- confirm.
modelo <- randomForest(stroke ~ ., data = treinoSet, importance = TRUE)
modelo
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.