This example uses characteristics of Pima Indian women who were tested for diabetes to predict their disease status.
## Descriptive summary of the complete-case Pima Indians diabetes data
library(mlbench)
data(PimaIndiansDiabetes2)

## Summary specification: each named entry is a table section whose rows are
## one-sided formulas naming the statistic to compute on a dataset column.
## NOTE(review): n_perc(), median_range() and summary_kbl() are not defined in
## this chunk -- presumably helpers from the document's setup code; confirm
## they are in scope before running.
stats <- list(
  list("Number of women" = ~ length(diabetes)),
  "diabetes" = list(
    "pos" = ~ n_perc(diabetes == "pos"),
    "neg" = ~ n_perc(diabetes == "neg")
  ),
  "pregnant" = list("Median (Range)" = ~ median_range(pregnant)),
  "glucose" = list("Median (Range)" = ~ median_range(glucose)),
  "pressure" = list("Median (Range)" = ~ median_range(pressure)),
  "triceps" = list("Median (Range)" = ~ median_range(triceps)),
  "insulin" = list("Median (Range)" = ~ median_range(insulin)),
  "mass" = list("Median (Range)" = ~ median_range(mass)),
  "pedigree" = list("Median (Range)" = ~ median_range(pedigree)),
  "age" = list("Median (Range)" = ~ median_range(age))
)

## Rows with any missing value are dropped before summarizing
summary_kbl(stats, na.omit(PimaIndiansDiabetes2))
## Analysis libraries
library(MachineShop)
library(ggplot2)

## Parallel processing
library(doParallel)
## Worker count is hard-coded; adjust to the cores available on your machine
registerDoParallel(cores = 6)

## Dataset
data(PimaIndiansDiabetes2, package = "mlbench")
## Keep complete cases only
Pima <- na.omit(PimaIndiansDiabetes2)

## Model formula: diabetes status regressed on all other columns
fo <- diabetes ~ .

## Model selected from tuned models
selected_model <- SelectedModel(
  TunedModel(KNNModel, grid = 5),
  TunedModel(NNetModel, grid = 5),
  TunedModel(RandomForestModel, grid = 5)
)

## Model fit
model_fit <- fit(fo, data = Pima, model = selected_model)

## Variable importance
vi <- varimp(model_fit)
plot(vi)
## Resample estimation with cross-validation
## (uses fo, Pima and selected_model created earlier in the example)
res <- resample(fo, data = Pima, model = selected_model, control = CVControl)

## Estimated performance
summary(performance(res))

## Variable probability cutoff: performance re-evaluated at a cutoff of 0.25
summary(performance(res, cutoff = 0.25))
## True positive and false positive rates over all probability cutoffs
## (computed from the resample results in res)
roc <- performance_curve(res)

## ROC curve with a reference diagonal, drawn with a fixed aspect ratio
plot(roc, diagonal = TRUE) + coord_fixed()
## Area under the curve
auc(roc)
## Confusion matrix from the resample results; the outer parentheses print
## the object as it is assigned
(conf <- confusion(res))

## Summary of the confusion matrix
summary(conf)

## Plot of the confusion matrix
plot(conf)
## Variable probability cutoff: confusion summary at a cutoff of 0.25
summary(confusion(res, cutoff = 0.25))
## Calibration of the resampled predictions
## NOTE(review): breaks = NULL presumably requests smoothed rather than binned
## calibration estimates -- confirm against the MachineShop documentation
cal <- calibration(res, breaks = NULL)

## Calibration plot with standard errors shown
plot(cal, se = TRUE)
## Partial dependence of the fitted model on selected predictors
pd <- dependence(model_fit, select = c(glucose, age, insulin))

## Partial dependence plots
plot(pd)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.