The following illustrates use of the MachineShop package to predict the species of flowers in Edgar Anderson's iris data set.
## Descriptive summary of the iris variables.
## (Named `summary_stats` rather than `stats` to avoid shadowing the base
## stats package.)
summary_stats <- list(
  list("Number of flowers" = ~ length(Species)),
  "Species" = list(
    "setosa"     = ~ n_perc(Species == "setosa"),
    "versicolor" = ~ n_perc(Species == "versicolor"),
    "virginica"  = ~ n_perc(Species == "virginica")
  ),
  "Sepal.Length" = list("Median (Range)" = ~ median_range(Sepal.Length)),
  "Sepal.Width"  = list("Median (Range)" = ~ median_range(Sepal.Width)),
  "Petal.Length" = list("Median (Range)" = ~ median_range(Petal.Length)),
  "Petal.Width"  = list("Median (Range)" = ~ median_range(Petal.Width))
)

## Render the summary table
summary_kbl(summary_stats, iris)
## Analysis libraries
library(MachineShop)
library(magrittr)

## Training and test sets (2/3 train, 1/3 test; seeded for reproducibility)
set.seed(123)
train_indices <- sample(nrow(iris), nrow(iris) * 2 / 3)
trainset <- iris[train_indices, ]
testset <- iris[-train_indices, ]

## Model formula: predict Species from all other variables
fo <- Species ~ .

## Models available for factor responses
modelinfo(factor(0)) %>% names

## Model-specific information
modelinfo(GBMModel)

## Generalized boosted model fit to training set
iris_fit <- fit(fo, data = trainset, model = GBMModel)

## Variable importance (assign and print)
(vi <- varimp(iris_fit))
plot(vi)
## Test set predicted probabilities
predict(iris_fit, newdata = testset, type = "prob") %>% head

## Test set predicted classifications
predict(iris_fit, newdata = testset) %>% head

## Test set performance: observed responses vs. predicted probabilities
obs <- response(iris_fit, newdata = testset)
pred <- predict(iris_fit, newdata = testset, type = "prob")
performance(obs, pred)
## Resample estimation of model performance via cross-validation
## (assign and print in one step)
(res <- resample(fo, data = iris, model = GBMModel, control = CVControl))
summary(res)
plot(res)
## Default performance metrics
performance(res) %>% summary

## Metrics available for the resample output
metricinfo(res) %>% names

## User-specified metrics
performance(res, c(accuracy, kappa2)) %>% summary
## Tune over a grid of model parameters
iris_fit <- TunedModel(
  GBMModel,
  grid = expand_params(
    n.trees           = c(25, 50, 100),
    interaction.depth = 1:3,
    n.minobsinnode    = c(5, 10)
  )
) %>%
  fit(fo, data = iris)

## Variable importance
varimp(iris_fit)

## Plot performance over the grid points
tuned_model <- as.MLModel(iris_fit)
plot(tuned_model, type = "line")
## Model comparisons: 10-fold CV repeated 5 times, shared across models
control <- CVControl(folds = 10, repeats = 5)

## Fix: spell out `n.trees` in full — the original `n.tree = 50` only worked
## through R's partial argument matching.
res1 <- resample(fo, data = iris, model = GBMModel(n.trees = 50),
                 control = control)
res2 <- resample(fo, data = iris, model = RandomForestModel(ntree = 50),
                 control = control)
res3 <- resample(fo, data = iris, model = NNetModel(size = 5),
                 control = control)

## Combine and compare the resample results
res <- c(GBM = res1, RF = res2, NNet = res3)
summary(res)
plot(res)
## Pairwise model differences and t-tests
perfdiff <- diff(res)
summary(perfdiff)
t.test(perfdiff)
plot(perfdiff)
## Stacked regression ensemble of the three base learners
stackedmodel <- StackedModel(GBMModel, RandomForestModel, NNetModel)
res_stacked <- resample(fo, data = iris, model = stackedmodel)
summary(res_stacked)

## Super learner ensemble of the same base learners
supermodel <- SuperModel(GBMModel, RandomForestModel, NNetModel)
res_super <- resample(fo, data = iris, model = supermodel)
summary(res_super)
## Calibration curves for the GBM resample results, with standard-error bands
cal <- calibration(res1)
plot(cal, se = TRUE)
## Confusion matrix from the GBM resample results (assign and print;
## cutoff = NULL as in the original call)
(conf <- confusion(res1, cutoff = NULL))
summary(conf)
plot(conf)
## Partial dependence of predictions on the petal measurements
pd <- dependence(iris_fit, select = c(Petal.Length, Petal.Width))
plot(pd)
## Recipe-based interface with Species as the stratification variable
library(recipes)
rec <- recipe(fo, data = iris) %>%
  role_case(stratum = Species)

## Fit and evaluate via the recipe
iris_fit <- fit(rec, model = GBMModel)
varimp(iris_fit)
res <- resample(rec, model = GBMModel, control = CVControl)
summary(res)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.