#' Run Models Performance
#'
#'
#' @title Run Models Performance
#' @description This function performs the summary of perfomance of models fitter by run_models function
#' @param fit_run_model list generated by function run_models
#' @param df_valida validation data frame
#' @param verbose plot graphics of comparation between models
#' @details details
#' @importFrom caret postResample
#' @importFrom dplyr tibble
#' @importFrom ggplot2 ggplot geom_point ggtitle geom_abline
#' @importFrom ggplot2 geom_tile facet_wrap geom_text
#' @importFrom ggplot2 xlim ylim aes labs geom_col
#' @importFrom ggplot2 scale_x_discrete scale_y_discrete
#' @importFrom ggplot2 scale_fill_gradientn
#' @importFrom tidyr gather
#' @importFrom stats var
#' @examples
#' \dontrun{
#' library(labgeo)
#' library(dplyr)
#' data("c_stock")
#' vf = labgeo::factor_detect(c_stock, 40)
#' df = c_stock %>% mutate_at(vf,funs(factor)) %>% filter(stock_10 < 10)
#' dp = data_preparation(df = df, p = 0.20, prune = 0.99)
#' treino = dp$treino
#' teste = dp$teste
#' nm = names(treino)[1]
#' vrf = recursive_feature_elimination(treino,sizes = c(5,10,15,20,25,30,40),
#' nfolds = 5,cpu_cores = 7, metric = "Rsquared")
#' vsel = c(nm,vrf$optVariables)
#' dfsel = treino %>% select(one_of(vsel))
#' models = c("ridge", "rf", "cubist","pls")
#' fit_models = run_models(df = dfsel, models = models,
#' cpu_cores = 7, tune_length = 5, metric = "Rsquared")
#' dfresult = run_models_performance(fit_models, df_valida = teste, verbose = T)
#' }
#' @export
# importFrom viridis scale_color_viridis
run_models_performance <- function(fit_run_model, df_valida, verbose = FALSE) {
package_inicio <- search()[ifelse(unlist(gregexpr("package:", search())) == 1,
TRUE, FALSE)]
if (fit_run_model[[1]]$modelType == "Classification") {
result <- rmp_classificacao(fit_run_model, df_valida, verbose)
package_fim <- search()[ifelse(unlist(gregexpr("package:", search())) == 1,
TRUE, FALSE)]
package_list <- setdiff(package_fim, package_inicio)
if (length(package_list) > 0) {
for (package in package_list) {
detach(package, character.only = TRUE)
}
}
return(result)
} else {
result <- rmp_regressao(fit_run_model, df_valida, verbose)
package_fim <- search()[ifelse(unlist(gregexpr("package:", search())) == 1,
TRUE, FALSE)]
package_list <- setdiff(package_fim, package_inicio)
if (length(package_list) > 0) {
for (package in package_list) {
detach(package, character.only = TRUE)
}
}
return(result)
}
}
#' Calculate statiscs of accuracy to regression models
#'
#'
#' @title Calculate statiscs of accuracy to regression models
#' @description This function Calculate statiscs of accuracy to regression models
#' @param obs vector with observated data
#' @param pred vector with predicted value
#' @details details
#' @importFrom stats cor t.test
#' @examples
#' \dontrun{
#' v.obs = c(0,25,75,100)
#' v.pred = c(1.24,70,97)
#' pred_acc(v.obs, v.pred)
#' }
#' @export
pred_acc <- function(obs, pred) {
mu <- mean(obs)
mbe <- mean(obs - pred)
mae <- mean(abs(obs - pred))
mse <- mean( (obs - pred) ^ 2)
rme <- mbe / mu * 100
rmae <- mae / mu * 100
rmse <- sqrt(mse)
rrmse <- rmse / mu * 100
mo <- mean( (obs - mu) ^ 2)
nse <- 1 - (mse / mo)
r2 <- stats::cor(obs, pred) ^ 2
vecv <- (1 - sum( (obs - pred) ^ 2) /
sum( (obs - mean(obs)) ^ 2)) * 100
list(
mean_bias_error = mbe,
relative_mean_bias_error = rme,
mean_absolute_error = mae,
relative_mean_absolute_error = rmae,
mean_square_error = mse,
root_mean_square_error = rmse,
relative_rmse = rrmse,
Nash_Sutcliffe_efficiency = nse,
variance_explained_perc = vecv,
rsquared = r2
)
}
rmp_classificacao <- function(fit_run_model, df_valida, verbose = FALSE) {
Freq <- var <- value <- Prediction <- Reference <- model <- time <- NULL
nm <- length(fit_run_model)
summ_model <- dplyr::tibble(
model = character(nm), fit = list(nm), dfpredobs = list(nm),
accuracy = numeric(nm), Kappa = numeric(nm), time_run = numeric(nm),
byclass = list(nm), cf = list(nm),
g1 = list(nm), g2 = list(nm)
)
cont <- 1
for (i in seq_along(fit_run_model)) {
fit_md <- fit_run_model[[i]]
if (is.null(fit_md) == FALSE) {
v <- tryCatch({
predict(fit_md, df_valida[, -1])
},
error = function(e) {
print(" ")
print(e)
NULL
}
)
if (is.null(v) == FALSE) {
ddd <- data.frame(vobs = df_valida[, 1], vpred = v)
summ_model$model[cont] <- fit_md$method
summ_model$fit[cont] <- list(fit_md)
summ_model$dfpredobs[cont] <- list(ddd)
cf <- caret::confusionMatrix(
ddd$vpred,
ddd$vobs, mode = "everything"
)
summ_model$accuracy[cont] <- cf$overall[1]
summ_model$Kappa[cont] <- cf$overall[2]
summ_model$byclass[cont] <- list(cf$byClass)
summ_model$time_run[cont] <- fit_md$times$everything[3]
summ_model$cf[cont] <- list(cf$table)
cont <- cont + 1
}
}
if (verbose == TRUE) {
confusion <- data.frame(cf$table)
freqcols <- prop.table(cf$table, 2) %>%
data.frame() %>%
dplyr::rename(freq_col = Freq)
freqrows <- prop.table(cf$table, 1) %>%
data.frame() %>%
dplyr::rename(freq_row = Freq)
ddd <- dplyr::left_join(
freqcols, freqrows,
by = c("Prediction", "Reference")
) %>%
tidyr::gather(key = var, value = value, -Prediction, -Reference)
g1 <- ggplot(confusion, mapping = aes(x = Reference, y = Prediction)) +
geom_tile(colour = "white", fill = "lightyellow2") +
scale_x_discrete(name = "Actual Class") +
scale_y_discrete(name = "Predicted Class") +
geom_tile(
aes(x = Reference, y = Prediction),
data = subset(confusion, as.character(Reference) ==
as.character(Prediction)),
color = "black", size = 1, fill = "red", alpha = 0.2
) +
geom_text(aes(label = confusion$Freq), vjust = 1) +
ggtitle(fit_md$method)
g2 <- ggplot(ddd, mapping = aes(x = Reference, y = Prediction)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradientn(
colours = c("lightyellow2", "white", "palegreen"),
values = rescale(c(0, 50, 100))
) +
scale_x_discrete(name = "Actual Class") +
scale_y_discrete(name = "Predicted Class") +
labs(fill = "Normalized\nFrequency") +
geom_text(aes(label = round(ddd$value, 3)), vjust = 1) +
ggtitle(fit_md$method) +
facet_wrap(~var)
}
if (verbose == TRUE) {
print(g1)
print(g2)
}
}
summ_model <- summ_model[1:(cont - 1), ]
if (verbose == TRUE) {
dfresult <- data.frame(
model = summ_model$model,
accuracy = summ_model$accuracy,
kappa = summ_model$Kappa,
time = summ_model$time_run
) %>%
print() %>%
tidyr::gather(key = var, value = value, -model, -time)
g1 <- ggplot(dfresult, aes(x = model, y = value, fill = model)) +
geom_col() +
geom_text(aes(label = round(value, 3)), size = 2.5, vjust = 1.5) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
) +
facet_wrap(~ var, scales = "free")
print(g1)
}
return(summ_model)
}
zero_range <- function(x, tol = 1000 * .Machine$double.eps) {
if (length(x) == 1) {
return(TRUE)
}
if (length(x) != 2) {
stop("x must be length 1 or 2")
}
if (any(is.na(x))) {
return(NA)
}
if (x[1] == x[2]) {
return(TRUE)
}
if (all(is.infinite(x))) {
return(FALSE)
}
m <- min(abs(x))
if (m == 0) {
return(FALSE)
}
abs( (x[1] - x[2]) / m) < tol
}
rescale <- function(x, to = c(0, 1),
from = range(x, na.rm = TRUE, finite = TRUE)) {
if (zero_range(from) || zero_range(to)) {
return(ifelse(is.na(x), NA, mean(to)))
}
(x - from[1]) / diff(from) * diff(to) + to[1]
}
#' Plot predict observed and residual
#'
#' @title Plot predict observed and residual
#' @description This function plots predict, observed values and residual
#' @param result return of function rum_model_performance
#' @param residual logic value indicates if plot or not residuals
#' @details details
#' @importFrom dplyr tibble
#' @importFrom ggplot2 ggplot geom_segment geom_point geom_abline facet_wrap
#' @importFrom ggplot2 ggtitle aes theme_bw scale_color_continuous guides xlim ylim
#' @importFrom dplyr '%>%'
#' @examples
#' \dontrun{
#' plot_predict_observed_residual(result, TRUE)
#' }
#' @export
#'
plot_predict_observed_residual <- function(result, residual = FALSE) {
vobs <- vpred <- residuo <- NULL
nr <- nrow(result)
i <- 1
for (i in 1:nr) {
ddd <- result$dfpredobs[[i]]
ddd$model <- result$model[i]
if (i == 1) {
dresult <- ddd
} else {
dresult <- rbind(dresult, ddd)
}
}
if (residual == TRUE) {
g1 <- dresult %>%
ggplot(aes(y = vobs, x = vpred)) +
geom_segment(aes(
y = vpred, yend = vobs,
x = vpred, xend = vpred
)) +
xlim(range(c(ddd$vpred, ddd$vobs))) +
ylim(range(c(ddd$vpred, ddd$vobs))) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(y = vobs), shape = 1) +
geom_point(aes(color = abs(residuo))) +
scale_color_continuous(low = "green", high = "red") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(color = FALSE) +
facet_wrap(~model, scales = "free") + theme_bw()
} else {
g1 <- dresult %>%
ggplot(aes(y = vobs, x = vpred)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(y = vobs), shape = 1) +
geom_point(aes(color = abs(residuo))) +
scale_color_continuous(low = "green", high = "red") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(color = FALSE) +
facet_wrap(~model, scales = "free") + theme_bw()
}
return(g1)
}
#' Plot Confusion Matrix
#'
#' @title Plot Confusion Matrix
#' @description This function Plot Confusion Matrix
#' @param obs observed values
#' @param pred predicted values
#' @importFrom ggplot2 ggplot geom_tile geom_text scale_fill_gradient theme
#' @importFrom ggplot2 ggtitle element_text aes
#' @importFrom tidyr gather
#' @importFrom caret confusionMatrix
#' @importFrom tibble rownames_to_column
#' @importFrom knitr kable
#' @importFrom kableExtra kable_styling scroll_box
#' @export
plot_confusion_matrix <- function(obs, pred) {
actual <- prediction <- freq <- NULL
g1 <- caret::confusionMatrix(pred, obs)$table %>%
prop.table(margin = 1) %>%
as.data.frame.matrix() %>%
tibble::rownames_to_column(var = "actual") %>%
tidyr::gather(key = "prediction", value = "freq", -actual) %>%
ggplot2::ggplot(ggplot2::aes(x = actual, y = prediction, fill = freq)) +
ggplot2::geom_tile() +
ggplot2::geom_text(
ggplot2::aes(label = round(freq, 2)), size = 3,
color = "gray20"
) +
ggplot2::scale_fill_gradient(
low = "yellow", high = "red", limits = c(0, 1),
name = "Relative Frequency"
) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
ggplot2::ggtitle("Confusion Matrix")
print(g1)
kp <- caret::confusionMatrix(pred, obs)$table %>%
as.data.frame.matrix() %>%
knitr::kable() %>%
kableExtra::kable_styling(bootstrap_options = c("striped"), font_size = 8)
print(" ")
print("----------------------------------------------------------")
print("confusion matrix")
print(kp)
ddd <- caret::confusionMatrix(pred, obs)$overall
dddf <- data.frame(names(ddd), ddd)
rownames(dddf) <- NULL
names(dddf) <- c("metric", "value")
print(" ")
print("----------------------------------------------------------")
print("metrics")
print(knitr::kable(dddf))
ddd <- data.frame( (caret::confusionMatrix(pred, obs))$byClass)
ddd <- data.frame(labgeo::abbrev_colnames(ddd, 16))
print(" ")
print("----------------------------------------------------------")
print("metrics")
print(knitr::kable(ddd[, 1:4]))
print(knitr::kable(ddd[, 5:8]))
print(knitr::kable(ddd[, 9:11]))
}
rmp_regressao <- function(fit_run_model, df_valida, verbose = FALSE) {
vpred <- vobs <- model <- mbe <- mae <- rmse <- nse <-
r2 <- var_exp <- var <- value <- NULL
nm <- length(fit_run_model)
summ_model <- dplyr::tibble(
model = character(nm), r2 = numeric(nm), rmse = numeric(nm),
mbe = numeric(nm), mae = numeric(nm), nse = numeric(nm),
var_exp = numeric(nm), time = numeric(nm),
fit = list(nm), dfpredobs = list(nm)
)
cont <- 1
for (i in seq_along(fit_run_model)) {
fit_md <- fit_run_model[[i]]
if (is.null(fit_md) == FALSE) {
v <- tryCatch({
predict(fit_md, df_valida[, -1])
},
error = function(e) {
print(paste("Error:", conditionMessage(e)))
return(NULL)
}
)
if (is.null(v) == FALSE) {
v <- unlist(v)
nr <- nrow(df_valida)
ddd <- data.frame(
model = rep(fit_md$method, nr),
vobs = df_valida[, 1], vpred = v,
residuo = abs(v - df_valida[, 1]),
stringsAsFactors = FALSE
) %>% na.omit()
names(ddd)[2] <- "vobs"
names(ddd)[3] <- "vpred"
names(ddd)[4] <- "residuo"
summ_model$model[cont] <- fit_md$method
summ_model$fit[cont] <- list(fit_md)
summ_model$dfpredobs[cont] <- list(ddd)
acc <- pred_acc(ddd$vpred, ddd$vobs)
summ_model$r2[cont] <- acc$rsquared
summ_model$rmse[cont] <- acc$root_mean_square_error
summ_model$mbe[cont] <- acc$mean_bias_error
summ_model$mae[cont] <- acc$mean_absolute_error
summ_model$nse[cont] <- acc$Nash_Sutcliffe_efficiency
summ_model$var_exp[cont] <- acc$variance_explained_perc
summ_model$time[cont] <- fit_md$times$everything[3]
cont <- cont + 1
}
}
}
summ_model <- summ_model[1:(cont - 1), ]
if (verbose == TRUE) {
dgr <- summ_model %>%
select(model, mbe, mae, rmse, nse, r2, var_exp) %>%
na.omit() %>%
tidyr::gather(key = var, value = value, -model)
g1 <- ggplot2::ggplot(dgr, aes(x = model, y = value, fill = model)) +
ggplot2::geom_col() +
ggplot2::geom_text(
aes(label = round(value, 3)),
size = 2.5, vjust = 1.5
) +
ggplot2::facet_wrap(~var, scales = "free") +
ggplot2::theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
) +
ggplot2::ggtitle(model)
print(g1)
}
return(summ_model)
}
#' @title fit null model
#' @description calculate RMSE of null model
#' @param train_y vector with train outcome variable
#' @param test_y vector with test outcome variable
#' @importFrom caret postResample
#' @examples
#' \dontrun{
#' null_model(train_y = train$Ozone, test_y = test$Ozone)
#' }
#' @export
null_model <- function(train_y, test_y) {
media <- mean(train_y)
vr <- caret::postResample(rep(media, length(test_y)), test_y)
return(RMSE = vr[1])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.