#' Run Models Performance
#'
#'
#' @title Run Models Performance
#' @description This function performs the summary of perfomance of models fitter by run_models function
#' @param fit_run_model list generated by function run_models
#' @param df_valida validation data frame
#' @param verbose plot graphics of comparation between models
#' @details details
#' @importFrom caret postResample
#' @importFrom dplyr tibble
#' @importFrom ggplot2 ggplot geom_point ggtitle geom_abline
#' @importFrom ggplot2 geom_tile facet_wrap geom_text
#' @importFrom ggplot2 xlim ylim aes labs geom_col
#' @importFrom ggplot2 scale_x_discrete scale_y_discrete
#' @importFrom ggplot2 scale_fill_gradientn
#' @importFrom tidyr gather
#' @importFrom viridis scale_color_viridis
#' @importFrom stats var
#' @examples
#' \dontrun{
#' library(labgeo)
#' library(dplyr)
#' data("c_stock")
#' vf = labgeo::factor_detect(c_stock, 40)
#' df = c_stock %>% mutate_at(vf,funs(factor)) %>% filter(stock_10 < 10)
#' dp = data_preparation(df = df, p = 0.20, prune = 0.99)
#' treino = dp$treino
#' teste = dp$teste
#' nm = names(treino)[1]
#' vrf = recursive_feature_elimination(treino,sizes = c(5,10,15,20,25,30,40),
#' nfolds = 5,cpu_cores = 7, metric = "Rsquared")
#' vsel = c(nm,vrf$optVariables)
#' dfsel = treino %>% select(one_of(vsel))
#' models = c("ridge", "rf", "cubist","pls")
#' fit_models = run_models(df = dfsel, models = models,
#' cpu_cores = 7, tune_length = 5, metric = "Rsquared")
#' dfresult = run_models_performance(fit_models, df_valida = teste, verbose = T)
#' }
#' @export
run_models_performance <- function(fit_run_model,
df_valida, verbose = FALSE) {
package_inicio <- search()[ifelse(
unlist(gregexpr("package:", search())) == 1, TRUE, FALSE)]
if (fit_run_model[[1]]$modelType == "Classification") {
result <- rmp_classificacao(fit_run_model, df_valida, verbose)
package_fim <- search()[ifelse(
unlist(gregexpr("package:", search())) == 1, TRUE, FALSE)]
package_list <- setdiff(package_fim, package_inicio)
if (length(package_list) > 0) for (package in package_list)
detach(package, character.only = TRUE)
return(result)
} else {
result <- rmp_regressao(fit_run_model, df_valida, verbose)
package_fim <- search()[ifelse(unlist(
gregexpr("package:", search())) == 1, TRUE, FALSE)]
package_list <- setdiff(package_fim, package_inicio)
if (length(package_list) > 0) for (package in package_list)
detach(package, character.only = TRUE)
return(result)
}
}
#' Calculate statiscs of accuracy to regression models
#'
#'
#' @title Calculate statiscs of accuracy to regression models
#' @description This function Calculate statiscs of accuracy to regression models
#' @param obs vector with observated data
#' @param pred vector with predicted value
#' @details details
#' @importFrom stats cor t.test
#' @examples
#' \dontrun{
#' v.obs = c(0,25,75,100)
#' v.pred = c(1.24,70,97)
#' pred_acc(v.obs, v.pred)
#' }
#' @export
pred_acc <- function(obs, pred) {
mu <- mean(obs)
mbe <- mean(obs - pred) ## mean bias error ()
mae <- mean(abs(obs - pred)) ## mean absolute error
mse <- mean( (obs - pred) ^ 2) ## mean square error
rme <- mbe / mu * 100 ## relative mean error
rmae <- mae / mu * 100 ## relative mean absolute error
rmse <- sqrt(mse) ## root mean square error
rrmse <- rmse / mu * 100 ## relative root mean square error
mo <- mean( (obs - mu) ^ 2)
nse <- 1 - (mse / mo) ## Nash-Sutcliffe efficiency
r2 <- stats::cor(obs, pred) ^ 2
#t <- stats::t.test(obs, pred, paired = TRUE)
vecv <- (1 - sum( (obs - pred) ^ 2) /
sum( (obs - mean(obs)) ^ 2)) * 100 ## variance explained by models
list(
mean_bias_error = mbe,
relative_mean_bias_error = rme,
mean_absolute_error = mae,
relative_mean_absolute_error = rmae,
mean_square_error = mse,
root_mean_square_error = rmse,
relative_rmse = rrmse,
Nash_Sutcliffe_efficiency = nse,
variance_explained_perc = vecv,
rsquared = r2
#t_test = list(t)
)
}
rmp_classificacao <- function(fit_run_model, df_valida, verbose = FALSE) {
Freq <- var <- valor <- Prediction <- Reference <- model <- NULL
nm <- length(fit_run_model)
summ_model <- dplyr::tibble(
model = character(nm), fit = list(nm), dfpredobs = list(nm),
accuracy = numeric(nm), Kappa = numeric(nm), time_run = numeric(nm),
byclass = list(nm), cf = list(nm),
g1 = list(nm), g2 = list(nm)
)
cont = 1
for (i in 1:length(fit_run_model)) {
fit_md <- fit_run_model[[i]]
if (is.null(fit_md) == FALSE) {
v <- tryCatch({ predict(fit_md, df_valida)},
error = function(e){NULL})
if (is.null(v) == FALSE) {
ddd <- data.frame(observado = df_valida[, 1], predito = v)
summ_model$model[i] <- fit_md$method
summ_model$fit[i] <- list(fit_md)
summ_model$dfpredobs[i] <- list(ddd)
cf <- caret::confusionMatrix(ddd$predito,
ddd$observado, mode = "everything")
summ_model$accuracy[i] <- cf$overall[1]
summ_model$Kappa[i] <- cf$overall[2]
summ_model$byclass[i] <- list(cf$byClass)
summ_model$time_run[i] = fit_md$times$everything[3]
summ_model$cf[i] <- list(cf$table)
cont = cont + 1
}
if (verbose == TRUE) {
confusion <- data.frame(cf$table)
freqcols <- prop.table(cf$table, 2) %>%
data.frame() %>% dplyr::rename(freq_col = Freq)
freqrows <- prop.table(cf$table, 1) %>%
data.frame() %>% dplyr::rename(freq_row = Freq)
ddd <- dplyr::left_join(freqcols, freqrows,
by = c("Prediction", "Reference")) %>%
tidyr::gather(key = var, value = valor, -Prediction, -Reference)
g1 <- ggplot(confusion, mapping = aes(x = Reference, y = Prediction)) +
geom_tile(colour = "white", fill = "lightyellow2") +
scale_x_discrete(name = "Actual Class") +
scale_y_discrete(name = "Predicted Class") +
geom_tile(
aes(x = Reference, y = Prediction),
data = subset(confusion, as.character(Reference) ==
as.character(Prediction)),
color = "black", size = 1, fill = "red", alpha = 0.2
) +
geom_text(aes(label = confusion$Freq), vjust = 1) +
ggtitle(fit_md$method)
g2 <- ggplot(ddd, mapping = aes(x = Reference, y = Prediction)) +
geom_tile(aes(fill = valor), colour = "white") +
scale_fill_gradientn(
colours = c("lightyellow2", "white", "palegreen"),
values = rescale(c(0, 50, 100))
) +
scale_x_discrete(name = "Actual Class") +
scale_y_discrete(name = "Predicted Class") +
labs(fill = "Normalized\nFrequency") +
geom_text(aes(label = round(ddd$valor, 2)), vjust = 1) +
ggtitle(fit_md$method) +
facet_wrap(~var)
}
if (verbose == TRUE) {
print(g1)
print(g2)
}
}
}
summ_model = summ_model[1:(cont-1), ]
if (verbose == TRUE) {
dfresult <- data.frame(model = summ_model$model,
accuracy = summ_model$accuracy,
kappa = summ_model$Kappa,
time = summ_model$time_run) %>%
print() %>% tidyr::gather(key = var, value = valor, -model, -time)
g1 = ggplot(dfresult, aes(x = model, y = valor, fill = model)) +
geom_col() +
geom_text(aes(label = round(valor, 3)), size = 2.5, vjust = 1.5) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position="none") +
facet_wrap(~ var, scales = 'free')
print(g1)
}
return(summ_model)
}
## from scales package
## https://github.com/hadley/scales
zero_range <- function(x, tol = 1000 * .Machine$double.eps) {
if (length(x) == 1)
return(TRUE)
if (length(x) != 2)
stop("x must be length 1 or 2")
if (any(is.na(x)))
return(NA)
if (x[1] == x[2])
return(TRUE)
if (all(is.infinite(x)))
return(FALSE)
m <- min(abs(x))
if (m == 0)
return(FALSE)
abs( (x[1] - x[2]) / m) < tol
}
rescale <- function(x, to = c(0, 1),
from = range(x, na.rm = TRUE, finite = TRUE)) {
if (zero_range(from) || zero_range(to)) {
return(ifelse(is.na(x), NA, mean(to)))
}
(x - from[1]) / diff(from) * diff(to) + to[1]
}
#' Plot predict observed and residual
#'
#' @title Plot predict observed and residual
#' @description This function plots predict, observed values and residual
#' @param result return of function rum_model_performance
#' @param residual logic value indicates if plot or not residuals
#' @details details
#' @importFrom dplyr tibble
#' @importFrom ggplot2 ggplot geom_segment geom_point geom_abline facet_wrap
#' @importFrom ggplot2 ggtitle aes theme_bw scale_color_continuous guides xlim ylim
#' @importFrom dplyr '%>%'
#' @examples
#' \dontrun{
#' plot_predict_observed_residual(result, TRUE)
#' }
#' @export
#'
plot_predict_observed_residual <- function(result, residual = FALSE) {
observado <- predito <- residuo <- NULL
nr <- nrow(result)
i <- 1
for (i in 1:nr) {
ddd <- result$dfpredobs[[i]]
ddd$model <- result$model[i]
if (i == 1) {
dresult <- ddd
} else {
dresult <- rbind(dresult, ddd)
}
}
if (residual == TRUE) {
g1 <- dresult %>%
ggplot(aes( y = observado, x = predito)) +
geom_segment(aes(y = predito, yend = observado,
x = predito, xend = predito)) +
xlim(range(c(ddd$predito, ddd$observado))) +
ylim(range(c(ddd$predito, ddd$observado))) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(y = observado), shape = 1) +
geom_point(aes(color = abs(residuo))) +
scale_color_continuous(low = "green", high = "red") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(color = FALSE) +
facet_wrap(~model, scales = "free") + theme_bw()
} else {
g1 <- dresult %>%
ggplot(aes( y = observado, x = predito)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(y = observado), shape = 1) +
geom_point(aes(color = abs(residuo))) +
scale_color_continuous(low = "green", high = "red") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(color = FALSE) +
facet_wrap(~model, scales = "free") + theme_bw()
}
#print(g1)
return(g1)
}
#' Plot Confusion Matrix
#'
#' @title Plot Confusion Matrix
#' @description This function Plot Confusion Matrix
#' @param obs observed values
#' @param pred predicted values
#' @importFrom ggplot2 ggplot geom_tile geom_text scale_fill_gradient theme
#' @importFrom ggplot2 ggtitle element_text
#' @importFrom tidyr gather
#' @importFrom caret confusionMatrix
#' @importFrom tibble rownames_to_column
#' @importFrom knitr kable
#' @importFrom kableExtra kable_styling scroll_box
#' @export
plot_confusio_matrix <- function(obs, pred) {
actual <- prediction <- freq <- y_test <- preds <- NULL
caret::confusionMatrix(pred, obs)$table %>%
prop.table(margin = 1) %>%
as.data.frame.matrix() %>%
rownames_to_column(var = "actual") %>%
tidyr::gather(key = "prediction", value = "freq", -actual) %>%
ggplot2::ggplot(aes(x = actual, y = prediction, fill = freq)) +
geom_tile() +
geom_text(aes(label = round(freq, 2)), size = 3, color = "gray20") +
scale_fill_gradient(low = "yellow", high = "red", limits = c(0, 1),
name = "Relative Frequency") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
ggtitle("Confusion Matrix - Random Forest (100 Trees)")
caret::confusionMatrix(y_test, preds)$table %>%
as.data.frame.matrix() %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped"), font_size = 8) %>%
scroll_box(height = "400px")
}
rmp_regressao <- function(fit_run_model, df_valida, verbose = FALSE) {
predito <- observado <- model <- mbe <- mae <- rmse <- nse <-
r2 <- var_exp <- var <- valor <- NULL
nm <- length(fit_run_model)
summ_model <- dplyr::tibble(
model = character(nm), r2 = numeric(nm), rmse = numeric(nm),
mbe = numeric(nm), mae = numeric(nm), nse = numeric(nm),
var_exp = numeric(nm), time = numeric(nm),
fit = list(nm), dfpredobs = list(nm)
)
cont = 1
for (i in 1:length(fit_run_model)) {
fit_md <- fit_run_model[[i]]
if (is.null(fit_md) == FALSE) {
v <- tryCatch({ predict(fit_md, df_valida[ ,-1])},
error = function(e){
print(paste( "Error:", conditionMessage(e)))
return(NULL)})
if (is.null(v)==FALSE) {
v = unlist(v)
nr = nrow(df_valida)
ddd <- data.frame(model = rep(fit_md$method, nr), observado = df_valida[, 1], predito = v,
residuo = abs(v - df_valida[, 1]), stringsAsFactors = FALSE) %>% na.omit()
names(ddd)[2] <- "observado"
names(ddd)[3] <- "predito"
names(ddd)[4] <- "residuo"
summ_model$model[cont] <- fit_md$method
summ_model$fit[cont] <- list(fit_md)
summ_model$dfpredobs[cont] <- list(ddd)
acc <- pred_acc(ddd$predito, ddd$observado)
summ_model$r2[cont] <- acc$rsquared
summ_model$rmse[cont] <- acc$root_mean_square_error
summ_model$mbe[cont] <- acc$mean_bias_error
summ_model$mae[cont] <- acc$mean_absolute_error
summ_model$nse[cont] <- acc$Nash_Sutcliffe_efficiency
summ_model$var_exp[cont] <- acc$variance_explained_perc
summ_model$time[cont] <- fit_md$times$everything[3]
cont = cont + 1
}
}
}
summ_model = summ_model[1:(cont-1), ]
if (verbose == TRUE) {
dgr <- summ_model %>%
select(model, mbe, mae, rmse, nse, r2, var_exp) %>% na.omit() %>%
tidyr::gather(key = var, value = valor, -model)
g1 <- ggplot2::ggplot(dgr, aes(x = model , y = valor, fill = model)) +
ggplot2::geom_col() +
ggplot2::geom_text(aes(label = round(valor, 3)), size = 2.5, vjust = 1.5) +
ggplot2::facet_wrap(~var, scales = "free") +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position="none") +
ggplot2::ggtitle(model)
print(g1)
}
return(summ_model)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.