#' Power plots with ggplot2
#'
#' \code{powerPlot} generates Power Plots for \code{\link{lm}} or
#' \code{\link{bestfit}} objects with \code{\link{ggplot2}}.
#' @param formula a formula of type y~yhat, where y and yhat are the names of
#' the observed values and the fitted values
#' @param data an augmented data.frame (i.e. a data.frame with .fitted values)
#' @return a power plot
#' @name powerPlot
#' @export
powerPlot <- function(y, yhat, ...) {
UseMethod("powerPlot")
}
#' @name powerPlot
#' @param axis option to plot predicted values on the x axis (inverted) or in
#' the y axis (standard)
#' @param Smooth option to add a regression line to the plot
#' @param se option to add confidence interval of regression line to the plot
#' @param metrics TRUE or FALSE. If TRUE, metrics (RMSE, MAE and MAPE) are
#' displayed at the plot area.
#' @param R2 TRUE or FALSE. If TRUE, R2 between predicted and observed values
#' are printed.
#' @examples
#' library(sf)
#' library(broom)
#' library(ggplot2)
#' centro_2015$padrao <- as.numeric(centro_2015$padrao)
#' fit <- lm(log(valor) ~ area_total + quartos + suites + garagens +
#' log(dist_b_mar) + I(1/padrao),
#' data = centro_2015, subset = -c(31, 39))
#' s <- summary(fit)
#' centro_2015 <- expand.model.frame(fit, ~ valor + dist_b_mar + padrao,
#' na.expand = T)
#' centro_2015 <- augment(fit, newdata = centro_2015)
#' # Transformed Scale:
#' powerPlot(.fitted ~ log(valor), data = centro_2015,
#' system = "ggplot2", Smooth = TRUE, methods = c("lm", "loess"))
#' # Inverted axes:
#' powerPlot(log(valor) ~ .fitted, data = centro_2015,
#' system = "ggplot2", Smooth = TRUE, methods = c("lm", "loess"))
#'
#' # Original Scale:
#'
#' ## Mediana
#' p1 <- powerPlot(exp(.fitted) ~ valor, data = centro_2015,
#' system = "ggplot2", Smooth = TRUE, methods = c("lm", "loess"))
#' p1 + labs(title = "Poder de Predição", subtitle = "Mediana")
#'
#' ## Média
#' p2 <- powerPlot(exp(fitted(fit) + s$sigma^2/2) ~ valor, data = centro_2015,
#' system = "ggplot2", Smooth = TRUE, methods = c("lm", "loess"))
#' p2 + labs(title = "Poder de Predição", subtitle = "Média")
#'
#' ## Moda
#' p3 <- powerPlot(exp(fitted(fit) - s$sigma^2) ~ valor, data = centro_2015,
#' system = "ggplot2", Smooth = TRUE, methods = c("lm", "loess"))
#' p3 + labs(title = "Poder de Predição", subtitle = "Moda")
#' @export
powerPlot.default <- function(formula, data,
system = c("base", "ggplot2", "lattice"),
Smooth = FALSE, methods = "lm", se = FALSE,
metrics = c("none", "rmse", "mae", "mape"),
R2 = FALSE, labels = TRUE, palette = "Paired",
...){
data <- data
system <- match.arg(system)
metrics <- match.arg(metrics, several.ok = TRUE)
# Creates col vector for chosen palette
palNum <- which(rownames(RColorBrewer::brewer.pal.info) == palette)
maxcolors <- RColorBrewer::brewer.pal.info[palNum, "maxcolors"]
col <- RColorBrewer::brewer.pal(n = maxcolors, name = palette)
f <- formula
lhs <- f[[2]]
rhs <- f[[3]]
if (system == "ggplot2") {
p <- ggplot(data,
aes(x = rlang::eval_tidy(rhs), y = rlang::eval_tidy(lhs))) +
geom_point(colour = col[8], alpha=0.5) +
scale_y_continuous(labels = scales::label_number_auto()) +
scale_x_continuous(labels = scales::label_number_auto()) +
# xlab(bquote(~hat(Y))) +
# ylab("Y") +
xlab(deparse(rhs)) +
ylab(deparse(lhs)) +
geom_abline(color = col[1])
if (Smooth == TRUE) {
i <- 0
for (method in methods) {
i <- i + 2
p <- p +
stat_smooth(method = method, se = se,
color = ifelse(method == "lm", col[2],
ifelse(method == "loess",
col[8], col[i]))
)
}
}
if (labels == TRUE)
p <- p + ggrepel::geom_text_repel(aes(label = rownames(data))) +
if (R2 == TRUE){
RSQ <- substitute(R^2~"="~r2,
list(r2 = brf(cor(y, yhat)^2, nsmall = 2)))
p <- p +
ggpp::geom_label_npc(aes(npcx = "center", npcy = "top",
label = as.character(as.expression(RSQ))),
parse = TRUE,
color = "darkblue")
}
if ("none" %in% metrics) {
return(p)
} else {
npcx <- c(rmse = "left", mae = "right", mape = "center")
npcy <- c(rmse = "top", mae = "bottom", mape = "bottom")
col <- c(rmse = "blue", mae = "darkgreen", mape = "red")
require(Metrics)
a <- vector(mode = "character", length = 0)
for (metric in metrics) {
a[[metric]] <- paste(metric, " = ",
brf(do.call(metric, args = list(y, yhat)),
nsmall = 0))
}
DF <- data.frame(x.char = npcx[metrics], y.char = npcy[metrics],
text = a[metrics], color = col[metrics])
p <- p +
ggpp::geom_label_npc(data = DF,
aes(npcx = x.char, npcy = y.char, label = text),
color = col[metrics]
)
}
return(p)
} else if (system == "lattice") {
f <- as.formula(paste("y ~ yhat"))
xyplot(f,
panel = function(x, y) {
panel.xyplot(x, y)
panel.abline(lm(y ~ x))
}) +
latticeExtra::layer(panel.smoother(y ~ x, se = FALSE, span = 0.5))
} else {
car::scatterplot(formula = f, data = data,
col = "grey",
boxplots = "",
id = T,
pch = 20,
cex = .75,
las = 1,
regLine = list(method=lm, lty=1, lwd=2, col="green"),
smooth=list(smoother = car::loessLine, style = "none",
lty.smooth = 1, col.smooth = "blue"),
...
)
abline(a = 0, b = 1, col = "red")
}
}
#' @rdname powerPlot
#' @param object object of class \code{\link{lm}}, \code{\link{rq}},
#' \code{\link{lmerMod}} or \code{\link{bestfit}}.
#' @param FUN function used to transform the dependent variable (the inverse
#' of this function will be used to retransform the data to the original scale).
#' @param \dots further args passed to powerPlot.default
#' @examples
#' # powerPlot.lm:
#' par(pty="s") #default par(pty="m")
#' powerPlot(fit)
#' powerPlot(fit, main = "Poder de Predição")
#' powerPlot(fit, FUN = "log",
#' main = "Poder de Predição")
#' title(sub = "Em reais", adj = 1)
#' powerPlot(fit, axis = "inverted")
#' p <- powerPlot(fit, system = "ggplot2",
#' FUN = "log", axis = "inverted")
#' p +
#' labs(title = "Poder de Predição", subtitle = "Em Reais") +
#' theme(aspect.ratio = 1)
#' # Changes Dep Var Transformation
#' fit1 <- lm(sqrt(valor) ~ area_total + quartos + suites + garagens +
#' log(dist_b_mar) + I(1/padrao), centro_2015)
#' powerPlot(fit1)
#' powerPlot(fit1, FUN = "sqrt", axis = "inverted")
#' @export
#'
powerPlot.lm <- function(object,
axis = c("standard", "inverted"),
system = c("base", "ggplot2", "lattice"),
scale = c("transformed", "original"),
...) {
data <- stats::model.frame(object)
system <- match.arg(system)
axis <- match.arg(axis)
scale <- match.arg(scale)
params <- parameters(object)
lhs <- params$lhs
depvar <- params$response
depvarTrans <- params$depvarTrans
DF <- expand.model.frame(model = object, extras = depvar, na.expand = TRUE)
DF$`.fitted` <- fitted(object)
#DF <- broom::augment(object, newdata = DF)
f_lhs <- ".fitted"
f_rhs <- lhs
if (!is.na(depvarTrans) & scale == "original"){
DF[, ".fitted"] <- inverse(DF[, ".fitted"], depvarTrans)
f_rhs <- depvar
}
if (axis=="standard"){
f <- as.formula(paste(f_lhs, " ~ ", f_rhs, sep = ""))
} else {
f <- as.formula(paste(f_rhs, " ~ ", f_lhs, sep = ""))
}
p <- powerPlot(formula = f, data = DF, system = system, ...)
p
}
#' @rdname powerPlot
#' @param fit chosen fit
#' @param scale Plot values in original or transformed scale?
#' @examples
#' # powerPlot.bestfit:
#' library(sf)
#' dados <- st_drop_geometry(centro_2015)
#' dados$padrao <- as.numeric(dados$padrao)
#' best_fit <- bestfit(valor ~ ., dados,
#' transf = c("rec", "rsqrt", "log", "sqrt"),
#' subset = -c(31, 39))
#' p1 <- powerPlot(best_fit, system ="ggplot2")
#' p2 <- powerPlot(best_fit, system ="ggplot2", scale = "original")
#' cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2)
#' p1 <- powerPlot(best_fit, system = "ggplot2",
#' axis = "inverted")
#' p2 <- powerPlot(best_fit, system = "ggplot2",
#' axis = "inverted", scale = "original")
#' cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2)
#' powerPlot(best_fit, fit = 514)
#' p1 <- powerPlot(best_fit, fit = 514, system = "ggplot2",
#' axis = "inverted")
#' p2 <- powerPlot(best_fit, fit = 514, system = "ggplot2",
#' scale = "original", axis = "inverted")
#' cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2)
#'
#' @export
powerPlot.bestfit <- function(object, fit = 1, ...) {
scale <- match.arg(scale)
s <- summary(object, fit = fit)
z <- s$fit
p <- powerPlot.lm(object = z, ...)
p
}
#' @rdname powerPlot
#' @examples
#' # powerPlot.lmerMod:
#' library(lme4)
#' Mfit <- lmer(log(valor) ~ area_total + quartos + suites + garagens +
#' dist_b_mar + (1|padrao), dados)
#' powerPlot(Mfit)
#' powerPlot(Mfit, FUN = "log")
#' @export
#'
powerPlot.lmerMod <- function(object, FUN, ...){
require(broom.mixed)
z <- object
Y <- z@resp$y
Y_ajustado <- z@resp$mu
if (!missing(FUN)) {
Y <- inverse(Y, FUN)
Y_ajustado <- inverse(Y_ajustado, FUN)
}
p <- powerPlot(Y, Y_ajustado, ...)
if (!missing(FUN)) {
p <- p +
scale_y_continuous(labels = scales::label_number(accuracy = .01,
big.mark = ".",
decimal.mark = ",")) +
scale_x_continuous(labels = scales::label_number(accuracy = .01,
big.mark = ".",
decimal.mark = ","))
}
return(p)
}
#' @rdname powerPlot
#' @examples
#' library(quantreg)
#' set.seed(1)
#' dados <- data.frame(
#' Area = runif(100, min = 360, max = 600)
#' )
#' dados$LVU <- 12 - .0075*dados$Area + rnorm(100, mean = 0, sd = .25)
#' dados$VU <- exp(dados$LVU)
#' medFit <- rq(VU~Area, data = dados)
#' powerPlot(medFit)
#' powerPlot(medFit, axis = "inverted")
#' @export
#'
powerPlot.rq <- function(object, FUN, ...){
z <- object
data <- stats::model.frame(z)
Y <- data[, attr(z$terms, "response")]
Y_ajustado <- z$fitted.values
if (!missing(FUN)) {
Y <- inverse(Y, FUN)
Y_ajustado <- inverse(Y_ajustado, FUN)
}
p <- powerPlot(y = Y, yhat = Y_ajustado, ...)
if (!missing(FUN)) {
p <- p +
scale_y_continuous(labels = scales::label_number(accuracy = .01,
big.mark = ".",
decimal.mark = ",")) +
scale_x_continuous(labels = scales::label_number(accuracy = .01,
big.mark = ".",
decimal.mark = ","))
}
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.