appraiseR: R Package For Real Estate Appraisals

Documented in predictResponse

#' Mass grid predictions for marginal effects plotting
#'
#' \code{predictResponse} makes predictions for plotting marginal effects for
#' model terms with or without confidence/prediction intervals
#'
#' @export
#' @examples
#'
#' 1. Random generated data
#'
#' 1.1 Just one factor variable
#'
#' n <- 30
#' set.seed(1)
#' dados <- data.frame(PU = c(rnorm(n, mean = 250, sd = 100),
#'                            rnorm(n, mean = 500, sd = 100),
#'                            rnorm(n, mean = 750, sd = 100)),
#'                     Bairro = gl(3, n, 3*n, labels = LETTERS[1:3]))
#'
#' fit <- lm(PU ~ Bairro, data = dados)
#'
#' predictResponse("Bairro", fit)
#' predictResponse("Bairro", fit, residuals = T)
#' predictResponse("Bairro", fit, at = list(Bairro = "A"))
#'
#' 1.2 Unbalanced data
#'
#' n <- 28
#' set.seed(1)
#' dados <- data.frame(PU = c(rnorm(n/2, mean = 250, sd = 100),
#'                            rnorm(n, mean = 500, sd = 100),
#'                            rnorm(3*n, mean = 750, sd = 100)),
#'                     Bairro = c(rep("A", n/2), rep("B", n), rep("C", 3*n))
#'                     )
#'
#' fit <- lm(PU ~ Bairro, data = dados)
#'
#' predictResponse("Bairro", fit)
#' predictResponse("Bairro", fit, residuals = T)
#' predictResponse("Bairro", fit, at = list(Bairro = "A"))
#'
#' 1.3 Just one continuous variable
#' library(MASS)
#' sample_cov <- matrix(c(1000^2, -40000,
#'                        -40000,  50^2),
#'                      ncol = 2, byrow = T)
#' n <- 50
#' set.seed(4)
#' dados <- mvrnorm(n = n,
#'                 mu = c(5000, 360),
#'                 Sigma = sample_cov,
#'                 empirical = T)
#' colnames(dados) <- c("PU", "Area")
#' dados <- as.data.frame(dados)
#'
#' fit <- lm(PU ~ Area, data = dados)
#'
#' predictResponse("Area", fit, residuals = T)
#' predictResponse("Area", fit, residuals = T, at = list(Area = 425))
#' predictResponse("Area", fit, residuals = T, at = list(Area = 500))
#'
#' 1.4 One continuous variable and one factor
#'
#' n = 25
#' set.seed(1)
#' Area <- rnorm(n = 3*n, mean = 360, sd = 50)
#'
#' Bairro <- factor(sample(LETTERS[1:3], size = 3*n, replace = T))
#'
#' dados <- data.frame(Area, Bairro)
#'
#' dados <- within(dados, {
#'   PU <- ifelse(Bairro == "A", 7500,
#'                ifelse(Bairro == "B", 10000, 12500))
#'   PU <- PU - 15*Area + rnorm(n = 3*n, mean = 0, sd = 1000)
#' })
#'
#' fit <- lm(PU ~ Area + Bairro, data = dados)
#' fit1 <- lm(PU ~ Area + Bairro - 1, data = dados)
#'
#' predictResponse("Bairro", fit, residuals = T)
#' predictResponse("Bairro", fit1, residuals = T)
#'
#' 1.5 Polynomial regression
#'
#' n <- 100
#' set.seed(1)
#' Area <- rnorm(n = n, mean = 600, sd = 100)
#' PU <- 10000 + .225*Area -.0275*Area^2 + .000025*Area^3 +
#'        rnorm(n = n, mean = 0, sd = 250)
#'
#' dados <- data.frame(cbind(PU, Area))
#'
#' fit <- lm(PU ~ Area, data = dados)
#'
#' p <- predictResponse("Area", fit)
#'
#' fit2 <- lm(PU ~ poly(Area, 2, raw = TRUE), data = dados)
#'
#' p <- predictResponse("Area", fit2)
#'
#' fit3 <- lm(PU ~ poly(Area, 3, raw = TRUE), data = dados)
#' fit3a <- lm(PU ~ poly(Area, 3), data = dados)
#'
predictResponse <-
  function(x, object,
           interval =  c("none", "confidence", "prediction", "both"),
           level = 0.80,
           at,
           FUN,
           residuals = FALSE,
           ...){

  if (!hasIntercept(object)) object <- update(object, ~.+1)

  interval <- match.arg(interval)
  variable <- x
  params <- parameters(object)
  parametros <- params$parameters
  response <- params$response
  lhs <- params$lhs
  depvarTrans <- params$depvarTrans
  predictors <- params$predictors

  if(!missing(at))  grid <- createGrid(variable, object, at = at) else
    grid <- createGrid(variable, object)

  #new <- cbind(1, grid$new)
  #names(new)[1] <- "(Intercept)"
  new <- grid$new
  p_local <- grid$p_local
  mframe <- grid$mframe

  tt <- attr(terms(object), "variables")
  sel <- lapply(tt, all.vars)[-1]
  res <- lapply(all.vars(terms(object)), \(x) which(sapply(sel, \(y) x %in% y)))
  res <- setNames(res, all.vars(terms(object)))

  # y <- as.matrix(new) %*% coef(object)

  if(interval == "both") {
    yc <- stats::predict.lm(object = object, type = "terms",
                            newdata = remove_missing_levels(object, new),
                            interval = "confidence", level = level, ...)
    yp <- stats::predict.lm(object = object, type = "terms",
                            newdata = remove_missing_levels(object, new),
                            interval = "prediction", level = level, ...)

    k <- attr(yc[["fit"]], "constant")
    if (!missing(at)) {
      yAt <- unique(rowSums(yc[["fit"]][, unlist(res[setdiff(predictors, variable)])-1,
                                 drop = F]))
    } else {
      yAt <- 0
    }

      ycFit <- rowSums(yc[["fit"]][, res[[variable]]-1, drop = F]) + k + yAt
      ycLwr <- rowSums(yc[["lwr"]][, res[[variable]]-1, drop = F]) + k + yAt
      ycUpr <- rowSums(yc[["upr"]][, res[[variable]]-1, drop = F]) + k + yAt

      ypFit <- rowSums(yp[["fit"]][, res[[variable]]-1, drop = F]) + k + yAt
      ypLwr <- rowSums(yp[["lwr"]][, res[[variable]]-1, drop = F]) + k + yAt
      ypUpr <- rowSums(yp[["upr"]][, res[[variable]]-1, drop = F]) + k + yAt

      yc <- data.frame(fit = ycFit, lwr = ycLwr, upr = ycUpr)
      yp <- data.frame(fit = ypFit, lwr = ypLwr, upr = ypUpr)

      y <- dplyr::inner_join(yc, yp, by = "fit",
                             suffix = c(".conf", ".pred"))

  } else if (interval %in% c("confidence", "prediction")) {
    y <- stats::predict.lm(object = object, newdata = new, type = "terms",
                           interval = interval, level = level, ...)

    k <- attr(y[["fit"]], "constant")
    if (!missing(at)) {
      yAt <- unique(rowSums(y[["fit"]][, unlist(res[setdiff(predictors, variable)])-1,
                                 drop = F]))
    } else {
      yAt <- 0
    }
    yFit <- rowSums(y[["fit"]][, res[[variable]]-1, drop = F]) + k + yAt
    yLwr <- rowSums(y[["lwr"]][, res[[variable]]-1, drop = F]) + k + yAt
    yUpr <- rowSums(y[["upr"]][, res[[variable]]-1, drop = F]) + k + yAt
    y <- data.frame(fit = yFit, lwr = yLwr, upr = yUpr)
  } else { # i.e., if interval = "none"
    y <- stats::predict.lm(object = object, newdata = new, type = "terms")
    k <- attr(y, "constant")
    if (!missing(at)) {
      yAt <- unique(rowSums(y[, unlist(res[setdiff(predictors, variable)])-1,
                              drop = F]))
    } else {
      yAt <- 0
    }
    y <- rowSums(y[, res[[variable]]-1, drop = F]) + k + yAt
  }

  if (!missing(FUN)) {
    Y <- inverse(y, FUN)
    Y <- cbind(Y, campo_arbitrio(Y))
  } else if (is.na(depvarTrans) | depvarTrans == "identity"){
    Y <- cbind(y, campo_arbitrio(y))
  } else {
    Y <- inverse(y, FUN = depvarTrans)
    CA <- campo_arbitrio(Y)
    Y <- cbind(Y, CA)
    Y <- inverse(Y, FUN = invFunc(depvarTrans))
  }

  pred <- data.frame(new[, variable], Y)
  colnames(pred)[1] <- variable
  colnames(pred)[2] <- ifelse(!missing(FUN), response, lhs)

  z <- list()
  z$k <- k
  z$grid <- new
  z$p_local <- p_local
  z$pred <- pred
  mframe <- broom::augment(object, newdata = mframe)
  z$mframe <- mframe

  if (!missing(at)) z$yAt <- yAt

  if (residuals == TRUE) {
    if (length(predictors) > 1){
      z$pres <-residuals(object, "partial")
    } else {
      if(is.factor(mframe[, variable]) | is.character(mframe[, variable])){
        z$pres <- residuals(object)
      } else {
        z$pres <- residuals(object, "partial")
      }
    }
  }

  return(z)

  }