R/semiconti.R
In assessor: Assessment Tools for Regression Models with Discrete and Semicontinuous Outcomes

Documented in resid_semiconti

#' Residuals for regression models with semicontinuous outcomes
#'
#' Calculates the DPIT residuals for regression models with semi-continuous outcomes.
#' The semi-continuous regression model such as
#' a Tweedie regression model from `tweedie` package or a Tobit regression model
#' from `VGAM`, `AER` packages is used in this function.
#'
#' @usage resid_semiconti(model, plot=TRUE, scale = "normal")
#' @seealso [resid_2pm()]
#'
#' @param model Model object (e.g., `tweedie`, `vglm`, and `tobit`)
#' @param plot A logical value indicating whether or not to return QQ-plot
#' @param scale You can choose the scale of the residuals between `normal` and `uniform` scales. The default scale is `normal`.
#'
#' @returns Residuals. If plot=TRUE, also produces a QQ plot.
#'
#' @details
#' The DPIT residual for the \eqn{i}th semicontinuous observation is defined as follows:
#' \deqn{\hat{r}_i = \frac{\hat{F}(Y_i|X_i)}{n}\sum_{j=1}^{n}I\bigg(\hat{p}_0(X_j) \leq \hat{F}(Y_i|X_i)\bigg),}
#' which has a null distribution of uniformity.
#' \eqn{\hat{F}} refers to the fitted cumulative distribution function,
#' and \eqn{\hat{p}_0} refers to the fitted probability of being zero.
#'
#'
#' @references Lu Yang (2024). Diagnostics for Regression Models with Semicontinuous Outcomes, Biometrics, https://arxiv.org/abs/2401.06347
#'
#' @importFrom stats ecdf
#' @import tweedie
#' @importFrom tweedie dtweedie
#' @importFrom tweedie ptweedie
#' @importFrom VGAM fitted
#' @export
#'
#' @examples
#' ## Tweedie model
#' library(tweedie)
#' library(statmod)
#' n <- 500
#' x11 <- rnorm(n)
#' x12 <- rnorm(n)
#' beta0 <- 5
#' beta1 <- 1
#' beta2 <- 1
#' lambda1 <- exp(beta0 + beta1 * x11 + beta2 * x12)
#' y1 <- rtweedie(n, mu = lambda1, xi = 1.6, phi = 10)
#' # Choose parameter p
#' # True model
#' model1 <-
#'   glm(y1 ~ x11 + x12,
#'     family = tweedie(var.power = 1.6, link.power = 0)
#'   )
#' resid.tweedie <- resid_semiconti(model1)
#'
#' ## Tobit regression model
#' library(VGAM)
#' beta13 <- 1
#' beta14 <- -3
#' beta15 <- 3
#'
#' set.seed(1234)
#' x11 <- runif(n)
#' x12 <- runif(n)
#' lambda1 <- beta13 + beta14 * x11 + beta15 * x12
#' sd0 <- 0.3
#' yun <- rnorm(n, mean = lambda1, sd = sd0)
#' y <- ifelse(yun >= 0, yun, 0)
#'
#' # Using VGAM package
#' # True model
#' fit1 <- vglm(formula = y ~ x11 + x12, tobit(Upper = Inf, Lower = 0, lmu = "identitylink"))
#' # Missing covariate
#' fit1miss <- vglm(formula = y ~ x11, tobit(Upper = Inf, Lower = 0, lmu = "identitylink"))
#'
#' resid.tobit1 <- resid_semiconti(fit1, plot = TRUE)
#' resid.tobit2 <- resid_semiconti(fit1miss, plot = TRUE)
#'
#' # Using AER package
#' library(AER)
#' # True model
#' fit2 <- tobit(y ~ x11 + x12, left = 0, right = Inf, dist = "gaussian")
#' # Missing covariate
#' fit2miss <- tobit(y ~ x11, left = 0, right = Inf, dist = "gaussian")
#' resid.aer1 <- resid_semiconti(fit2, plot = TRUE)
#' resid.aer2 <- resid_semiconti(fit2miss, plot = TRUE)


resid_semiconti <- function(model, plot = TRUE, scale = "normal") {
  if (!(scale %in% c("normal", "uniform"))) stop("scale has to be either normal or uniform")
  is.vglm <- isS4(model)
  if (is.vglm) {
    if (!(paste(model@call)[1] %in% c("vglm"))) stop("model has to be tweedie, vglm or tobit")
  }
  if (!is.vglm) {
    if (!is.null(model$family)) {
      model.family <- model$family[[1]]
    } # Tweedie
    else {
      model.family <- "AER"
    } # AER
    if (!(paste(model$call)[1] %in% c("tobit")) & model.family != "Tweedie") stop("model has to be tweedie, vglm or tobit")
  }

  if (!is.vglm && model.family == "Tweedie") {
    y1 <- model$y
    p.max <- get("p", envir = environment(model$family$variance))
    n <- length(y1)
    lambda1f <- model$fitted.values
    phi1f <- summary(model)$dis
    p1f <- dtweedie(rep(0, n), mu = lambda1f, xi = p.max, phi = phi1f)
    cdf1 <- ptweedie(y1, mu = lambda1f, xi = p.max, phi = phi1f)

    func <- ecdf(p1f)

    newp <- cdf1 * func(cdf1)
  }

  if (!is.vglm && model.family == "AER") {
    p1f <- pnorm(0, mean = VGAM::fitted(model), sd = summary(model)$scale)
    cdf1 <- pnorm(y, mean = VGAM::fitted(model), sd = summary(model)$scale)
    newp <- cdf1 * ecdf(p1f)(cdf1)
  }

  if (is.vglm) {
    y <- model@y
    p1f <- pnorm(0, mean = fitted(model), sd = exp(coef(model)[2]))
    cdf1 <- pnorm(y, mean = fitted(model), sd = exp(coef(model)[2]))
    newp <- cdf1 * ecdf(p1f)(cdf1)
    newp <- as.vector(newp)
  }


  if (plot == T) {
    if (scale == "normal") {
      newp <- qnorm(newp)
      n <- length(newp)
      qqplot(qnorm(ppoints(n)), (newp),
        main = "QQ plot", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles",
        cex.lab = 1, cex.axis = 1, cex.main = 1.5, lwd = 1.5
      )
      abline(0, 1, col = "red", lty = 5, cex.lab = 2, cex.axis = 2, cex.main = 2, lwd = 1.5)
    }
    if (scale == "uniform") {
      newp <- newp
      n <- length(newp)
      qqplot(ppoints(n), newp,
        main = "QQ plot", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles",
        cex.lab = 1, cex.axis = 1, cex.main = 1.5, lwd = 1.5
      )
      abline(0, 1, col = "red", lty = 5, cex.lab = 2, cex.axis = 2, cex.main = 2, lwd = 1.5)
    }
  } else {
    if (scale == "normal") newp <- qnorm(newp)
    if (scale == "uniform") newp <- newp
  }
  return(newp)
}

Any scripts or data that you put into this service are public.

assessor documentation built on April 3, 2025, 9:21 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

assessor
Assessment Tools for Regression Models with Discrete and Semicontinuous Outcomes

R/semiconti.R
In assessor: Assessment Tools for Regression Models with Discrete and Semicontinuous Outcomes

Defines functions resid_semiconti

Documented in resid_semiconti

Try the assessor package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

assessor Assessment Tools for Regression Models with Discrete and Semicontinuous Outcomes

R/semiconti.R In assessor: Assessment Tools for Regression Models with Discrete and Semicontinuous Outcomes

Defines functions resid_semiconti

Documented in resid_semiconti

Try the assessor package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

assessor
Assessment Tools for Regression Models with Discrete and Semicontinuous Outcomes

R/semiconti.R
In assessor: Assessment Tools for Regression Models with Discrete and Semicontinuous Outcomes