Nothing
#' Ordered curve for assessing mean structures
#'
#' Creates a plot to assess the mean structure of regression models. The
#' plot compares the cumulative sum of the response variable and its hypothesized value.
#' Deviation from the diagonal suggests the possibility that the mean structure of the model is incorrect.
#'
#'
#' @details
#' The ordered curve plots \deqn{\hat{L}_1(t)=\frac{\sum_{i=1}^n\left[Y_i1(Z_i\leq t)\right]}{\sum_{i=1}^nY_i}} against
#' \deqn{\hat{L}_2(t)=\frac{\sum_{i=1}^n\left[\hat{\lambda}_i1(Z_i\leq t)\right]}{\sum_{i=1}^n\hat{\lambda}_i},}
#' where \eqn{\hat{\lambda}_i} is the fitted mean, and \eqn{Z_i} is the threshold variable. \cr
#'
#' If the mean structure is correctly specified in the model,
#' \eqn{\hat L_1(t)} and \eqn{\hat L_2(t)} should be close to each other.
#'
#' If the curve is distant from the diagonal, it suggests incorrectness in the mean structure.
#' Moreover, if the curve is above the diagonal, the summation of the response is larger than
#' the fitted mean, which implies that the mean is underestimated, and vice versa. \cr
#'
#' The role of `thr` (threshold variable \eqn{Z}) is to determine the rule for accumulating \eqn{\hat{\lambda}_i} and \eqn{Y_i}, \eqn{i=1,\ldots,n}
#' for the ordered curve.
#' The candidate for `thr` could be any function of predictors such as a single predictor (e.g., `x1`),
#' a linear combination of predictor (e.g., `x1+x2`), or fitted values (e.g., `fitted(model)`).
#' It can also be a variable being considered to be included in the mean function.
#' If a variable leads to a large discrepancy between the ordered curve and the diagonal,
#' including this variable in the mean function should be considered.
#'
#' For more details, see the reference paper.
#'
#' @references Yang, Lu. "Double Probability Integral Transform Residuals for Regression Models with Discrete Outcomes." arXiv preprint arXiv:2308.15596 (2023).
#'
#' @usage ord_curve(model, thr, line_args=list(), ...)
#'
#' @param model Regression model object (e.g.,`lm`, `glm`, `glm.nb`, `polr`, `lm`)
#' @param thr Threshold variable (e.g., predictor, fitted values, or variable to be included as a covariate)
#' @param line_args A named list of graphical parameters passed to
#' \code{graphics::abline()} to modify the reference (red) 45° line
#' in the QQ plot. If left empty, a default red dashed line is drawn.
#' @param ... Additional graphical arguments passed to
#' \code{stats::qqplot()} for customizing the QQ plot (e.g., \code{pch},
#' \code{col}, \code{cex}, \code{xlab}, \code{ylab}).
#'
#' @returns
#' \itemize{
#' \item x-axis: \eqn{\hat L_1(t)}
#' \item y-axis: \eqn{\hat L_2(t)}
#' }
#' which are defined in Details.
#'
#' @importFrom graphics abline
#'
#' @examples
#' ## Binary example of ordered curve
#' n <- 500
#' set.seed(1234)
#' x1 <- rnorm(n, 1, 1)
#' x2 <- rbinom(n, 1, 0.7)
#' beta0 <- -5
#' beta1 <- 2
#' beta2 <- 1
#' beta3 <- 3
#' q1 <- 1 / (1 + exp(beta0 + beta1 * x1 + beta2 * x2 + beta3 * x1 * x2))
#' y1 <- rbinom(n, size = 1, prob = 1 - q1)
#'
#' ## True Model
#' model0 <- glm(y1 ~ x1 * x2, family = binomial(link = "logit"))
#' ord_curve(model0, thr = model0$fitted.values) # set the threshold as fitted values
#'
#' ## Missing a covariate
#' model1 <- glm(y1 ~ x1, family = binomial(link = "logit"))
#' ord_curve(model1, thr = x2) # set the threshold as a covariate
#'
#' ## Poisson example of ordered curve
#' n <- 500
#' set.seed(1234)
#' x1 <- rnorm(n)
#' x2 <- rnorm(n)
#' beta0 <- 0
#' beta1 <- 2
#' beta2 <- 1
#' lambda1 <- exp(beta0 + beta1 * x1 + beta2 * x2)
#'
#' y <- rpois(n, lambda1)
#'
#' ## True Model
#' poismodel1 <- glm(y ~ x1 + x2, family = poisson(link = "log"))
#' ord_curve(poismodel1, thr = poismodel1$fitted.values)
#'
#' ## Missing a covariate
#' poismodel2 <- glm(y ~ x1, family = poisson(link = "log"))
#' ord_curve(poismodel2, thr = poismodel2$fitted.values)
#' ord_curve(poismodel2, thr = x2)
#'
#' @export
ord_curve <- function(model, thr, line_args = list(), ...) {
UseMethod("ord_curve")
}
#' @rawNamespace S3method(ord_curve,glm)
ord_curve.glm <- function(model, thr, line_args = list(), ...) {
y1 <- model$y
.ord_curve_core(y1 = y1, q10 = stats::fitted.values(model), thr = thr, line_args = line_args, ...)
}
#' @rawNamespace S3method(ord_curve,negbin)
ord_curve.negbin <- function(model, thr, line_args = list(), ...) {
y1 <- model$y
.ord_curve_core(y1 = y1, q10 = stats::fitted.values(model), thr = thr, line_args = line_args, ...)
}
#' @rawNamespace S3method(ord_curve,polr)
ord_curve.polr <- function(model, thr, line_args = list(), ...) {
y1 <- model$y
.ord_curve_core(y1 = y1, q10 = stats::fitted.values(model), thr = thr, line_args = line_args, ...)
}
#' @rawNamespace S3method(ord_curve,lm)
ord_curve.lm <- function(model, thr, line_args = list(), ...) {
y1 <- model$model[, 1]
.ord_curve_core(y1 = y1, q10 = stats::fitted.values(model), thr = thr, line_args = line_args, ...)
}
#' @rawNamespace S3method(ord_curve,default)
ord_curve.default <- function(model, thr, line_args = list(), ...) {
cls <- paste(class(model), collapse = ", ")
stop(
sprintf("Unsupported model class for ord_curve(): %s. Supported: glm, negbin, polr, lm.", cls),
call. = FALSE
)
}
.ord_curve_core <- function(y1, q10, thr, line_args = list(), ...) {
if (length(thr) != length(y1)) {
stop("Length of thr and response has to match.", call. = FALSE)
}
ord <- order(thr)
plot_defaults <- list(
main = paste("Z:", deparse(substitute(thr))),
xlab = expression(L[2](t)),
ylab = expression(L[1](t)),
cex.lab = 1,
cex.axis = 1,
cex.main = 1.5,
lwd = 2,
type = "l",
ylim = c(0, 1),
xlim = c(0, 1)
)
plot_args <- utils::modifyList(plot_defaults, list(...))
do.call(graphics::plot, c(
list(
x = cumsum(q10[ord]) / sum(q10),
y = cumsum(y1[ord]) / sum(y1)
),
plot_args
))
line_defaults <- list(a = 0, b = 1, col = "red", lty = 5, lwd = 2)
ab_args <- utils::modifyList(line_defaults, line_args)
do.call(graphics::abline, ab_args)
invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.