# R/residuals.R In DGLMExtPois: Double Generalized Linear Models Extending Poisson Regression

#### Documented in residuals.glm_CMPresiduals.glm_hP

#' Extract and Visualize hyper-Poisson and COM-Poisson Model Residuals
#'
#' residuals is a method which extracts model residuals from a \code{"glm_hP"}
#' or \code{"glm_CMP"} object, commonly returned by \code{\link{glm.hP}} or
#' \code{\link{glm.CMP}}. Optionally, it produces a half normal plot with a
#' simulated envelope of the residuals.
#'
#' The response residuals (\eqn{r_i=y_i - \mu_i}{r[i]=y[i] - \mu[i]}), Pearson
#' residuals (\eqn{r^P_i = r_i/\sigma_i}{r[i]^P = r[i]/\sigma[i]}) or randomized
#' quantile residuals are computed. The randomized quantile residuals are
#' obtained computing the cumulative probabilities that the fitted model being
#' less than \emph{y} and less or equal than \emph{y}. A random value from a
#' uniform distribution between both probabilities is generated and the value of
#' the residual is the standard normal variate with the same cumulative
#' probability. Four replications of the quantile residuals are recommended
#' because of the random component (see Dunn and Smyth, 1996 for more details).
#'
#' generate a residuals against fitted values plot and a Normal Q-Q plot.
#'
#' The Normal Q-Q plot may show an unsatisfactory pattern of the Pearson
#' residuals of a fitted model: then we are led to think that the model is
#' incorrectly specified.

#'The half normal plot with simulated envelope indicates that, under the
#'distribution of the response variable, the model is fine when only a few
#'points fall off the envelope.
#'
#'@param object an object of class \code{"glm_hP"} or \code{"glm_CMP"},
#'  typically the result of a call to \code{\link{glm.hP}} or
#'@param type the type of residuals which should be returned. The alternatives
#'  are: "pearson" (default), "response" and "quantile". Can be abbreviated.
#'@param envelope a logical value indicating whether the envelope should be
#'  computed.
#'@param rep	number of replications for envelope construction. Default is 19,
#'  that is the smallest 95 percent band that can be built.
#'@param title	a string indicating the main title of the envelope.
#'@param ... further arguments passed to or from other methods.
#'
#'@return Residual values.
#'
#'
#'@references Dunn, P.K., Smyth, G.K. (1996). Randomized quantile residuals.
#'  Journal of Computational and Graphical Statistics, 5(3), 236-244.
#'
#'  Atkinson, A. (1981). Two graphical displays for outlying and influential
#'  observations in regression. Biometrika, 68 (1), 13–20.
#'
#'@name residuals
NULL

#' @rdname residuals
#' @examples
#' Bids$size.sq <- Bids$size ^ 2
#' hP.fit <- glm.hP(formula.mu = numbids ~ leglrest + rearest + finrest +
#'               whtknght + bidprem + insthold + size + size.sq + regulatn,
#'               formula.gamma = numbids ~ 1, data = Bids)
#' r <- residuals(hP.fit)
#' @export
residuals.glm_hP <- function(object, type = c("pearson", "response", "quantile"),
envelope = FALSE, rep = 19,
title = "Simulated Envelope of Residuals", ...) {
stopifnot(is.logical(envelope))
stopifnot(is.numeric(rep), length(rep) == 1, rep >= 1)
stopifnot(is.character(title), length(title) == 1)

type <- match.arg(type)

if (! envelope)
return(res_hp(object, type))

residuals_sim <- matrix(0, nrow = length(object$residuals), ncol = rep) pb <- progress::progress_bar$new(total = rep)
for (x in 1:rep) {
pb$tick() tmp <- simulation_hp(object, type) residuals_sim[, x] <- tmp } residuals_sim <- apply(residuals_sim, 2, sort) minima <- apply(residuals_sim, 1, min) maxima <- apply(residuals_sim, 1, max) resi <- sort(res_hp(object, type)) n <- length(resi) t <- 1:n normal_score <- stats::qnorm(t / (n + 1)) xx <- c(normal_score, rev(normal_score)) yy <- c(minima, rev(maxima)) type_r <- paste0(toupper(substr(type, 1, 1)), substr(type, 2, nchar(type))) graphics::plot(normal_score, resi, type = "l", xlab = "Standard normal quantiles", ylab = paste(type_r, "residuals"), main = title) graphics::polygon(xx, yy, col = "gray", border = NA) graphics::lines(normal_score, resi) structure( list( type = type, residuals = resi, sim_residuals = residuals_sim ), class = "residuals.glm_hP" ) } res_hp <- function(object, type) { lambda <- object$lambdas
gamma  <- object$gammas if (type == "pearson") { mu <- object$fitted.values
#variance <- lambda + (lambda - (gamma - 1)) * mu - mu ^ 2
variance <- variances_hp(lambda, gamma, object$maxiter_series, object$tol)
r        <- as.vector(object$residuals / sqrt(variance)) names(r) <- seq(r) return(r) } if (type == "response") return(object$residuals)

# type == "quantile"
if (! is.null(object$y)) { y <- object$y
} else if (! is.null(object$model.mu)) { y <- stats::model.response(object$model.mu)
} else {
formula.mu <- stats::as.formula(object$call[["formula.mu"]]) a.mu <- stats::model.frame(formula.mu, data = object$data)
y <- stats::model.response(a.mu)
}
a <- ifelse(y == 0, 0, phP(y - 1, gamma, lambda))
b <- phP(y, gamma, lambda)
u <- stats::runif(length(y), a, b)
qr <- stats::qnorm(u)
return(qr)
}

simulation_hp <- function(object, type) {
repeat {
lambdas <- object$lambdas gammas <- object$gammas
response_n <- get_response(object$formula.mu) data <- object$data
data[response_n] <- sapply(seq_len(nrow(data)),
function(i) simulate_hp(gammas[i], lambdas[i]))
the_call <- object$call the_call[["data"]] = data fit <- tryCatch(eval(the_call), error = function(e) TRUE) if (is.logical(fit)) next if(any(is.nan(res_hp(fit, type)))) next break } return(res_hp(fit, type)) } simulate_hp <- function(gamma, lambda) { pochammer <- function(a, r) if (r == 0) 1 else prod(a:(a + r - 1)) u <- stats::runif(1) y <- 0 p <- 0 value <- f11(lambda,gamma) while (p < u) { p <- p + lambda ^ y / (value * pochammer(gamma, y)) y <- y + 1 } y - 1 } get_response <- function(formula) { tt <- stats::terms(formula) vars <- as.character(attr(tt, "variables"))[-1] response <- attr(tt, "response") vars[response] } #' @rdname residuals #' @examples #' Bids$size.sq <- Bids$size ^ 2 #' CMP.fit <- glm.CMP(formula.mu = numbids ~ leglrest + rearest + finrest + #' whtknght + bidprem + insthold + size + size.sq + regulatn, #' formula.nu = numbids ~ 1, data = Bids) #' r <- residuals(CMP.fit) #' @export residuals.glm_CMP <- function(object, type = c("pearson", "response","quantile"), envelope = FALSE, rep = 19, title = "Simulated Envelope of Residuals", ...) { type <- match.arg(type) if (! envelope) return(res_cmp(object, type)) residuals_sim <- matrix(0, nrow = length(object$residuals), ncol = rep)
pb <- progress::progress_bar$new(total = rep) for (x in 1:rep) { pb$tick()
tmp <- simulation_cmp(object, type)
residuals_sim[, x] <- tmp
}

residuals_sim <- apply(residuals_sim, 2, sort)
minima <- apply(residuals_sim, 1, min)
maxima <- apply(residuals_sim, 1, max)

resi <- sort(res_cmp(object, type))
n <- length(resi)
t <- 1:n

normal_score <- stats::qnorm(t / (n + 1))
xx <- c(normal_score, rev(normal_score))
yy <- c(minima, rev(maxima))

type_r <- paste0(toupper(substr(type, 1, 1)), substr(type, 2, nchar(type)))
graphics::plot(normal_score,
resi,
type = "l",
xlab = "Standard normal quantiles",
ylab = paste(type_r, "residuals"),
main = title
)
graphics::polygon(xx, yy, col = "gray", border = NA)
graphics::lines(normal_score, resi)
structure(
list(
type = type,
residuals = resi,
sim_residuals = residuals_sim
),
class = "residuals.glm_CMP"
)
}

res_cmp <- function(object, type  = c("pearson", "response","quantile")) {
lambda <- object$lambdas nu <- object$nus
if (type == "pearson") {
variance <- variances_cmp(lambda, nu, object$maxiter_series, object$tol)
r <- as.vector(object$residuals / sqrt(variance)) names(r) <- seq(r) return(r) } else if (type == "response") { return(object$residuals)
}
# type == "quantile"
if (! is.null(object$y)) { y <- object$y
} else if (! is.null(object$model.mu)) { y <- stats::model.response(object$model.mu)
} else {
formula.mu <- stats::as.formula(object$call[["formula.mu"]]) a.mu <- stats::model.frame(formula.mu, data = object$data)
y <- stats::model.response(a.mu)
}
a <- rep(0, length(y))
a[y > 0] <- COMPoissonReg::pcmp(y[y > 0] - 1, lambda[y > 0], nu[y > 0])
b <- COMPoissonReg::pcmp(y, lambda, nu)
u <- stats::runif(length(y), a, b)
qr <- stats::qnorm(u)
return(qr)
}

simulation_cmp <- function(object, type) {
repeat {
lambdas <- object$lambdas nus <- object$nus
response_n <- get_response(object$formula.mu) data <- object$data
data[response_n] <- COMPoissonReg::rcmp(nrow(data), lambdas, nus)
the_call <- object\$call
the_call[["data"]] = data
fit <- tryCatch(eval(the_call),
error = function(e) TRUE)
if (is.logical(fit))
next
if(any(is.nan(res_cmp(fit, type))))
next
break
}
return(res_cmp(fit, type))
}


## Try the DGLMExtPois package in your browser

Any scripts or data that you put into this service are public.

DGLMExtPois documentation built on March 26, 2020, 7:42 p.m.