#' PIT Plots for a CMP Object
#'
#' Two plots for the non-randomized PIT are currently available for checking the
#' distributional assumption of the fitted CMP model: the PIT histogram, and
#' the uniform Q-Q plot for PIT.
#'
#' @param object an object class "cmp", obtained from a call to \code{glm.cmp}.
#' @param bins numeric; the number of bins shown in the PIT histogram or the
#' PIT Q-Q plot.
#' @param line logical; if \code{TRUE} (default), the line for displaying the standard
#' uniform distribution will be shown for the purpose of comparison.
#' @param colLine numeric or character: the colour of the line for comparison
#' in PIT histogram.
#' @param lwdLine numeric; the line widths for the comparison line in PIT histogram.
#' @param colHist numeric or character; the colour of the histogram for PIT.
#' @param col1 numeric or character; the colour of the sample uniform Q-Q plot in PIT.
#' @param col2 numeric or character; the colour of the theoretical uniform Q-Q plot in PIT.
#' @param lty1 integer or character string: the line types for the sample
#' uniform Q-Q plot in PIT, see par(lty = .).
#' @param lty2 an integer or character string: the line types for the theoretical uniform
#' Q-Q plot in PIT, see par(lty = .).
#' @param type 1-character string; the type of plot for the sample uniform Q-Q plot in PIT.
#' @param main character string; a main title for the plot.
#' @param ... other arguments passed to plot.default and plot.ts.
#' @import graphics
#' @details
#' The histogram and the Q-Q plot are used to compare the fitted profile with a standard
#' uniform distribution. If they match relatively well, it means the CMP distribution
#' is appropriate for the data.
#'
#' The \code{gg_histcompPIT} and \code{gg_qqcompPIT} functions
#' would provide the same two plots but in ggplot format.
#'
#' @references
#' Czado, C., Gneiting, T. and Held, L. (2009). Predictive model assessment
#' for count data. \emph{Biometrics}, \strong{65}, 1254--1261.
#'
#' Dunsmuir, W.T.M. and Scott, D.J. (2015). The \code{glarma} Package for Observation-Driven
#' Time Series Regression of Counts. \emph{Journal of Statistical Software},
#' \strong{67}, 1--36.
#' @seealso
#' \code{\link{gg_histcompPIT}}, \code{\link{gg_qqcompPIT}},
#' \code{\link{plot.cmp}} and \code{\link{autoplot}}.
#' @examples
#' ## For examples see example(plot.cmp)
#' @name PIT_Plot
NULL
#' @rdname PIT_Plot
#' @export
histcompPIT <- function(object, bins = 10, line = TRUE, colLine = "red", colHist = "royal blue", lwdLine = 2, main = NULL, ...) {
PIT <- compPIT(object, bins = bins)$PIT
height <- diff(PIT[, ncol(PIT)]) * bins
if (max(height) > 2) {
y.upper <- max(height) + 1 / (bins / 2)
}
else {
y.upper <- 2
}
if (is.null(main)) {
main <- paste("PIT for COM-Poisson")
}
barplot(height,
ylim = c(0, y.upper), border = TRUE, space = 0,
xpd = FALSE, xlab = "Probability Integral Transform",
ylab = "Relative Frequency", main = main, col = colHist,
...
)
if (line == TRUE) {
abline(h = 1, lty = 2, col = colLine, lwd = lwdLine)
}
plot.window(xlim = c(0, 1), ylim = c(0, y.upper))
axis(1)
box()
}
#' @rdname PIT_Plot
#' @export
qqcompPIT <- function(object, bins = 10, col1 = "red", col2 = "black", lty1 = 1,
lty2 = 2, type = "l", main = NULL, ...) {
dummy.variable <- seq(0, 1, by = 1 / bins)
PIT <- compPIT(object, bins = bins)$PIT
qq.plot <- PIT[, ncol(PIT)]
if (is.null(main)) {
main <- "Q-Q Plot of Uniform PIT"
}
plot(dummy.variable, qq.plot, lty = lty1, col = col1, xlim = c(0, 1), ylim = c(0, 1), type = type, xlab = "Theoretical", ylab = "Sample", main = main, ...)
abline(0, 1, col = col2, lty = lty2)
list(sample = qq.plot, theoretical = dummy.variable)
invisible()
}
#' ggplot version of PIT Plots for a CMP Object
#'
#' Two plots for the non-randomized PIT are currently available for checking the
#' distributional assumption of the fitted CMP model: the PIT histogram, and
#' the uniform Q-Q plot for PIT.
#'
#' \code{histcompPIT} and \code{qqcompPIT}
#'
#' @param object an object class "cmp", obtained from a call to \code{glm.cmp}.
#' @param bins numeric; the number of bins shown in the PIT histogram or the
#' PIT Q-Q plot.
#' @param ref_line logical; if \code{TRUE} (default), the line for displaying the standard
#' uniform distribution will be shown for the purpose of comparison.
#' @param col_line numeric or character: the colour of the reference line
#' for comparison in PIT histogram.
#' @param size numeric; the line widths for the comparison line in PIT histogram.
#' @param col_hist numeric or character; the colour of the histogram for PIT.
#' @param col1 numeric or character; the colour of the sample uniform Q-Q plot in PIT.
#' @param col2 numeric or character; the colour of the theoretical uniform Q-Q plot in PIT.
#' @param lty1 integer or character string: the line types for the sample
#' uniform Q-Q plot in PIT, see ggplot2::linetype.
#' @param lty2 an integer or character string: the line types for the theoretical uniform
#' Q-Q plot in PIT, see ggplot2::linetype.
#' @import ggplot2
#' @import ggpubr
#'
#' @details
#' The histogram and the Q-Q plot are used to compare the fitted profile with a standard
#' uniform distribution. If they match relatively well, it means the CMP distribution
#' is appropriate for the data.
#'
#' The \code{histcompPIT} and \code{qqcompPIT} functions
#' would provide the same two plots but in base R format.
#' @references
#' Czado, C., Gneiting, T. and Held, L. (2009). Predictive model assessment
#' for count data. \emph{Biometrics}, \strong{65}, 1254--1261.
#'
#' Dunsmuir, W.T.M. and Scott, D.J. (2015). The \code{glarma} Package for Observation-Driven
#' Time Series Regression of Counts. \emph{Journal of Statistical Software},
#' \strong{67}, 1--36.
#' @seealso
#' \code{\link{histcompPIT}}, \code{\link{qqcompPIT}},
#' \code{\link{plot.cmp}} and \code{\link{autoplot}}.
#' @examples
#' ## For examples see example(autoplot)
#' @name PIT_ggPlot
NULL
#' @rdname PIT_ggPlot
#' @export
gg_histcompPIT <-
function(object, bins = 10, ref_line = TRUE, col_line = "red",
col_hist = "royal blue", size = 1) {
x <- NULL
PIT <- compPIT(object, bins = bins)$PIT
height <- diff(PIT[, ncol(PIT)]) * bins
if (max(height) > 2) {
y_lim <- max(height) + 1 / (bins / 2)
} else {
y_lim <- 2
}
p <- ggplot(data.frame(
height = height,
x = (seq(0, 1,
length.out = (bins + 1)
) +
1 / (bins * 2))[-(bins + 1)]
)) +
geom_col(aes(y = height, x = x), fill = col_hist) +
ylim(0, y_lim) +
xlim(0, 1) +
labs(
x = "Probability Integral Transform", y = "Relative Frequency",
title = "Histogram of Uniform PIT for CMP"
)
if (ref_line == TRUE) {
p <- p + geom_hline(
yintercept = 1, linetype = 2, colour = col_line,
size = size
)
}
return(p)
}
#' @rdname PIT_ggPlot
#' @export
gg_qqcompPIT <- function(object, bins = 10, col1 = "red",
col2 = "#999999",
lty1 = 1, lty2 = 2) {
dummy.variable <- seq(0, 1, by = 1 / bins)
PIT <- compPIT(object, bins = bins)$PIT
qq.plot <- PIT[, ncol(PIT)]
p <- ggplot(data.frame(dummy.variable = dummy.variable, PIT = qq.plot)) +
geom_line(aes(x = dummy.variable, y = PIT), linetype = lty1, colour = col1) +
geom_abline(intercept = 0, slope = 1, linetype = lty2, colour = col2) +
labs(x = "theoretical", y = "sample", title = "QQ Plot of Uniform PIT")
return(p)
}
#' Non-randomized Probability Integral Transform
#'
#' Functions to produce the non-randomized probability integral transform (PIT) to check the
#' adequacy of the distributional assumption of the COM-Poisson model. The majority of the
#' code and descriptions are taken from Dunsmuir and Scott (2015).
#'
#' @param object an object class "cmp", obtained from a call to \code{glm.cmp}.
#' @param bins numeric; the number of bins shown in the PIT histogram or the
#' PIT Q-Q plot.
#' @details
#' These functions are used to obtain the predictive probabilities and the probability
#' integral transform for a fitted COM-Poisson model. The majority of the code and
#' descriptions are taken from Dunsmuir and Scott (2015).
#' @return
#' \code{compPredprob} returns a list with values:
#' \item{upper}{the predictive cumulative probabilities used as the upper bound for
#' computing the non-randomized PIT.}
#' \item{lower}{the predictive cumulative probabilities used as the upper bound for
#' computing the non-randomized PIT.}
#'
#' \code{compPIT} returns a list with values:
#' \item{conditionalPIT}{the conditional probability integral transformation given the
#' observed counts.}
#' \item{PIT}{the probability integral transformation.}
#' @references
#' Czado, C., Gneiting, T. and Held, L. (2009). Predictive model assessment
#' for count data. \emph{Biometrics}, \strong{65}, 1254--1261.
#'
#' Dunsmuir, W.T.M. and Scott, D.J. (2015). The \code{glarma} Package for Observation-Driven
#' Time Series Regression of Counts. \emph{Journal of Statistical Software},
#' \strong{67}, 1--36.
#' @examples
#' data(takeoverbids)
#' M.bids <- glm.cmp(numbids ~ leglrest + rearest + finrest + whtknght
#' + bidprem + insthold + size + sizesq + regulatn, data = takeoverbids)
#' compPredProb(M.bids)
#' compPIT(M.bids)
#' @name nrPIT
NULL
#' @rdname nrPIT
#' @export
compPredProb <- function(object) {
lower <- pcomp(object$y - 1,
nu = object$nu, lambda = object$lambda,
summax = object$summax
)
upper <- lower + dcomp(object$y,
nu = object$nu, lambda = object$lambda,
summax = object$summax
)
list(lower = lower, upper = upper)
}
#' @rdname nrPIT
#' @export
compPIT <- function(object, bins = 10) {
dummy.variable <- seq(0, 1, by = 1 / bins)
predProb <- compPredProb(object)
con.PIT <- matrix(0, ncol = length(object$y), nrow = length(dummy.variable))
for (i in 1:length(object$y)) {
for (j in 1:length(dummy.variable)) {
if (dummy.variable[j] <= predProb$lower[i]) {
con.PIT[j, i] <- 0
}
if (dummy.variable[j] > predProb$lower[i] & dummy.variable[j] <
predProb$upper[i]) {
con.PIT[j, i] <- (dummy.variable[j] - predProb$lower[i]) / (predProb$upper[i] -
predProb$lower[i])
}
if (dummy.variable[j] >= predProb$upper[i]) {
con.PIT[j, i] <- 1
}
}
}
PIT <- matrix(0, ncol = length(object$y), nrow = length(dummy.variable))
for (i in 2:length(object$y)) {
PIT[, i] <- apply(con.PIT[, 1:i], 1, sum) * (i - 1)^(-1)
}
list(ConditionalPIT = con.PIT, PIT = PIT)
}
#' Random Normal Probability Integral Transform
#'
#' A function to create the normal conditional (randomized) quantile residuals.
#' The majority of the code and descriptions are taken from Dunsmuir and Scott (2015).
#' @param object an object class "cmp", obtained from a call to \code{glm.cmp}.
#'
#' @import stats
#' @details
#' The function \code{compPredProb} produces the non-randomized probability integral
#' transform(PIT). It returns estimates of the cumulative predictive probabilities as
#' upper and lower bounds of a collection of intervals. If the model is correct, a
#' histogram drawn using these estimated probabilities should resemble a histogram
#' obtained from a sample from the uniform distribution.
#'
#' This function aims to produce observations which instead resemble a sample from
#' a normal distribution. Such a sample can then be examined by the usual tools for
#' checking normality, such as histograms and normal Q-Q plots.
#'
#' For each of the intervals produced by \code{compnormRandPIT}, a random uniform observation
#' is generated, which is then converted to a normal observation by applying the inverse
#' standard normal distribution function (using \code{qnorm}). The vector of these values
#' is returned by the function in the list element \code{rt}. In addition non-random
#' observations which should appear similar to a sample from a normal distribution
#' are obtained by applying \code{qnorm} to the mid-points of the predictive distribution
#' intervals. The vector of these values is returned by the function in the list element
#' \code{rtMid}.
#' @return
#' A list consisting of two elements:
#' \item{rt}{the normal conditional randomized quantile residuals}
#' \item{rdMid}{the midpoints of the predictive probability intervals}
#' @references
#' Berkowitz, J. (2001). Testing density forecasts, with applications to risk management.
#' \emph{Journal of Business \& Economic Statistics}, \bold{19}, 465--474.
#'
#' Dunn, P. K. and Smyth, G. K. (1996). Randomized quantile residuals. \emph{Journal of
#' Computational and Graphical Statistics}, \bold{5}, 236--244.
#'
#' Dunsmuir, W.T.M. and Scott, D.J. (2015). The \code{glarma} Package for Observation-Driven
#' Time Series Regression of Counts. \emph{Journal of Statistical Software},
#' \strong{67}, 1--36.
#' @examples
#' data(takeoverbids)
#' M.bids <- glm.cmp(numbids ~ leglrest + rearest + finrest + whtknght
#' + bidprem + insthold + size + sizesq + regulatn, data = takeoverbids)
#' compnormRandPIT(M.bids)
#' @name rPIT
NULL
#' @rdname rPIT
#' @export
compnormRandPIT <- function(object) {
temp <- compPredProb(object)
rt <- qnorm(runif(length(temp$lower), temp$lower, temp$upper))
rtMid <- qnorm((temp$lower + temp$upper) / 2)
list(rt = rt, rtMid = rtMid)
}
#' Plot Diagnostic for a \code{glm.cmp} Object
#'
#' Eight plots (selectable by \code{which}) are currently available:
#' a plot of deviance residuals against fitted values,
#' a non-randomized PIT histogram,
#' a uniform Q-Q plot for non-randomized PIT,
#' a histogram of the normal randomized residuals,
#' a Q-Q plot of the normal randomized residuals,
#' a Scale-Location plot of sqrt(| residuals |) against fitted values
#' a plot of Cook's distances versus row labels
#' a plot of pearson residuals against leverage.
#' By default, four plots (number 1, 2, 6, and 8 from this list of plots) are provided.
#'
#' @param x an object class 'cmp' object, obtained from a call to \code{glm.cmp}
#' @param which if a subset of plots is required, specify a subset of the numbers 1:8.
#' See 'Details' below.
#' @param ask logical; if \code{TRUE}, the user is asked before each plot.
#' @param bins numeric; the number of bins shown in the PIT histogram or the
#' PIT Q-Q plot.
#' @param ... other arguments passed to or from other methods (currently unused).
#'
#' @import stats
#' @import graphics
#' @import grDevices
#' @export
#' @details
#' The 'Scale-Location' plot, also called 'Spread-Location' plot, takes the square root of
#' the absolute standardized deviance residuals (\emph{sqrt|E|}) in order to diminish
#' skewness is much less skewed than than \emph{|E|} for Gaussian zero-mean E.
#'
#' The 'Scale-Location' plot uses the standardized deviance residuals while the
#' Residual-Leverage plot uses the standardized pearson residuals. They are given as
#' \eqn{R_i/\sqrt{1-h_{ii}}} where \eqn{h_{ii}} are the diagonal entries of the hat matrix.
#'
#' The Residuals-Leverage plot shows contours of equal Cook's distance for values of 0.5
#' and 1.
#'
#' There are two plots based on the non-randomized probability integral transformation (PIT)
#' using \code{\link{compPIT}}. These are a histogram and a uniform Q-Q plot. If the
#' model assumption is appropriate, these plots should reflect a sample obtained
#' from a uniform distribution.
#'
#' There are also two plots based on the normal randomized residuals calculated
#' using \code{\link{compnormRandPIT}}. These are a histogram and a normal Q-Q plot. If the model
#' assumption is appropriate, these plots should reflect a sample obtained from a normal
#' distribution.
#'
#' @seealso
#' \code{\link{compPIT}}, \code{\link{compnormRandPIT}},
#' \code{\link{glm.cmp}} and \code{\link{autoplot}}.
#' @examples
#' data(takeoverbids)
#' M.bids <- glm.cmp(numbids ~ leglrest + rearest + finrest + whtknght
#' + bidprem + insthold + size + sizesq + regulatn, data = takeoverbids)
#'
#' ## The default plots are shown
#' plot(M.bids)
#'
#' ## The plots for the non-randomized PIT
#' # plot(M.bids, which = c(2,3))
plot.cmp <- function(x, which = c(1L, 2L, 6L, 8L),
ask = prod(par("mfcol")) < length(which) && dev.interactive(),
bins = 10,
...) {
# plot 1 deviance residuals vs fitted
# plot 2 Histogram of non-randomized PIT
# plot 3 q-q plot of non-randomized PIT
# plot 4 Histogram of Randomized Residuals
# plot 5 Q-Q Plot of Randomized Residuals
# plot 6 sclae-location plot
# plot 7 cook's distance vs obs number
# plot 8 std pearson resid. vs leverage
object <- x
if (any(!(which %in% 1:8))) {
cat("The acceptable ragne for option 'which' is 1:8.\n")
cat("Anyting outside this range would be ignored.\n")
cat("Use ?plot.cmp to see which plots are available.\n")
}
show <- rep(FALSE, 8)
show[which] <- TRUE
if (ask) {
oask <- devAskNewPage(TRUE)
on.exit(devAskNewPage(oask))
}
if (any(show[c(1L, 6L)] == TRUE)) {
dev_res <- object$d_res
}
if (show[1L]) {
dev.hold()
plot(object$linear_predictors, dev_res,
main = "Residuals vs Fitted",
ylab = "(Deviance) Residuals",
xlab = paste(c("Linear Predicted values", object$call))
)
abline(h = 0, lty = 2)
lines(lowess(object$linear_predictors, dev_res), col = "red")
index.dev.res <- order(abs(dev_res), decreasing = TRUE)[1:3]
text(object$linear_predictors[index.dev.res],
dev_res[index.dev.res],
labels = paste(index.dev.res), cex = 0.7, pos = 4
)
dev.flush()
}
if (show[2L]) {
dev.hold()
histcompPIT(object)
dev.flush()
}
if (show[3L]) {
dev.hold()
qqcompPIT(object)
dev.flush()
}
if (any(show[4L:5L] == TRUE)) {
rt <- compnormRandPIT(object)$rt
}
if (show[4L]) {
dev.hold()
hist(rt,
main = "Histogram of Randomized Residuals", col = "royal blue",
xlab = expression(r[t])
)
box()
dev.flush()
}
if (show[5L]) {
dev.hold()
qqnorm(rt, main = "Q-Q Plot of Randomized Residuals")
abline(0, 1, lty = 2, col = "black")
dev.flush()
}
if (show[6L]) {
dev.hold()
std.dev.res <- rstandard.cmp(object, type = "deviance")
res <- sqrt(abs(std.dev.res))
plot(object$linear_predictors, res,
main = "Scale-Location", xlab = paste(c("Linear Predicted values", object$call)),
ylab = expression(sqrt("|Std. deviance resid.|"))
)
lines(lowess(object$linear_predictors, res), col = "red")
index.dev.res <- order(res, decreasing = TRUE)[1:3]
text(object$linear_predictors[index.dev.res],
dev_res[index.dev.res],
labels = paste(index.dev.res), cex = 0.7, pos = 4
)
dev.flush()
}
if (any(show[7L:8L] == TRUE)) {
rk <- object$rank
h <- hatvalues.cmp(object)
std.pear <- rstandard.cmp(object, type = "pearson")
cook <- cooks.distance.cmp(object)
n <- length(cook)
index.cook <- order(cook, decreasing = TRUE)[1:3]
}
if (show[7L]) {
dev.hold()
plot(cook,
type = "h", main = "Cook's distance",
xlab = paste(c("Obs. number", object$call)),
ylab = "Approx. Cook's distance", ylim = c(0, ymx <- max(cook) * 1.075)
)
text(index.cook, cook[index.cook], labels = paste(index.cook), cex = 0.7, pos = 3)
dev.hold()
}
if (show[8L]) {
ylim <- extendrange(r = range(std.pear, na.rm = TRUE), f = 0.08)
xlim <- extendrange(r = c(0, max(h, na.rm = TRUE)), f = 0.08)
plot(h, std.pear,
main = "Residuals vs Leverage",
xlab = paste(c("Leverage", object$call)), ylab = "Std. Pearson resid.",
xlim = xlim, ylim = ylim
)
abline(h = 0, v = 0, lty = 3, col = "gray")
bound <- par("usr")
x <- NULL
curve(sqrt(rk * 0.5 * (1 - x) / x), 0.01, bound[2], add = TRUE, col = "red", lty = 2)
curve(sqrt(rk * (1 - x) / x), 0.01, bound[2], add = TRUE, col = "red", lty = 2)
curve(-sqrt(rk * 0.5 * (1 - x) / x), 0.01, bound[2], add = TRUE, col = "red", lty = 2)
curve(-sqrt(rk * (1 - x) / x), 0.01, bound[2], add = TRUE, col = "red", lty = 2)
legend("bottomleft",
legend = "Cook's distance",
lty = 2, col = 2, bty = "n"
)
xmax <- min(0.99, bound[2L])
ymult <- sqrt(rk * (1 - xmax) / xmax)
cook.levels <- c(0.5, 1)
aty <- sqrt(cook.levels) * ymult
axis(4,
at = c(-rev(aty), aty), labels = paste(c(rev(cook.levels), cook.levels)),
mgp = c(0.25, 0.25, 0), las = 2,
tck = 0, cex.axis = 0.75, col.axis = 2
)
text(h[index.cook], std.pear[index.cook],
labels = paste(index.cook),
cex = 0.7, pos = 4
)
dev.flush()
}
invisible()
}
#' Plot Diagnostic for a \code{glm.cmp} Object in ggplot style
#'
#' \code{autoplot} uses ggplot2 to draw the diagnostic plots for a 'cmp' class object.
#' \code{gg_plot} is an \emph{alias} for it.
#'
#' Eight plots (selectable by \code{which}) are currently available:
#' a plot of deviance residuals against fitted values,
#' a non-randomized PIT histogram,
#' a uniform Q-Q plot for non-randomized PIT,
#' a histogram of the normal randomized residuals,
#' a Q-Q plot of the normal randomized residuals,
#' a Scale-Location plot of sqrt(| residuals |) against fitted values
#' a plot of Cook's distances versus row labels
#' a plot of pearson residuals against leverage.
#' By default, four plots (number 1, 2, 6, and 8 from this list of plots) are provided.
#'
#' @param object an object class 'cmp' object, obtained from a call to \code{glm.cmp}
#' @param which if a subset of plots is required, specify a subset of the numbers 1:8.
#' See 'Details' below.
#' @param ask logical; if \code{TRUE}, the user is asked before each plot.
#' @param bins numeric; the number of bins shown in the PIT histogram or the
#' PIT Q-Q plot.
#' @param nrow numeric; (optional) number of rows in the plot grid.
#' @param ncol numeric; (optional) number of columns in the plot grid.
#' @param output_as_ggplot logical; if \code{TRUE}, the function would
#' return a list of \code{ggplot} objects; if \code{FALSE}, the
#' function would return an \code{ggarrange} object.
#' @param ... other arguments passed to or from other methods (currently unused).
#' @return
#' return a list of \code{ggplot} objects or a \code{ggarrange} object.
#' @import stats
#' @import ggplot2
#' @import ggpubr
#' @export
#' @details
#' The 'Scale-Location' plot, also called 'Spread-Location' plot, takes the square root of
#' the absolute standardized deviance residuals (\emph{sqrt|E|}) in order to diminish
#' skewness is much less skewed than than \emph{|E|} for Gaussian zero-mean E.
#'
#' The 'Scale-Location' plot uses the standardized deviance residuals while the
#' Residual-Leverage plot uses the standardized pearson residuals. They are given as
#' \eqn{R_i/\sqrt{1-h_{ii}}} where \eqn{h_{ii}} are the diagonal entries of the hat matrix.
#'
#' The Residuals-Leverage plot shows contours of equal Cook's distance for values of 0.5
#' and 1.
#'
#' There are two plots based on the non-randomized probability integral transformation (PIT)
#' using \code{\link{compPIT}}. These are a histogram and a uniform Q-Q plot. If the
#' model assumption is appropriate, these plots should reflect a sample obtained
#' from a uniform distribution.
#'
#' There are also two plots based on the normal randomized residuals calculated
#' using \code{\link{compnormRandPIT}}. These are a histogram and a normal Q-Q plot. If the model
#' assumption is appropriate, these plots should reflect a sample obtained from a normal
#' distribution.
#'
#' @seealso
#' \code{\link{compPIT}}, \code{\link{compnormRandPIT}},
#' \code{\link{glm.cmp}} and \code{\link{plot.cmp}}.
#' @examples
#' data(takeoverbids)
#' M.bids <- glm.cmp(numbids ~ leglrest + rearest + finrest + whtknght
#' + bidprem + insthold + size + sizesq + regulatn, data = takeoverbids)
#'
#' ## The default plots are shown
#' gg_plot(M.bids) # or autoplot(M.bids)
#'
#' ## The plots for the non-randomized PIT
#' gg_plot(M.bids, which = c(2, 3)) # or autoplot(M.bids, which = c(2,3))
autoplot.cmp <- function(object, which = c(1L, 2L, 6L, 8L), bins = 10,
ask = TRUE, nrow = NULL, ncol = NULL,
output_as_ggplot = TRUE, ...) {
# plot 1 deviance residuals vs fitted
# plot 2 Histogram of non-randomized PIT
# plot 3 q-q plot of non-randomized PIT
# plot 4 Histogram of Randomized Residuals
# plot 5 Q-Q Plot of Randomized Residuals
# plot 6 sclae-location plot
# plot 7 cook's distance vs obs number
# plot 8 std pearson resid. vs leverage
x <- y <- linear_predictors <- index <- leg <- cook_level <- NULL
if (any(!(which %in% 1:8))) {
cat("The acceptable ragne for option 'which' is 1:8.\n")
cat("Anyting outside this range would be ignored.\n")
cat("Use ?autoplot to see which plots are available.\n")
}
show <- rep(FALSE, 8)
show[which] <- TRUE
p <- vector("list", sum(show))
show_count <- 0
if (any(show[c(1L, 6L)] == TRUE)) {
dev_res <- object$d_res
}
if (show[1L]) {
index_dev_res <- order(abs(dev_res), decreasing = TRUE)[1:3]
p_temp <- ggplot(data.frame(
x = object$linear_predictors,
y = dev_res
)) +
geom_point(aes(x = x, y = y)) +
labs(
title = "Residuals vs Fitted",
x = paste(c("Linear Predicted values\n", object$call), collapse = ""),
y = "(Deviance) Residuals"
) +
geom_hline(yintercept = 0, linetype = 2) +
geom_smooth(aes(x = x, y = y),
formula = y ~ x, colour = "red", se = FALSE,
method = "loess"
) +
geom_text(aes(x = linear_predictors, y = dev_res, label = index),
hjust = "inward", vjust = "outward",
data =
data.frame(
linear_predictors =
object$linear_predictors[index_dev_res[1:3]],
dev_res = dev_res[index_dev_res[1:3]],
index = paste(index_dev_res[1:3])
)
)
show_count <- show_count + 1
p[[show_count]] <- p_temp
}
if (show[2L]) {
p_temp <- gg_histcompPIT(object)
show_count <- show_count + 1
p[[show_count]] <- p_temp
}
if (show[3L]) {
p_temp <- gg_qqcompPIT(object)
show_count <- show_count + 1
p[[show_count]] <- p_temp
}
if (any(show[4L:5L] == TRUE)) {
rt <- compnormRandPIT(object)$rt
}
if (show[4L]) {
p_temp <- ggplot(data.frame(rt = rt), aes(rt)) +
geom_histogram(bins = 30, fill = "royal blue", colour = "royal blue") +
labs(title = "Histogram of Randomized Residuals", x = expression(r[t]))
show_count <- show_count + 1
p[[show_count]] <- p_temp
}
if (show[5L]) {
p_temp <- ggplot(data.frame(rt = rt), aes(sample = rt)) +
stat_qq() +
stat_qq_line() +
labs(title = "QQ Plot for Randomized Residuals")
show_count <- show_count + 1
p[[show_count]] <- p_temp
}
if (show[6L]) {
std_dev_res <- rstandard.cmp(object, type = "deviance")
res <- sqrt(abs(std_dev_res))
index_res <- order(abs(res), decreasing = TRUE)[1:3]
p_temp <- ggplot(data.frame(x = object$linear_predictors, y = res)) +
geom_point(aes(x = x, y = y)) +
labs(
title = "Scale-Location",
x = paste(c("Linear Predicted values \n", object$call), collapse = ""),
y = expression(sqrt("|Std. deviance resid.|"))
) +
geom_smooth(aes(x = x, y = y),
formula = y ~ x, method = "loess", se = FALSE,
colour = "red"
) +
geom_text(aes(
x = linear_predictors,
y = res, label = index
),
hjust = "inward", vjust = "outward",
data = data.frame(
linear_predictors =
object$linear_predictors[index_res[1:3]],
res = res[index_res[1:3]],
index = index_res[1:3]
)
)
show_count <- show_count + 1
p[[show_count]] <- p_temp
}
if (any(show[7L:8L] == TRUE)) {
rk <- object$rank
h <- hatvalues.cmp(object)
std_pear <- rstandard.cmp(object, type = "pearson")
cook <- cooks.distance.cmp(object)
n <- length(cook)
index_cook <- order(cook, decreasing = TRUE)[1:3]
}
if (show[7L]) {
p_temp <- ggplot(data.frame(cook = cook, index = 1:n)) +
geom_segment(aes(x = index, y = cook, xend = index, yend = 0)) +
labs(
title = "Cook's distance",
x = paste(c("Obs number\n", object$call), collapse = ""),
y = "Approx. Cook's distance"
) +
ylim(0, max(cook) * 1.075) +
geom_text(aes(
x = index_cook, y = cook,
label = index
),
vjust = "outward", data =
data.frame(
index_cook = index_cook[1:3],
cook = cook[index_cook[1:3]],
index = index_cook[1:3]
)
)
show_count <- show_count + 1
p[[show_count]] <- p_temp
}
if (show[8L]) {
options(warn = -1)
ylim <- extendrange(r = range(std_pear, na.rm = TRUE), f = 0.08)
xlim <- extendrange(r = c(0, max(h, na.rm = TRUE)), f = 0.08)
xmax <- min(0.99, xlim[2])
ymult <- sqrt(rk * (1 - xmax) / xmax)
cook_levels <- c(0.5, 1)
at_y <- sqrt(cook_levels) * ymult
p_temp <- ggplot(data.frame(
h = h,
std_pear = std_pear,
leg = "Cook's distance"
)) +
geom_point(aes(x = h, y = std_pear)) +
labs(
title = "Residuals vs Leverage",
x = paste(c("Leverage \n", object$call), collapse = ""),
y = "Std. Pearson resid."
) +
xlim(xlim[1], xlim[2]) +
ylim(ylim[1], ylim[2]) +
geom_hline(yintercept = 0, linetype = 3, colour = "#999999", size = 1.5) +
geom_vline(xintercept = 0, linetype = 3, colour = "#999999", size = 1.5) +
stat_function(aes(colour = leg),
fun = function(x) sqrt(rk * 0.5 * (1 - x) / x),
xlim = c(0.01, xlim[2]),
show.legend = TRUE, linetype = 2,
na.rm = TRUE
) +
stat_function(aes(colour = leg),
fun = function(x) sqrt(rk * (1 - x) / x),
xlim = c(0.01, xlim[2]),
show.legend = TRUE, linetype = 2,
na.rm = TRUE
) +
stat_function(aes(colour = leg),
fun = function(x) -sqrt(rk * 0.5 * (1 - x) / x),
xlim = c(0.01, xlim[2]),
show.legend = TRUE, linetype = 2,
na.rm = TRUE
) +
stat_function(aes(colour = leg),
fun = function(x) -sqrt(rk * (1 - x) / x),
xlim = c(0.01, xlim[2]),
show.legend = TRUE, linetype = 2,
na.rm = TRUE
) +
geom_text(aes(
x = h, y = std_pear,
label = index
),
hjust = "inward", vjust = "outward", data =
data.frame(
h = h[index_cook[1:3]],
std_pear = std_pear[index_cook[1:3]],
index = index_cook[1:3]
),
na.rm = TRUE
) +
geom_text(aes(x = xlim, y = at_y, label = cook_level),
data = data.frame(
xlim = rep(xlim[2], 4),
at_y = c(-rev(at_y), at_y),
cook_level = c(
rev(cook_levels),
cook_levels
)
),
colour = "red", hjust = "outward",
vjust = "outward", na.rm = TRUE
) +
theme(
legend.justification = c(0, 0), legend.position = c(0, 0),
legend.background = element_blank(),
legend.title = element_blank()
)
show_count <- show_count + 1
p[[show_count]] <- p_temp
options(warn = 0)
}
p_ggarrange <- ggpubr::ggarrange(plotlist = p, ncol = ncol, nrow = nrow)
if ((is.null(ncol) & is.null(nrow))) {
print(p_ggarrange)
} else if (class(p_ggarrange)[1] != "list") {
print(p_ggarrange)
} else {
for (i in 1:length(p_ggarrange)) {
if (ask) {
invisible(readline(prompt = "Press <Return> to see next plot:"))
print(p_ggarrange[[i]])
}
}
}
if (!output_as_ggplot) {
p <- p_ggarrange
}
return(invisible(p))
invisible()
}
#' @rdname autoplot.cmp
#' @aliases autoplot.cmp
#' @export
gg_plot <- autoplot.cmp
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.