Nothing
#' @title Diagnostic plots for regression and population size estimation
#' @author Piotr Chlebicki
#'
#' @description Simple diagnostic plots for \code{singleRStaticCountData} class objects.
#'
#' @param x object of \code{singleRStaticCountData} class.
#' @param dfpop TODO
#' @param confIntStrata confidence interval type to use for strata plot.
#' Currently supported values are \code{"normal"} and \code{"logNormal"}.
#' @param plotType character parameter (default \code{"qq"}) specifying type of plot to be made.
#' The following list presents and briefly explains possible type of plots:
#' \itemize{
#' \item \code{qq} -- The quantile-quantile plot for pearson residuals
#' (or standardized pearson residuals if these are available for the model) i.e.
#' empirical quantiles from residuals are plotted against theoretical quantiles
#' from standard distribution.
#' \item \code{marginal} -- A plot made by \code{matplot} with fitted and
#' observed marginal frequencies with labels.
#' \item \code{fitresid} -- Plot of fitted linear predictors against
#' (standardized) pearson residuals.
#' \item \code{bootHist} -- Simple histogram for statistics obtained from
#' bootstrapping (if one was performed and the statistics were saved).
#' \item \code{rootogram} -- Rootogram, for full explanation see:
#' Kleiber and Zeileis Visualizing Count Data Regressions Using Rootograms (2016),
#' in short it is a \code{barplot} where height is the square root of observed marginal
#' frequencies adjusted by difference between square root of observed and fitted marginal
#' frequencies connected by line representing fitted marginal frequencies.
#' The less of a difference there is between the 0 line and beginning of a bar
#' the more accurate fitt was produced by the model.
#' \item \code{dfpopContr} -- Plot of \code{dfpopsize} against unit contribution.
#' On the plot is y = x line i.e. what deletion effect would be if removing the
#' unit from the model didn't effect regression coefficients. The further away
#' the observation is from this line the more influential it is.
#' \item \code{dfpopBox} -- Boxplot of \code{dfpopsize} for getting the general
#' idea about the distribution of the "influence" of each unit on
#' population size estimate.
#' \item \code{scaleLoc} -- The scale - location plot i.e. square root of
#' absolute values of (standardized) pearson residuals against linear predictors
#' for each column of linear predictors.
#' \item \code{cooks} -- Plot of cooks distance for detecting influential observations.
#' \item \code{hatplot} -- Plot of hat values for each linear predictor for detecting influential observations.
#' \item \code{strata} -- Plot of confidence intervals and point estimates for strata provided in \code{...} argument
#' }
#' @param histKernels logical value indicating whether to add density lines
#' to histogram.
#' @param ... additional optional arguments passed to the following functions:
#' \itemize{
#' \item For \code{plotType = "bootHist"}
#' \itemize{
#' \item \code{graphics::hist} -- with \code{x, main, xlab, ylab} parameters fixed.
#' }
#' \item For \code{plotType = "rootogram"}
#' \itemize{
#' \item \code{graphics::barplot} -- with \code{height, offset, ylab, xlab, ylim} parameters fixed.
#' \item \code{graphics::lines} -- with \code{x, y, pch, type, lwd, col} parameters fixed.
#' }
#' \item For \code{plotType = "dfpopContr"}
#' \itemize{
#' \item \code{dfpopsize} -- with \code{model, observedPop} parameters fixed.
#' \item \code{plot.default} -- with \code{x, y, xlab, main} parameters fixed.
#' }
#' \item For \code{plotType = "dfpopBox"}
#' \itemize{
#' \item \code{dfpopsize} -- with \code{model, observedPop} parameters fixed.
#' \item \code{graphics::boxplot} -- with \code{x, ylab, main} parameters fixed.
#' }
#' \item For \code{plotType = "scaleLoc"}
#' \itemize{
#' \item \code{plot.default} -- with \code{x, y, xlab, ylab, main, sub} parameters fixed.
#' }
#' \item For \code{plotType = "fitresid"}
#' \itemize{
#' \item \code{plot.default} -- with \code{x, y, xlab, ylab, main, sub} parameters fixed.
#' }
#' \item For \code{plotType = "cooks"}
#' \itemize{
#' \item \code{plot.default} -- with \code{x, xlab, ylab, main} parameters fixed.
#' }
#' \item For \code{plotType = "hatplot"}
#' \itemize{
#' \item \code{hatvalues.singleRStaticCountData}
#' \item \code{plot.default} -- with \code{x, xlab, ylab, main} parameters fixed.
#' }
#' \item For \code{plotType = "strata"}
#' \itemize{
#' \item \code{stratifyPopsize.singleRStaticCountData}
#' }
#' }
#'
#' @method plot singleRStaticCountData
#' @return No return value only the plot being made.
#' @importFrom stats ppoints qqline qqnorm density dlnorm
#' @importFrom graphics abline barplot hist lines matplot legend boxplot panel.smooth axis text arrows par
#' @seealso [estimatePopsize()] [dfpopsize()] [marginalFreq()] [stats::plot.lm()] [stats::cooks.distance()] [hatvalues.singleRStaticCountData()]
#' @export
plot.singleRStaticCountData <- function(x,
plotType = c("qq", "marginal", "fitresid",
"bootHist", "rootogram", "dfpopContr",
"dfpopBox", "scaleLoc", "cooks",
"hatplot", "strata"),
confIntStrata = c("normal", "logNormal"),
histKernels = TRUE,
dfpop,
...) {
if (missing(plotType) || is.null(plotType)) {
plotType <- eval(formals()$plotType)[1]
} else if (is.numeric(plotType) && length(plotType) == 1) {
# Handle numeric index
valid_plots <- eval(formals()$plotType)
if (plotType > 0 && plotType <= length(valid_plots)) {
plotType <- valid_plots[as.integer(plotType)]
} else {
stop("Numeric plotType must be between 1 and ", length(valid_plots))
}
} else if (is.character(plotType)) {
# Use match.arg for character input validation against the options
plotType <- match.arg(plotType)
} else {
stop("plotType must be a character string, or a numeric index")
}
## sugested by Victoria Wimmer
oldpar <- graphics::par(no.readonly = TRUE)
on.exit(graphics::par(oldpar))
if (isTRUE(plotType == "bootHist") && isTRUE(!is.numeric(x$populationSize$boot))) {
stop("Trying to plot bootstrap results with no bootstrap performed")
}
plotType <- match.arg(plotType)
parNum <- length(x$model$etaNames)
if (parNum == 1)
type <- "pearsonSTD"
else
type <- "pearson"
if (plotType == "fitresid") {
res <- residuals(x, type = "response")[, 1]
} else {
res <- residuals(x, type = type)[, 1]
}
switch(plotType,
qq = {
stats::qqnorm(res);
stats::qqline(res);
},
marginal = {
M <- marginalFreq(x);
FF <- M$table;
FF[names(M$y)] <- M$y;
FF[setdiff(names(M$table), names(M$y))] <- 0;
graphics::matplot(
y = cbind(M$table, FF),
x = 0:max(x$y), type = "o",
col = 1:2, pch = 21:22, lty = 2,
main = "Plot of observed and fitted marginal frequencies",
ylab = "Frequency",
xlab = "Counts",
...);
legend("topright",
legend = c(x$model$family,
"Observed"),
col = 1:2, pch = 21:22)
},
bootHist = {
h <- graphics::hist(
x$populationSize$boot,
ylab = "Number of bootstrap samples",
xlab = expression(hat(N)),
main = "Bootstrap of population size estimates",
...
);
if (isTRUE(histKernels)) {
xxx <- density(x$populationSize$boot, kernel = "epanechnikov");
graphics::lines(
x = xxx$x,
y = dlnorm(
x = xxx$x,
meanlog = log(mean(x$populationSize$boot) /
sqrt(1 + var(x$populationSize$boot) /
mean(x$populationSize$boot) ^ 2)),
sdlog = sqrt(log(1 + var(x$populationSize$boot) / mean(x$populationSize$boot) ^ 2))
) * length(x$populationSize$boot) * diff(h$breaks)[1],
lty = 2,
col = 8
)
graphics::lines(
x = xxx$x,
y = dnorm(
x = xxx$x,
mean = mean(x$populationSize$boot),
sd = sd(x$populationSize$boot)
) * length(x$populationSize$boot) * diff(h$breaks)[1],
lty = 3,
col = 9
)
graphics::lines(
x = xxx$x,
y = xxx$y * length(x$populationSize$boot) * diff(h$breaks)[1],
lty = 4,
col = 10
)
graphics::legend(
"topright",
c("log-normal density", "normal density", "Epanechnikov kernel"),
lty = 2:4,
col = 8:10
)
}
},
rootogram = {
M <- marginalFreq(x);
FF <- M$table;
FF[names(M$y)] <- M$y;
FF[setdiff(names(M$table), names(M$y))] <- 0;
FF <- FF[-1];
bp <- graphics::barplot(
sqrt(FF),
offset = sqrt(M$table[-1]) - sqrt(FF),
ylab = expression(sqrt("Frequency")),
xlab = "captures",
ylim = c(min(sqrt(M$table[-1]) - sqrt(FF)) - 1, max(sqrt(M$table[-1]) + 1)),
...);
graphics::lines(bp, sqrt(M$table[-1]),
type = "o",
pch = 19,
lwd = 2,
col = 2);
graphics::abline(h = 0,
lty = 2)
},
dfpopContr = {
if (missing(dfpop)) dfpop <- dfpopsize(x, ...);
contr <- x$model$pointEst(
pw = x$priorWeights,
eta = x$linearPredictors,
contr = TRUE
);
plot(x = dfpop, y = contr,
main = paste0("Observation deletion effect on point estimate of",
"\npopulation size estimate vs observation contribution"),
xlab = "Deletion effect", ylab = "Observation contribution",
...);
abline(a = 0, b = 1, col = "red")
},
dfpopBox = {
if (missing(dfpop)) dfpop <- dfpopsize(x, ...);
graphics::boxplot(
dfpop,
ylab = "Deletion effect",
main = paste0("Boxplot of observation deletion effect on",
"\npoint estimate of population size estimate"),
...
)
},
scaleLoc = {
if (parNum == 1) {
lp <- x$linearPredictors
if (x$model$family == "zelterman") {
lp <- lp[x$which$reg]
}
plot(y = sqrt(abs(res)), x = lp,
xlab = "Linear predictors",
ylab = expression(sqrt("Std. Pearson resid.")),
main = "Scale-Location plot",
...);
graphics::panel.smooth(y = sqrt(abs(res)), x = lp, iter = 0)
} else {
graphics::par(mfrow = c(parNum, 1));
for (k in 1:parNum) {
plot(y = sqrt(abs(res)), x = x$linearPredictors[, k],
xlab = "Linear predictors",
ylab = expression(sqrt("Pearson resid.")),
main = "Scale-Location plot",
sub = paste0("For linear predictors associated with: ",
x$model$etaNames[k]),
...);
graphics::panel.smooth(y = sqrt(abs(res)),
x = x$linearPredictors[, k],
iter = 0)
}
}
},
fitresid = {
if (parNum == 1) {
lp <- x$linearPredictors
if (x$model$family == "zelterman") {
lp <- lp[x$which$reg]
res <- res[x$which$reg]
}
plot(y = res, x = lp,
xlab = "Linear predictors",
ylab = "Response residuals",
main = "Residuals vs Fitted",
...);
abline(lty = 2, col = "darkgrey", h = 0);
graphics::panel.smooth(y = res, x = lp, iter = 0)
} else {
graphics::par(mfrow = c(parNum, 1));
for (k in 1:parNum) {
plot(y = res, x = x$linearPredictors[, k],
xlab = "Linear predictors",
ylab = "Response residuals",
main = "Residuals vs Fitted",
sub = paste0("For linear predictors associated with: ",
x$model$etaNames[k]),
...);
abline(lty = 2, col = "darkgrey", h = 0);
graphics::panel.smooth(y = res,
x = x$linearPredictors[, k],
iter = 0)
}
}
},
cooks = {
A <- cooks.distance.singleRStaticCountData(x);
plot(A,
main = "Cook's distance",
ylab = "Cook's distance",
xlab = "Observation index",
ylim = c(0, max(A) * 1.1),
...)
},
hatplot = {
A <- hatvalues.singleRStaticCountData(x, ...);
graphics::par(mfrow = c(parNum, 1));
for (k in 1:parNum) {
plot(A[, k],
xlab = "Observation index",
ylab = "Hat values",
main = paste0("For linear predictors associated with: ",
x$model$etaNames[k]),
...)
}
},
strata = {
if (missing(confIntStrata)) confIntStrata <- "logNormal"
result <- stratifyPopsize(x, ...)
est <- result[, 3]
obs <- result[, 2]
nm <- result[, 1]
if (confIntStrata == "logNormal")
cnf <- result[, 8:9]
else
cnf <- result[, 6:7]
tilt <- 0
plot(y = 1:NROW(result), x = est,
xlim = range(cnf),
xlab = "Sub population size estimate", ylab="",
main = paste0(
"Confidence intervals and point estimates for specified sub populations\n",
"Observed population sizes are presented as navy coloured points"
),
yaxt = "n", pch = 19,
...
)
points(y = 1:NROW(result), x = obs, col = "navy", pch = 19)
axis(side = 2, at = 1:NROW(result), labels = FALSE)
text(
y = 1:NROW(result),
x = graphics::par("usr", no.readonly = TRUE)[3] - (range(cnf)[2] - range(cnf)[1]) / 20,
adj = 1,
nm,
srt = tilt,
cex = .6,
xpd = TRUE
)
arrows(cnf[ ,1], 1:NROW(result),
cnf[ ,2], 1:NROW(result),
length = 0.05,
angle = 90,
code = 3)
})
invisible()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.