#' Quantile-quantile plot and goodness of fit tests for detection functions
#'
#' Constructs a quantile-quantile (Q-Q) plot for fitted model as a graphical
#' check of goodness of fit. Formal goodness of fit testing for detection
#' function models using Kolmogorov-Smirnov and Cramer-von Mises tests. Both
#' tests are based on looking at the quantile-quantile plot produced by
#' \code{\link{qqplot.ddf}} and deviations from the line x=y.
#'
#' The Kolmogorov-Smirnov test asks the question "what's the largest vertical
#' distance between a point and the y=x line?" It uses this distance as a
#' statistic to test the null hypothesis that the samples (EDF and CDF in our
#' case) are from the same distribution (and hence our model fits well). If the
#' deviation between the y=x line and the points is too large we reject the
#' null hypothesis and say the model doesn't have a good fit.
#'
#' Rather than looking at the single biggest difference between the y=x line
#' and the points in the Q-Q plot, we might prefer to think about all the
#' differences between line and points, since there may be many smaller
#' differences that we want to take into account rather than looking for one
#' large deviation. Its null hypothesis is the same, but the statistic it uses
#' is the sum of the deviations from each of the point to the line.
#
#' @section Details:
#' Note that a bootstrap procedure is required to ensure that the p-values from
#' the procedure are correct as the we are comparing the cumulative
#' distribution function (CDF) and empirical distribution function (EDF) and we
#' have estimated the parameters of the detection function.
#'
#' @param model fitted distance detection function model object
#' @param plot the Q-Q plot be plotted or just report statistics?
#' @param nboot number of replicates to use to calculate p-values for the
#' goodness of fit test statistics
#' @param ks perform the Kolmogorov-Smirnov test (this involves many bootstraps
#' so can take a while)
#' @param \dots additional arguments passed to \code{\link{plot}}
#' @export
#' @return A list of goodness of fit related values: \item{edf}{matrix of lower
#' and upper empirical distribution function values} \item{cdf}{fitted
#' cumulative distribution function values} \item{ks}{list with K-S statistic
#' (\code{Dn}) and p-value (\code{p})} \item{CvM}{list with CvM statistic
#' (\code{W}) and p-value (\code{p})}
#' @author Jeff Laake, David L Miller
#' @seealso \code{\link{ddf.gof}}, \code{\link{cdf.ds}}
#' @references Burnham, K.P., S.T. Buckland, J.L. Laake, D.L. Borchers, T.A.
#' Marques, J.R.B. Bishop, and L. Thomas. 2004. Further topics in distance
#' sampling. pp: 385-389. In: Advanced Distance Sampling, eds. S.T. Buckland,
#' D.R.Anderson, K.P. Burnham, J.L. Laake, D.L. Borchers, and L. Thomas.
#' Oxford University Press.
#' @keywords utility
#' @importFrom graphics abline
qqplot.ddf <- function(model, plot=TRUE, nboot=100, ks=FALSE, ...){
# get the edf/cdf values
edf_cdf <- get_edf_cdf(model)
if(plot){
plot(edf_cdf$upper.edf, edf_cdf$cdfvalues,
xlab="Empirical cdf", ylab="Fitted cdf",
xlim=c(0,1), ylim=c(0,1), asp=1, ...)
abline(0, 1, ...)
}
# don't do KS if the model is not "ds"
if(!inherits(model, "ds") & ks){
warning("Can't calculate Kolmogorov-Smirnov p-value for non-\"ds\" models, only calculating Cramer-von Mises")
ks <- FALSE
}
# do the tests
gof_p <- gof.pvalues(model, ks=ks, nboot=nboot)
# build return object
return(list(edf=cbind(edf_cdf$lower.edf, edf_cdf$upper.edf),
cdf=edf_cdf$cdfvalues,
ks=list(Dn=gof_p$Dn, p=gof_p$ks),
CvM=list(W=gof_p$W, p=gof_p$cramer),
nboot = attr(gof_p, "nboot"),
boot_success = attr(gof_p, "boot_success")
)
)
}
get_edf_cdf <- function(model){
fun <- function(x, z, lt){
if(lt){
length(z[z<x])
}else{
length(z[z<=x])
}
}
if(inherits(model, "ds")){
cdfvalues <- sort(cdf.ds(model)$fitted)
}else if(inherits(model, "io") |
inherits(model, "trial") |
inherits(model, "rem") ){
cdfvalues <- sort(cdf.ds(model$ds)$fitted)
}else if(inherits(model, "io.fi")){
data <- model$data
data <- data[data$object %in% as.numeric(names(model$fitted)), ]
n <- length(data$distance)/2
cdfvalues <- rep(0, n)
for(i in 1:n){
newdata <- data[data$object %in% as.numeric(names(model$fitted))[i], ]
cdfvalues[i] <- predict.io.fi(model, newdata=newdata, integrate=TRUE,
int.range=newdata$distance[1])$fitted
if(model$meta.data$left!=0){
widthvalue <- predict.io.fi(model, newdata=newdata, integrate=TRUE,
int.range=model$meta.data$width)$fitted
cdfvalues[i] <- cdfvalues[i]/widthvalue
}
}
if(model$meta.data$left==0){
cdfvalues <- cdfvalues/predict.io.fi(model, integrate=TRUE)$fitted
}
cdfvalues <- sort(cdfvalues)
}else if(inherits(model, "trial.fi")){
data <- model$data
data <- data[data$observer==1&data$object %in%
as.numeric(names(model$fitted)), ]
n <- length(data$distance)
cdfvalues <- rep(0, n)
for(i in 1:n){
newdata <- data[data$object %in% as.numeric(names(model$fitted))[i], ]
cdfvalues[i] <- predict.trial.fi(model, newdata=newdata, integrate=TRUE,
int.range=newdata$distance[1])$fitted
if(model$meta.data$left != 0){
widthvalue <- predict.trial.fi(model, newdata=newdata, integrate=TRUE,
int.range=model$meta.data$width)$fitted
cdfvalues[i] <- cdfvalues[i]/widthvalue
}
}
if(model$meta.data$left==0){
cdfvalues <- cdfvalues/predict.trial.fi(model, newdata=data,
integrate=TRUE)$fitted
}
cdfvalues <- sort(cdfvalues)
}else if(inherits(model, "rem.fi")){
data <- model$data
data <- data[data$object %in% as.numeric(names(model$fitted)),]
n <- length(data$distance)/2
cdfvalues <- rep(0, n)
for(i in 1:n){
newdata <- data[data$object %in% as.numeric(names(model$fitted))[i], ]
cdfvalues[i] <- predict.rem.fi(model, newdata=newdata, integrate=TRUE,
int.range=newdata$distance[1])$fitted
if(model$meta.data$left!=0){
widthvalue <- predict.rem.fi(model, newdata=newdata, integrate=TRUE,
int.range=model$meta.data$width)$fitted
cdfvalues[i] <- cdfvalues[i]/widthvalue
}
}
if(model$meta.data$left==0){
cdfvalues <- cdfvalues/predict.rem.fi(model, newdata=data,
integrate=TRUE)$fitted
}
cdfvalues <- sort(cdfvalues)
}
n <- length(cdfvalues)
lower.edf <- (unlist(sapply(cdfvalues, fun, z=cdfvalues, lt=TRUE)))/n
upper.edf <- (unlist(sapply(cdfvalues, fun, z=cdfvalues, lt=FALSE)))/n
return(list(lower.edf = lower.edf,
upper.edf = upper.edf,
cdfvalues = cdfvalues,
n = n))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.