#' @title Compare Expected Number of Species at ModelSites
#' @details Very similar to elpd_compare().
#' Assume each ModelSite with detection parameters are iid samples from a common distribution.
#' In this regime the expected number of species for a ModelSite and the observed number of species, are both random variables.
#'
#' For a correct model and given covariates for a ModelSite, the difference between the observed number of species and expected number of species is a random variable
#' with mean 0, and fixed variance.
#' For a ModelSite drawn from the common distribution, the mean is still 0, and the variance is a random variable.
#' The variance of species numbers for a ModelSite drawn at random can be estimated from
#' the fixed variance computed for many iid ModelSite covariate draws by averaging:
#' \deqn{ V[D] = E[D^2] - E[D]^2 = E[E[D^2|X]] - E[E[D|X]]^2 = E[E[D^2|X]] = E[E[(N - E[N|X])^2|X]] = E[E[N^2|X] - E[N|X]^2] = E[V[N|X]] }
#' The variance of species numbers for a ModelSite drawn at random can also be estimated from many observed ModelSites by averaging:
#' \deqn{ V[D] = V[N - E[N|X]]}
#' The mean difference can also be estimated from observations (and should be zero):
#' \deqn{ 0 = E[D] = E[E[D|X]] ~ \frac{1}{m}\sum_{i = 1}^m d_i }
#'
#' *No bias correction for use of insample data is included.*
#' @return A matrix with a column for the aggregate (summed) difference of model sites between models,
#' and the standard error of this difference (computed as the sample standard deviation of difference, multiplied by the square root of the number of ModelSites)
#' @examples
#' inputdata <- readRDS("./private/data/clean/7_2_10_input_data.rds")
#' observed <- detectednumspec(inputdata$holdoutdata$yXobs[, inputdata$species],
#' inputdata$holdoutdata$yXobs[, "ModelSiteID", drop = TRUE])
#' modelspecs <- readRDS("./tmpdata/7_3_00_modelspecs.rds")
#' filenames <- lapply(modelspecs, function(x) x$filename)
#' a <- vapply(filenames, file.exists, FUN.VALUE = FALSE)
#' fittedmods <- lapply(filenames, function(x) {
#' fit <- readRDS(x)
#' return(fit)})
#' prednumbers_l <- lapply(
#' fittedmods, function(fit) {
#' predsumspecies_newdata(fit,
#' inputdata$holdoutdata$Xocc,
#' inputdata$holdoutdata$yXobs,
#' ModelSiteVars = "ModelSiteID")
#' })
#' predicted <- do.call(cbind, lapply(prednumbers_l, function(x) x["Esum_det", , drop = TRUE]))
#' predictedV <- do.call(cbind, lapply(prednumbers_l, function(x) x["Vsum_det", , drop = TRUE]))
#' Enum_compare(observed, predicted, predictedV)
#' @param observed A list of the number of species observed for each ModelSite
#' @param predicted A dataframe or matrix with each column the expected number of species detected from a single model. Each row is a ModelSite in the same order as [observed].
#' @param predictedV Same as [predicted], but the variance of the number of species detected.
#' @export
Enum_compare <- function(observed, predicted, predictedV){
resids <- drop(observed) - predicted
Eresid_fromobs <- colMeans(resids)
Eresid_frommodel <- Eresid_fromobs * 0
Vresid_fromobs <- apply(resids, MARGIN = 2, var)
Vresid_frommodel <- colMeans(predictedV)
SE_Eresid_obs_frommodel <- sqrt(Vresid_frommodel/length(drop(observed)))
SE_Eresid_obs_fromobs <- sqrt(Vresid_fromobs/length(drop(observed)))
out <- data.frame("E[D]_model" = Eresid_frommodel,
"E[D]_obs" = Eresid_fromobs,
"SE(E[D]_obs)_model" = SE_Eresid_obs_frommodel,
"SE(E[D]_obs)_obs" = SE_Eresid_obs_fromobs,
"V[D]_model" = Vresid_frommodel,
"V[D]_obs" = Vresid_fromobs,
check.names = FALSE
)
return(out)
}
#' @describeIn Enum_compare The coverage of approximate 95% credence interval of each model.
#' @export
Enum_coverage <- function(observed, predicted, predictedV){
resids <- drop(observed) - predicted
in95ci <- abs(resids) < 2 * sqrt(predictedV)
out <- list(
mean = colMeans(in95ci),
in_ci = in95ci
)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.