Nothing
#' @title Covariance Estimation
#'
#' @description When the variance of a derived statistic (e.g., a difference) is
#' required, the covariance between the two statistics must be
#' calculated. This function uses results generated by various
#' functions (e.g., a \code{\link{lm.sdf}}) to find the covariance
#' between two statistics.
#' @param varEstA a list of two \code{data.frame}s returned by a function after
#' the \code{returnVarEstInputs} argument was turned on.
#' The statistic named in the \code{varA} argument must be
#' present in each \code{data.frame}.
#' @param varEstB a list of two \code{data.frame}s returned by a function after
#' the \code{returnVarEstInputs} argument was turned on.
#' The statistic named in the \code{varA} argument must be
#' present in each \code{data.frame}. When the same as
#' \code{varEstA}, the covariance is within one result.
#' @param varA a character that names the statistic in the \code{varEstA}
#' argument for which a covariance is required
#' @param varB a character that names the statistic in the \code{varEstB}
#' argument for which a covariance is required
#' @param jkSumMultiplier when the jackknife variance estimation method---or
#' balanced repeated replication (BRR)
#' method---multiplies the final jackknife variance estimate by a value,
#' set \code{jkSumMultiplier} to that value.
#' For an \code{edsurvey.data.frame} or
#' a \code{light.edsurvey.data.frame},
#' the recommended value can be recovered with
#' \code{EdSurvey::getAttributes(}\code{myData,} \code{"jkSumMultiplier")}.
#' @param returnComponents set to \code{TRUE} to return the imputation variance seperate from the sampling variance
#'
#' @details
#' These functions are not vectorized, so \code{varA} and
#' \code{varB} must contain exactly one variable name.
#'
#' The method used to compute the covariance is in
#' the vignette titled
#' \href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{\emph{Statistical Methods Used in EdSurvey}}
#'
#' The method used to compute the degrees of freedom is in the vignette titled
#' \href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{\emph{Statistical Methods Used in EdSurvey}}
#' in the section \dQuote{Estimation of Degrees of Freedom.}
#'
#' @return
#' a numeric value; the jackknife covariance estimate. If \code{returnComponents} is \code{TRUE}, returns a vector of
#' length three, \code{V} is the variance estimate, \code{Vsamp} is the sampling component of the variance, and \code{Vimp} is the imputation component of the variance
#'
#' @example man/examples/varEstToCov.R
#' @author Paul Bailey
#' @export
varEstToCov <- function(varEstA, varEstB = varEstA, varA, varB = varA, jkSumMultiplier, returnComponents = FALSE) {
if (!varA %in% unique(varEstA$JK$variable)) {
stop(paste0("Could not find ", sQuote("varA"), " value of ", dQuote(varA), " in varEstA$JK$variable. Existing values include ", pasteItems(unique(varEstA$JK$variable)), "."))
}
if (!varB %in% unique(varEstB$JK$variable)) {
stop(paste0("Could not find ", sQuote("varB"), " value of ", dQuote(varB), " in varEstB$JK$variable. Existing values include ", pasteItems(unique(varEstB$JK$variable)), "."))
}
if (!is.null(varEstA$PV) & !is.null(varEstB$PV)) {
if (!varA %in% unique(varEstA$PV$variable)) {
stop(paste0("Could not find ", sQuote("varA"), " value of ", dQuote(varA), " in varEstA$PV$variable. Existing values include ", pasteItems(unique(varEstA$PV$variable)), "."))
}
if (!varB %in% unique(varEstB$PV$variable)) {
stop(paste0("Could not find ", sQuote("varB"), " value of ", dQuote(varB), " in varEstB$PV$variable. Existing values include ", pasteItems(unique(varEstB$PV$variable)), "."))
}
}
mergeVar <- c("PV", "JKreplicate")
if (any(!mergeVar %in% names(varEstA$JK))) {
stop(paste0("Merge vars missing from varEstA$JK ", pasteItems(mergeVar[!mergeVar %in% names(varEstA$JK)]), "."))
}
if (any(!mergeVar %in% names(varEstB$JK))) {
stop(paste0("Merge vars missing from varEstB$JK ", pasteItems(mergeVar[!mergeVar %in% names(varEstB$JK)]), "."))
}
JK <- merge(subset(varEstA$JK, variable == varA),
subset(varEstB$JK, variable == varB),
by = mergeVar, all.x = FALSE, all.y = FALSE,
suffixes = c(".A", ".B")
)
if (nrow(JK) < 1) {
stop("Could not find appropriate values in JK results to calculate covariance.")
}
JK$cov <- jkSumMultiplier * JK$value.A * JK$value.B
JK <- JK[!is.na(JK$cov), ]
CovJK <- sum(JK$cov) / length(unique(JK$PV))
# now get Imputation Covariance
if (!is.null(varEstA$PV) & !is.null(varEstB$PV)) {
mergeVar <- c("PV")
if (sum(!mergeVar %in% names(varEstA$PV)) > 0) {
stop(paste0("Merge vars missing from varEstA$PV ", pasteItems(mergeVar[!mergeVar %in% names(varEstA$PV)]), "."))
}
if (sum(!mergeVar %in% names(varEstB$PV)) > 0) {
stop(paste0("Merge vars missing from varEstB$PV ", pasteItems(mergeVar[!mergeVar %in% names(varEstB$PV)]), "."))
}
PV <- merge(subset(varEstA$PV, variable == varA),
subset(varEstB$PV, variable == varB),
by = mergeVar, all.x = FALSE, all.y = FALSE,
suffixes = c(".A", ".B")
)
if (nrow(PV) < 1) {
stop("Could not find appropriate values in PV results to calculate covariance.")
}
PV$cov <- PV$value.A * PV$value.B
M <- length(unique(PV$PV))
if (M == 1) {
stop("Insufficient number of plausible values in PV results to calculate imputation variance.")
}
# see statistics vignette for source of the scaling factor
CovImp <- (M + 1) / (M * (M - 1)) * sum(PV$cov)
} else { # ends if(!is.null(varEstA$PV) & !is.null(varEstB$PV))
CovImp <- 0
}
if (returnComponents) {
V <- CovJK + CovImp
return(list(V = V, Vsamp = CovJK, Vimp = CovImp))
}
return(CovJK + CovImp)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.