Nothing
#' Quickly perform a variance estimation in common cases
#'
#' @description \code{qvar} (for "quick variance estimation") is a function
#' performing analytical variance estimation in most common cases, that is:
#' \itemize{\item stratified simple random sampling \item non-response
#' correction (if any) through reweighting \item calibration (if any)}
#'
#' Used with \code{define = TRUE}, it defines a so-called variance wrapper, that
#' is a standalone ready-to-use function that can be applied to the survey dataset
#' without having to specify the methodological characteristics of the survey
#' (see \code{\link{define_variance_wrapper}}).
#'
#' @param data The \code{data.frame} containing all the technical information
#' required to prepare the variance estimation process (see other arguments
#' below). Note that this file should contain all the units sampled,
#' including the out-of-scope and non-responding units. If a variance
#' estimation is to be performed right away (when \code{define = FALSE}),
#' it should also contain the variables of interest.
#' @param ... One or more calls to a statistic wrapper (e.g. \code{total()},
#' \code{mean()}, \code{ratio()}). See examples and
#' \code{\link[=standard_statistic_wrapper]{standard statistic wrappers}}
#' @param where A logical vector indicating a domain on which the variance
#' estimation is to be performed.
#' @param by A qualitative variable whose levels are used to define domains
#' on which the variance estimation is performed.
#' @param alpha A numeric vector of length 1 indicating the threshold
#' for confidence interval derivation (\code{0.05} by default).
#' @param display A logical verctor of length 1 indicating whether
#' the result of the estimation should be displayed or not.
#'
#' @param id The identification variable of the units in \code{data}.
#' It should be unique for each row in \code{data} and not contain any
#' missing values.
#' @param dissemination_dummy A character vector of length 1, the name
#' of the logical variable in \code{data} indicating whether the unit
#' does appear in the disseminated file and should be used for point
#' estimates. It should not contain any missing values.
#' @param dissemination_weight A character vector of length 1, the name
#' of the numerical variable in \code{data} corresponding to the
#' dissemination weight of the survey. It should not contain any missing
#' values.
#'
#' @param sampling_weight A character vector of length 1, the name of the
#' numeric variable in \code{data} corresponding to the sampling weights
#' of the survey. It should not contain any missing values.
#' @param strata A character vector of length 1, the name of the factor
#' variable in \code{data} whose level match the stratification
#' used in the survey. Character variables are coerced to factor.
#' If defined, it should not contain any missing value. If \code{NULL},
#' the variance estimation process does not take any stratification
#' into account.
#'
#' @param scope_dummy A character vector of length 1, the name of the logical
#' variable in \code{data} indicating whether the unit belongs to the
#' scope of the survey or not. Numerical variables are coerced to logical.
#' If defined, it should not contain any missing value. If \code{NULL},
#' all units are supposed to be within the scope of the survey.
#'
#' @param nrc_weight A character vector of length 1, the name of the
#' numerical variable in \code{data} corresponding to the weights
#' after non-response correction. If defined, all responding units
#' should have a non-missing value. If \code{NULL}, all
#' units are supposed to be responding and the variance estimation
#' process does not include a second phase in order to take non-response
#' into account.
#' @param response_dummy A character vector of length 1, the name of of the logical
#' variable in \code{data} indicating whether the unit is a responding
#' unit or not. Numerical variables are coerced to logical. \code{response_dummy}
#' should be defined as long as a \code{nrc_weight} is provided. All units
#' in the scope of the survey should have a non-missing value.
#' @param nrc_dummy A character vector of length 1, the name of
#' the logical variable in \code{data} indicating whether the
#' units did take part in the non-response correction process.
#' All units in the scope of the survey should have a non-missing
#' value.
#'
#' @param calibration_weight A character vector of length 1, the name of the
#' numerical variable in \code{data} corresponding to the calibrated
#' weights. If defined, all responding units should have
#' a non-missing value. If \code{NULL}, the variance estimation
#' process does not take any calibration step into account.
#' @param calibration_dummy A character vector of length 1, the name of of the logical
#' variable in \code{data} indicating whether the unit did take part
#' in the calibration process or not. Numerical variables are coerced to
#' logical. If defined, all responding units should have a non-missing
#' value. If \code{NULL}, calibration is supposed to have been performed
#' on all responding units.
#' @param calibration_var A character vector, the name of the variable(s) used in
#' the calibration process. Logical variables are coerced to numeric.
#' Character and factor variables are automatically discretized.
#' \code{calibration_var} should be defined as long as a \code{calibration_weight} is
#' provided. All units taking part in the calibration process should have
#' only non-missing values for all variables in \code{calibration_var}.
#'
#' @param define Logical vector of lentgh 1. Should a variance wrapper
#' be defined instead of performing a variance estimation (see details
#' and examples)?
#' @param envir An environment containing a binding to \code{data}.
#'
#' @details \code{qvar} performs not only technical but also
#' methodological checks in order to ensure that the standard variance
#' estimation methodology does apply (e.g. equal probability of
#' inclusion within strata, number of units per stratum).
#'
#' Used with \code{define = TRUE}, the function returns a variance
#' estimation \emph{wrapper}, that is a ready-to-use function that
#' implements the described variance estimation methodology and
#' contains all necessary data to do so (see examples).
#'
#' Note: To some extent, \code{qvar} is analogous to the \code{qplot} function
#' in the ggplot2 package, as it is an easier-to-use function for common
#' cases. More complex cases are to be handled by using the core functions of
#' the gustave package, e.g. \code{\link{define_variance_wrapper}}.
#'
#' @seealso \code{\link{define_variance_wrapper}}, \code{\link{standard_statistic_wrapper}}
#'
#' @examples ### Example from the Information and communication technologies (ICT) survey
#'
#' # The (simulated) Information and communication technologies (ICT) survey
#' # has the following characteristics:
#' # - stratified one-stage sampling design
#' # - non-response correction through reweighting in homogeneous response groups
#' # - calibration on margins.
#'
#' # The ict_survey data.frame is a (simulated) subset of the ICT
#' # survey file containing the variables of interest for the 612
#' # responding firms.
#'
#' # The ict_sample data.frame is the (simulated) sample of 650
#' # firms corresponding to the ict_survey file. It contains all
#' # technical information necessary to estimate a variance with
#' # the qvar() function.
#'
#' ## Methodological description of the survey
#'
#' # Direct call of qvar()
#' qvar(
#'
#' # Sample file
#' data = ict_sample,
#'
#' # Dissemination and identification information
#' dissemination_dummy = "dissemination",
#' dissemination_weight = "w_calib",
#' id = "firm_id",
#'
#' # Scope
#' scope_dummy = "scope",
#'
#' # Sampling design
#' sampling_weight = "w_sample",
#' strata = "strata",
#'
#' # Non-response correction
#' nrc_weight = "w_nrc",
#' response_dummy = "resp",
#' hrg = "hrg",
#'
#' # Calibration
#' calibration_weight = "w_calib",
#' calibration_var = c(paste0("N_", 58:63), paste0("turnover_", 58:63)),
#'
#' # Statistic(s) and variable(s) of interest
#' mean(employees)
#'
#' )
#'
#' # Definition of a variance estimation wrapper
#' precision_ict <- qvar(
#'
#' # As before
#' data = ict_sample,
#' dissemination_dummy = "dissemination",
#' dissemination_weight = "w_calib",
#' id = "firm_id",
#' scope_dummy = "scope",
#' sampling_weight = "w_sample",
#' strata = "strata",
#' nrc_weight = "w_nrc",
#' response_dummy = "resp",
#' hrg = "hrg",
#' calibration_weight = "w_calib",
#' calibration_var = c(paste0("N_", 58:63), paste0("turnover_", 58:63)),
#'
#' # Replacing the variables of interest by define = TRUE
#' define = TRUE
#'
#' )
#'
#' # Use of the variance estimation wrapper
#' precision_ict(ict_sample, mean(employees))
#'
#' # The variance estimation wrapper can also be used on the survey file
#' precision_ict(ict_survey, mean(speed_quanti))
#'
#' ## Features of the variance estimation wrapper
#'
#' # Several statistics in one call (with optional labels)
#' precision_ict(ict_survey,
#' "Mean internet speed in Mbps" = mean(speed_quanti),
#' "Turnover per employee" = ratio(turnover, employees)
#' )
#'
#' # Domain estimation with where and by arguments
#' precision_ict(ict_survey,
#' mean(speed_quanti),
#' where = employees >= 50
#' )
#' precision_ict(ict_survey,
#' mean(speed_quanti),
#' by = division
#' )
#'
#' # Domain may differ from one estimator to another
#' precision_ict(ict_survey,
#' "Mean turnover, firms with 50 employees or more" = mean(turnover, where = employees >= 50),
#' "Mean turnover, firms with 100 employees or more" = mean(turnover, where = employees >= 100)
#' )
#'
#' # On-the-fly evaluation (e.g. discretization)
#' precision_ict(ict_survey, mean(speed_quanti > 100))
#'
#' # Automatic discretization for qualitative (character or factor) variables
#' precision_ict(ict_survey, mean(speed_quali))
#'
#' # Standard evaluation capabilities
#' variables_of_interest <- c("speed_quanti", "speed_quali")
#' precision_ict(ict_survey, mean(variables_of_interest))
#'
#' # Integration with %>% and dplyr
#' library(magrittr)
#' library(dplyr)
#' ict_survey %>%
#' precision_ict("Internet speed above 100 Mbps" = mean(speed_quanti > 100)) %>%
#' select(label, est, lower, upper)
#'
#' @export
qvar <- function(data, ..., by = NULL, where = NULL,
alpha = 0.05, display = TRUE,
id, dissemination_dummy, dissemination_weight,
sampling_weight, strata = NULL,
scope_dummy = NULL,
nrc_weight = NULL, response_dummy = NULL, nrc_dummy = NULL,
calibration_weight = NULL, calibration_dummy = NULL, calibration_var = NULL,
define = FALSE, envir = parent.frame()
# TODO: Add objects_to_include
){
# Step 1: Define the variance wrapper
call <- as.list(match.call())[-1]
call$envir <- envir
qvar_variance_wrapper <- do.call(
define_qvar_variance_wrapper,
call[names(call) %in% names(formals(define_qvar_variance_wrapper))]
)
# Step 2: Export the variance wrapper
if(define){
note("As define = TRUE, a ready-to-use variance wrapper is (invisibly) returned.")
return(invisible(qvar_variance_wrapper))
}
# Step 3: Estimate variance
qvar_data <- data[data[, id] %in% environment(qvar_variance_wrapper)$reference_id, ]
call$data <- substitute(qvar_data)
call$envir <- environment()
do.call(
qvar_variance_wrapper,
call[names(call) == "" | names(call) %in% names(formals(qvar_variance_wrapper))]
)
}
# Unexported (and undocumented) functions
define_qvar_variance_wrapper <- function(data, id, dissemination_dummy, dissemination_weight,
sampling_weight, strata = NULL,
scope_dummy = NULL,
nrc_weight = NULL, response_dummy = NULL, nrc_dummy = NULL,
calibration_weight = NULL, calibration_var = NULL, calibration_dummy = NULL,
envir = parent.frame()
){
# Step 1: Control arguments consistency and display the welcome message ----
# Step 1.1: Arguments consistency
is_missing <- c(
data = missing(data),
id = missing(id),
dissemination_dummy = missing(dissemination_dummy),
dissemination_weight = missing(dissemination_weight),
sampling_weight = missing(sampling_weight)
)
if(any(is_missing)) stop(
"The following arguments are missing: ",
paste(names(which(is_missing)), collapse = ", "), "."
)
inconsistency <- list(
nrc_weight_but_no_response_dummy = !is.null(nrc_weight) && is.null(response_dummy),
resp_or_nrc_dummy_but_no_nrc_weight = is.null(nrc_weight) && (!is.null(response_dummy) || !is.null(nrc_dummy)),
calibration_weight_but_no_calibration_var = !is.null(calibration_weight) && is.null(calibration_var),
calibration_or_calibration_var_but_no_calibration_weight = is.null(calibration_weight) && (!is.null(calibration_dummy) || !is.null(calibration_var))
)
if(any(unlist(inconsistency))) stop(
"Some arguments are inconsistent:",
if(inconsistency$nrc_weight_but_no_response_dummy)
"\n - weights after non-response correction are provided (nrc_weight argument) but no variable indicating responding units (response_dummy argument)" else "",
if(inconsistency$resp_or_nrc_dummy_but_no_nrc_weight)
"\n - a variable indicating responding units and/or a variable indicating the units taking part in the non-response correction process are provided (response_dummy and nrc_dummy argument) but no weights after non-response correction (nrc_weight argument)." else "" ,
if(inconsistency$calibration_weight_but_no_calibration_var)
"\n - calibrated weights are provided (calibration_weight argument) but no calibration variables (calibration_var argument)" else "" ,
if(inconsistency$calibration_or_calibration_var_but_no_calibration_weight)
"\n - a variable indicating the units taking part in a calibration process and/or calibration variables are provided (calibration_dummy and calibration_var arguments) but no calibrated weights (calibration_weight argument)" else ""
)
# Step 1.2: Welcome message
message(
"Survey variance estimation with the gustave package",
"\n\nThe following features are taken into account:",
if(!is.null(strata)) "\n - stratified simple random sampling" else
"\n - simple random sampling WITHOUT stratification",
if(!is.null(scope_dummy)) "\n - out-of-scope units" else "",
if(!is.null(nrc_weight)) "\n - non-response correction through reweighting" else "",
if(!is.null(calibration_weight)) "\n - calibration on margins" else "",
"\n"
)
# Step 2: Control that arguments do exist and retrieve their value ----
# Step 2.1: Evaluation of all arguments
deparse_data <- deparse(substitute(data))
data <- eval(substitute(data), envir = envir)
if(!is.data.frame(data)) stop("data argument must refer to a data.frame")
arg <- lapply(as.list(match.call())[-1], eval)
# Step 2.2: Expected length
should_be_single_variable_name <- intersect(c(
"id", "dissemination_dummy", "dissemination_weight",
"sampling_weight", "strata", "scope_dummy",
"nrc_weight", "response_dummy", "nrc_dummy",
"calibration_weight", "calibration_dummy"
), names(arg))
should_be_variable_name_vector <- intersect(c("calibration_var"), names(arg))
should_be_variable_name <- c(should_be_single_variable_name, should_be_variable_name_vector)
# Step 2.3: Check whether arguments are character vectors and
# have the expected length
is_single_variable_name <- sapply(
arg[should_be_single_variable_name],
function(arg) is.null(arg) || is_variable_name(arg, max_length = 1)
)
if(any(!is_single_variable_name)) stop(
"The following arguments do not refer to a variable name (character vector of length 1): ",
names(is_single_variable_name)[!is_single_variable_name]
)
is_variable_name_vector <- sapply(
arg[should_be_variable_name_vector],
function(arg) is.null(arg) || is_variable_name(arg, max_length = Inf)
)
if(any(!is_variable_name_vector)) stop(
"The following arguments do not refer to a vector of variable names: ",
names(is_variable_name_vector)[!is_variable_name_vector]
)
# Step 2.4: Check the presence of the variables in data
is_not_in_data <- lapply(should_be_variable_name, function(param){
tmp <- variable_not_in_data(var = arg[[param]], data = data)
if(is.null(tmp)) return(NULL)
paste0("\n - ", param, " argument: ", paste0(tmp, collapse = " "))
})
if(length(unlist(is_not_in_data)) > 0) stop(
"Some variables do not exist in ", deparse_data, ": ",
unlist(is_not_in_data[!is.null(is_not_in_data)])
)
# Step 2.5: Retrieve the value of the arguments
data <- data[order(data[[arg$id]]), ]
list2env(c(
lapply(
arg[should_be_single_variable_name], function(param)
stats::setNames(data[[param]], data[[arg$id]])
),
lapply(arg[should_be_variable_name_vector], function(param){
tmp <- data[param]
row.names(tmp) <- data[[arg$id]]
tmp
})
), envir = environment())
# Step 3: Control arguments value ----
# id
if(anyNA(id))
stop("The id variable (", arg$id, ") should not contain any missing (NA) values.")
if(any(duplicated(id)))
stop("The id variable (", arg$id, ") should not contain any duplicated values.")
# dissemination_dummy
if(is.numeric(dissemination_dummy)){
note("The dissemination dummy variable (", arg$dissemination_dummy, ") is of type numeric. It is automatically coerced to logical.")
# TODO: Check whether there is only 1/0 values and do it for all dummies (check_dummy() function)
dissemination_dummy <- as.logical(dissemination_dummy)
}
if(!is.logical(dissemination_dummy))
stop("The dissemination dummy variable (", arg$dissemination_dummy, ") should be of type logical or numeric.")
if(anyNA(dissemination_dummy))
stop("The dissemination dummy variable (", arg$dissemination_dummy, ") should not contain any missing (NA) values.")
# dissemination_weight
if(!is.numeric(dissemination_weight))
stop("The dissemination weights (", arg$dissemination_weight, ") should be numeric.")
if(anyNA(dissemination_weight[dissemination_dummy])) stop(
"The dissemination weights (", arg$dissemination_weight, ") should not contain ",
"any missing (NA) values for disseminated units (", arg$dissemination_dummy, ")."
)
# sampling_weight
if(!is.numeric(sampling_weight))
stop("The sampling weights (", arg$sampling_weight, ") should be numeric.")
if(anyNA(sampling_weight))
stop("The sampling weights (", arg$sampling_weight, ") should not contain any missing (NA) values.")
# strata
if(is.null(strata)) strata <- stats::setNames(factor(rep("1", length(id))), id)
if(!is.null(strata)){
if(is.character(strata)){
note("The strata variable (", arg$strata, ") is of type character. It is automatically coerced to factor.")
strata <- factor(strata)
}
if(!is.factor(strata))
stop("The strata variable (", arg$strata, ") should be of type factor or character.")
if(anyNA(strata))
stop("The strata variable (", arg$strata, ") should not contain any missing (NA) values.")
}
# scope_dummy
if(is.null(scope_dummy)) scope_dummy <- rep(TRUE, length(id)) else{
if(is.numeric(scope_dummy)){
note("The scope dummy variable (", arg$scope_dummy, ") is of type numeric. It is automatically coerced to logical.")
scope_dummy <- as.logical(scope_dummy)
}
if(!is.logical(scope_dummy))
stop("The scope dummy variable (", arg$scope_dummy, ") should be of type logical or numeric.")
if(anyNA(scope_dummy))
stop("The scope dummy variable (", arg$scope_dummy, ") should not contain any missing (NA) values.")
disseminated_out_of_scope <- id[dissemination_dummy & !scope_dummy]
if(length(disseminated_out_of_scope) > 0) stop(
"The following units are out-of-scope (", arg$scope_dummy, ") but nonetheless disseminated (",
arg$dissemination_dummy, "): ", display_only_n_first(disseminated_out_of_scope), "."
)
}
# response_dummy
if(is.null(response_dummy)) response_dummy <- scope_dummy else{
if(is.numeric(response_dummy)){
note("The response dummy variable (", arg$response_dummy, ") is of type numeric. It is automatically coerced to logical.")
response_dummy <- as.logical(response_dummy)
}
if(!is.logical(response_dummy))
stop("The response dummy variable (", arg$response_dummy, ") should be of type logical or numeric.")
if(anyNA(response_dummy))
stop("The response dummy variable (", arg$response_dummy, ") should not contain any missing (NA) values.")
}
# nrc_dummy
if(is.null(nrc_dummy)) nrc_dummy <- scope_dummy else{
if(is.numeric(nrc_dummy)){
note("The non-reponse correction dummy variable (", arg$nrc_dummy, ") is of type numeric. It is automatically coerced to logical.")
nrc_dummy <- as.logical(nrc_dummy)
}
if(!is.logical(nrc_dummy))
stop("The non-reponse correction dummy variable (", arg$nrc_dummy, ") should be of type logical or numeric.")
if(anyNA(nrc_dummy))
stop("The non-reponse correction dummy variable (", arg$nrc_dummy, ") should not contain any missing (NA) values.")
}
# nrc_weight
if(!is.null(nrc_weight)){
if(!is.numeric(nrc_weight))
stop("The weights after non-response correction (", arg$nrc_weight, ") should be numeric.")
if(anyNA(nrc_weight[response_dummy %in% TRUE & nrc_dummy %in% TRUE])) stop(
"The weights after non-response correction (", arg$nrc_weight, ") should not contain any missing (NA) values ",
"for responding units (", arg$response_dummy, ") having taken part in the non-reponse correction process (", arg$nrc_dummy, ")."
)
}
# calibration_dummy
if(is.null(calibration_dummy) && !is.null(calibration_weight)) calibration_dummy <- response_dummy
if(!is.null(calibration_dummy)){
if(is.numeric(calibration_dummy)){
note("The dummy variable indicating the units used in the calibation process (", arg$calibration_dummy, ") is of type numeric. It is automatically coerced to logical.")
calibration_dummy <- as.logical(calibration_dummy)
}
if(!is.logical(calibration_dummy))
stop("The dummy variable indicating the units used in the calibation process (", arg$calibration_dummy, ") should be of type logical or numeric.")
if(anyNA(calibration_dummy))
stop("The dummy variable indicating the units used in the calibation process (", arg$calibration_dummy, ") should not contain any missing (NA) values.")
}
# calibration_weight
if(!is.null(calibration_weight)){
if(!is.numeric(calibration_weight))
stop("The weights after calibration (", arg$calibration_weight, ") should be numeric.")
if(anyNA(calibration_weight[calibration_dummy %in% TRUE]))
stop("The weights after calibration (", arg$calibration_weight, ") should not contain any missing (NA) values for units used in the calibration process.")
}
# calibration_var
if(!is.null(calibration_var)){
calibration_var_quanti <- names(which(sapply(calibration_var, function(var) is.numeric(var) || is.logical(var))))
calibration_var_quali <- names(which(sapply(calibration_var, function(var) is.factor(var) || is.character(var))))
calibration_var_pb_type <- setdiff(arg$calibration_var, c(calibration_var_quanti, calibration_var_quali))
if(length(calibration_var_pb_type) > 0) stop(
"The following calibration variables are neither quantitative (numeric, logical) nor qualitative (factor, character): ",
display_only_n_first(calibration_var_pb_type), "."
)
if(length(calibration_var_quali) > 0) note(
"Note: The following calibration variables are qualitative (factor, character): ",
display_only_n_first(calibration_var_quali), ". They will be automatically discretized."
)
calibration_var_pb_NA <- names(which(sapply(calibration_var, function(var) anyNA(var[calibration_dummy %in% TRUE]))))
if(length(calibration_var_pb_NA) > 0) stop(
"The following calibration variables contain missing (NA) values for units used in the calibration process: ",
display_only_n_first(calibration_var_pb_NA, collapse = " "), "."
)
}
# Step 4: Define methodological quantities ----
samp_exclude <- stats::setNames(rep(FALSE, length(id)), id)
# Logical controls
inconsistency <- list(
out_of_scope_and_responding = id[!scope_dummy & response_dummy]
)
if(any(sapply(inconsistency, length) > 0)) stop(
"Some arguments are inconsistent:",
if(length(inconsistency$out_of_scope_and_responding) > 0) paste0(
"\n - the following units are classified both as out-of-scope units (",
arg$scope_dummy, " variable) and as responding units (", arg$response_dummy,
" variable): ", display_only_n_first(inconsistency$out_of_scope_and_responding), "."
)
)
# Exclude strata with only one sampled unit
strata_with_one_sampled_unit <-
names(which(tapply(id[!samp_exclude], strata[!samp_exclude], length) == 1))
if(length(strata_with_one_sampled_unit) > 0){
warn(
"The following strata contain less than two sampled units: ",
display_only_n_first(strata_with_one_sampled_unit), ". ",
"They are excluded from the variance estimation process (but kept for point estimates)."
)
samp_exclude <- samp_exclude | as.character(strata) %in% strata_with_one_sampled_unit
}
# Enforce equal probabilities in each stratum
sampling_weight_equal <- sampling_weight
strata_with_unequal_sampling_weight <-
names(which(tapply(sampling_weight_equal[!samp_exclude], strata[!samp_exclude], stats::sd) > 1e-6))
if(length(strata_with_unequal_sampling_weight) > 0){
# TODO: Enhance warning message when strata = NULL
warn(
"The following strata contain units whose sampling weights are not exactly equal: ",
display_only_n_first(strata_with_unequal_sampling_weight), ". ",
"The mean weight per stratum is used instead."
)
sampling_weight_equal[!samp_exclude] <-
tapply(sampling_weight_equal, strata, base::mean)[as.character(strata[!samp_exclude])]
}
# Reference id and reference weight
guessed_weight <- sampling_weight
if(!is.null(nrc_weight)) guessed_weight[response_dummy & nrc_dummy] <- nrc_weight[response_dummy & nrc_dummy]
if(!is.null(calibration_weight)) guessed_weight[calibration_dummy] <- calibration_weight[calibration_dummy]
guessed_weight_not_matching_dissemination_weight <- id[dissemination_dummy & guessed_weight != dissemination_weight]
if(length(guessed_weight_not_matching_dissemination_weight)) stop(
"The following units have a disseminated weight (", arg$dissemination_weight,
") that does not match the one guessed from the survey description: ",
display_only_n_first(guessed_weight_not_matching_dissemination_weight), "."
)
reference_id <- id[dissemination_dummy]
reference_weight <- dissemination_weight[dissemination_dummy]
# Sampling
samp <- list()
samp$id <- id
samp$exclude <- samp_exclude[samp$id]
samp$weight <- sampling_weight_equal[samp$id]
samp$strata <- strata[samp$id]
samp$precalc <- suppressMessages(with(samp, var_srs(
y = NULL, pik = 1 / weight[!exclude], strata = strata[!exclude]
)))
samp <- samp[c("id", "exclude", "precalc")]
# Non-reponse
if(!is.null(nrc_weight)){
nrc <- list()
nrc$id <- id[response_dummy]
nrc$sampling_weight <- sampling_weight[nrc$id]
nrc$response_prob <- (sampling_weight / nrc_weight)[nrc$id]
}else nrc <- NULL
# Calibration
if(!is.null(calibration_weight)){
calib <- list()
calib$id <- id[response_dummy & calibration_dummy]
calib$weight <- calibration_weight[calib$id]
calib$var <- calibration_var[calib$id, , drop = FALSE]
calib$var[calibration_var_quanti] <-
lapply(calib$var[calibration_var_quanti], Matrix)
calib$var[calibration_var_quali] <-
lapply(calib$var[calibration_var_quali], discretize_qualitative_var)
calib$var <- do.call(cbind, calib$var)
# TODO: Handle the node stack overflow problem
calib$precalc <- res_cal(y = NULL, x = calib$var, w = calib$weight)
calib <- calib[c("id", "precalc")]
}else calib <- NULL
# Step 5: Define the variance wrapper ----
qvar_variance_wrapper <- define_variance_wrapper(
variance_function = qvar_variance_function,
reference_id = reference_id,
reference_weight = reference_weight,
default_id = arg$id,
technical_data = list(samp = samp, nrc = nrc, calib = calib)
)
qvar_variance_wrapper
}
qvar_variance_function <- function(y, samp, nrc, calib){
var <- list()
# Calibration
if(!is.null(calib)){
y <- add_zero(y, calib$id, remove = FALSE)
y[calib$id, ] <- res_cal(y = y[calib$id, , drop = FALSE], precalc = calib$precalc)
}
# Non-response
if(!is.null(nrc)){
var[["nr"]] <- var_pois(y[nrc$id, , drop = FALSE], pik = nrc$response_prob, w = nrc$sampling_weight)
y[nrc$id, ] <- as.matrix(Diagonal(x = 1 / nrc$response_prob) %*% y[nrc$id, , drop = FALSE])
}
# Sampling
y <- add_zero(y, rownames = samp$id)
var[["sampling"]] <- var_srs(y = y[!samp$exclude, , drop = FALSE], precalc = samp$precalc)
# Final summation
Reduce(`+`, var)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.