#' The function returns a dataframe with planned and actual coefficients of variation (CV) in a multivariate multi-domain allocation problem.
#'
#' @param stratif A dataframe with strata information.
#' @param errors: A dataframe with planned CV.
#' @param alloc: A vector with an allocation into strata.
#' @param minnumstrat: Optional. A number indicating the lower bound for the number of allocated units in each stratum (default is 2).
#' @return A dataframe with planned and actual CV for each combination of domain, domain category and auxiliary variable.
beat.1cv <- function(stratif, errors,alloc, minnumstrat = 2)
{
# Begin body
# First input data frame
colnames(stratif) <- toupper(colnames(stratif))
# Second input data frame
colnames(errors) <- toupper(colnames(errors))
#
#check inputs:
# functions
checkData <- function(strata = NULL, errors = NULL) {
# controls on strata dataframe
if (!is.null(strata)) {
if (sum(grepl("N", colnames(strata))) < 1)
stop("In strata dataframe the indication of population (N) is missing")
if (sum(grepl("STRAT", toupper(colnames(strata)), fixed = TRUE)) <
1)
stop("In strata dataframe the indication of stratum (STRATUM) is missing")
if (sum(grepl("DOM1", toupper(colnames(strata)), fixed = TRUE)) <
1)
stop("In strata dataframe the indication of at least Ine domain (DOM1) is missing")
if(sum(grepl('CENS',toupper(colnames(strata)),fixed=TRUE))
< 1) stop('In strata dataframe the indication of strata
to be sampled or censused (CENS) is missing')
if(sum(grepl('COST',toupper(colnames(strata)),fixed=TRUE))
< 1) stop('In strata dataframe the indication of
interviewing cost in strata (COST) is missing')
if (sum(grepl("M1", toupper(colnames(strata)), fixed = TRUE)) < 1)
stop("In strata dataframe the indication of at least one mean (M1) is missing")
if (sum(grepl("S1", toupper(colnames(strata)), fixed = TRUE)) < 1)
stop("In strata dataframe the indication of at least one standard deviation (S1) is missing")
if(sum(grepl('^M+[0123456789]',toupper(colnames(strata)),perl=TRUE))!= sum(grepl('^S+[0123456789]',toupper(colnames(strata)),perl=TRUE)))
stop('In strata dataframe the number of means (Mx)
differs from the number of standard deviations (Sx)')
}
# controls on errors dataframe
if (!is.null(errors)) {
if (sum(grepl("DOM", toupper(colnames(errors)), fixed = TRUE)) <
1)
stop("In errors dataframe the indication of domain (DOM) is missing")
if (sum(grepl("CV", toupper(colnames(errors)), fixed = TRUE)) <
1)
stop("In errors dataframe the indication of at least one constraint (CV) is missing")
}
# crossed controls between errors and strata
if (!is.null(errors) && !is.null(strata)) {
if
(sum(grepl('^S+[0123456789]',toupper(colnames(strata)),perl=TRUE)) != sum(grepl('CV',toupper(colnames(errors)),fixed=TRUE)))
stop('In strata dataframe the number of means and std deviations differs from the number of coefficient of
variations in errors dataframe')
if (sum(grepl("DOM", toupper(colnames(strata)), fixed = TRUE)) != nrow(errors))
stop("The different domains (DOMx) in strata dataframe are not represented in errors dataframe")
}
}
# check n<minnumstr
check_n <- function(allocation=NULL, strata=NULL, minnumstrat) {
if (!is.null(allocation)) {
if(any(allocation<minnumstrat & strata$N>minnumstrat)){
error_msg=paste(c(paste("The allocated sample size is lower than the required minimum number of sampled unit per stratum (minnumstrat=", minnumstrat,"), even if the stratum size allows to select a higher size. This happens in the following strata:", sep=""), paste(strata$STRATUM[which(allocation<minnumstrat & strata$N>minnumstrat)], collapse=";")), collapse=" ")
stop(error_msg)
}
if(any(strata$CENS==1 & strata$N!=allocation)){
stop(paste(c(paste("The following stratum(a) must be censused:"), paste(strata$STRATUM[which(strata$CENS==1 & strata$N!=allocation)], collapse="; ")), collapse=" "))
}
}
}
#
checkData(strata = stratif, errors = errors)
#
check_n(allocation=alloc, strata=stratif, minnumstrat)
#
ordina_variabili <- function(dati, prefisso, n_var) {
#sort df columns ,
#s.t. they are sorted as follows: prefisso1,...prefisson_var
if (!is.data.frame(dati))
stop()
as.matrix(dati[, paste(prefisso, 1:n_var, sep = ""),
drop = FALSE])
# as.data.frame(dati[, paste(prefisso, 1:n_var, sep = ""),
# drop = FALSE])
}
ordina_variabili_dom <- function(dati, prefisso, n_var) {
#sort df columns ,
#s.t. they are sorted as follows: prefisso1,...prefisson_var
if (!is.data.frame(dati))
stop()
as.data.frame(dati[, paste(prefisso, 1:n_var, sep = ""),
drop = FALSE])
}
#
# --------------------------------------------------------------
# Initialization of parameters. Attribution of initial
# values
# ------------------------------------------------------------
val <- NULL #to store number of categories of each domain
m <- NULL #to store the means
s <- NULL #to store the standard deviation
cv <- NULL #to store the cv
nstrat <- nrow(stratif)
nvar <- length(grep("CV", names(errors)))
ndom <- nrow(errors)
# ---------------------------------------------------------
# Initial Data Structures - selection from input variables
# ---------------------------------------------------------
# means
med <- ordina_variabili(stratif, "M", nvar)
# variances estimates
esse <- ordina_variabili(stratif, "S", nvar)
#check
if (ncol(med) != ncol(esse))
stop(print("Error: Number of M variables don't match the number of S variables"))
if (ncol(med) != nvar)
stop(print("Error: Number of variables don't match the number of planned CV"))
# populations
N <- as.vector(stratif$N)
# vector cens
cens <- as.vector(stratif$CENS)
# strata with N < minnumstrata are set to take-all strata
cens[N < minnumstrat] <- 1
# vector cost
cost <- as.vector(stratif$COST)
# check and default for cost and cens
if (is.null(cost))
cost <- rep(1, nstrat)
if (is.null(cens))
cens <- rep(0, nstrat)
nocens <- 1 - cens
# check variable cens
if (sum(cens) == length(cens)) {
warning(print("Warning: Variable CENS always equal 1"))
}
# domains
name_dom <- sapply(1:ndom, function(i) paste("DOM", i, sep = ""))
dom <- ordina_variabili_dom(stratif, "DOM", ndom)
# numbers of different domains of interest (numbers of
# modalities/categories for each type of domain) if (ndom
# ==1) (nvalues<-nlevels((as.factor(stratif$DOM1)))) if
# (ndom>1) {nvalues<-sapply(nom_dom, function(vari)
# {val<-c(val, nlevels(as.factor(dom[,vari])))})}
nvalues <- sapply(name_dom, function(vari) {
val <- c(val, nlevels(as.factor(dom[, vari])))
})
# -------------------------------------------------
# disjunctive matrix
# -------------------------------------------------
crea_disj <- function(data, vars) {
out <- NULL
sapply(vars, function(vari) {
col <- as.factor(data[, vari])
out <<- cbind(out, outer(col, levels(col),
function(y,x)
{
ifelse(y == x, 1, 0)
}))
})
out
}
disj <- crea_disj(stratif, name_dom)
# -------------------------------------------------
# Building means (m) and deviations matrices (s) for
# different domains of interest
# -------------------------------------------------
#(m) and (s)
nc <- ncol(disj)
for (i in 1:nc) {
m <- cbind(m, disj[, i] * med)
s <- cbind(s, disj[, i] * esse)
}
#
# -------------------------------------------------------------
# computation of the coefficients of variation CVs for
# different domains of interest
# -------------------------------------------------------------
cvDom <- NULL #Per stampa
cvDom2 <- NULL #per stampa
for (k in 1:ndom) {
cvx <- ordina_variabili(errors[k, ], "CV", nvar)
ndomvalues <- c(1:nvalues[k])
for (k1 in ndomvalues) {
cv <- cbind(cv, cvx)
cvDom <- c(cvDom, rep(name_dom[k], length(cvx)))
cvDom2 <- c(cvDom2, rep(levels(as.factor(dom[, k]))[k1],
length(cvx)))
}
}
calcola_cv <- function() {
NTOT <- c(rep(0, nvar))
CVfin <- c(rep(0, nvar))
#
# -------------------------------------------------------------
# Populations in strata for different domains of interest
# NTOTj
# -------------------------------------------------------------
NTOT <- colSums((m > 0) * N)
#
# ------------------------------------------------------------
# Computation of the CVs
# ------------------------------------------------------------
varfin <- rowSums(t((s * N)^2 * (1 - round(alloc)/N)/round(alloc))/NTOT^2)
totm <- rowSums(t(m * N))
CVfin <- round(sqrt(varfin/(totm/NTOT)^2), digits = 4)
return(CVfin)
}
# -----------------------------------------------------------
#create the dataframe to be returned as output
out_cv=data.frame(matrix(nrow=nvar*sum(nvalues), ncol=5))
CVfin <- calcola_cv()
colnames(out_cv)=c("TYPE", "DOMAIN","VAR", "PLANNED_CV",
"ACTUAL_CV")
out_cv[,1]=as.vector(cvDom)
out_cv[,2]=as.vector(cvDom2)
out_cv[,3]=as.vector(paste("V", c(1:ncol(med)), sep=""))
out_cv[,4]=as.vector(cv)
out_cv[,5]= as.vector(CVfin)
return(out_cv)
# End body
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.