#######################################################
# - survey pkg problem: #
# WHEN COMPUTING THE DEFF, svymean gets called from #
# svyvar (generic) and here THERE IS NO POINT IN #
# ESTIMATING THE VARIANCE OF THE MEAN (i.e. svyrecvar #
# should not be called). Lumley's solution is highly #
# memory and CPU time hungry WITHOUT REASON! #
# - ReGenesees pkg solution: #
# A modified z.svyvar which calls a modified function #
# z.svymean (generic, since survey.design2 and #
# cal.analytic objects differ ONLY in variance #
# estimation) is a good alternative. #
# HUGE SAVINGS OF MEMORY AND CPU TIME ARE OBTAINED!! #
#######################################################
z.svyvar <- function (x, design, na.rm = FALSE, ...)
{
if (inherits(x, "formula"))
x <- model.frame(x, design$variables, na.action = na.pass)
else if (typeof(x) %in% c("expression", "symbol"))
x <- eval(x, design$variables)
## If z.svyvar has been called on a subset get n from the domain index
## else compute it for the whole sample:
if (is.null(di <- attr(design, "domain.index"))) n <- NROW(x) else n <- length(di)
xbar <- z.svymean(x, design, na.rm = na.rm)
if (NCOL(x) == 1) {
x <- x - xbar
v <- z.svymean(x * x * n/(n - 1), design, na.rm = na.rm, deff = TRUE)
attr(v, "statistic") <- "variance"
return(v)
}
x <- t(t(x) - xbar)
p <- NCOL(x)
a <- matrix(rep(x, p), ncol = p * p)
b <- x[, rep(1:p, each = p)]
v <- z.svymean(a * b * n/(n - 1), design, na.rm = na.rm, deff = TRUE)
v <- matrix(v, ncol = p)
attr(v, "statistic") <- "variance"
v
}
z.svymean <- function(x,design, na.rm=FALSE, deff=FALSE,...){
################################################################
# Similar to the original svymean, without Variance estimation #
# which is not necessary for DEFF estimation. #
################################################################
if (inherits(x,"formula")){
## do the right thing with factors
mf<-model.frame(x,design$variables,na.action=na.pass)
xx<-lapply(attr(terms(x),"variables")[-1],
function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
cols<-sapply(xx,NCOL)
x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
scols<-c(0,cumsum(cols))
for(i in 1:length(xx)){
x[,scols[i]+1:cols[i]]<-xx[[i]]
}
colnames(x)<-do.call("c",lapply(xx,colnames))
}
else {
if(typeof(x) %in% c("expression","symbol"))
x<-eval(x, design$variables)
else if(is.data.frame(x) && any(sapply(x,is.factor))){
xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
cols<-sapply(xx,NCOL)
scols<-c(0,cumsum(cols))
cn<-character(sum(cols))
for(i in 1:length(xx))
cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
for(i in 1:length(xx)){
x[,scols[i]+1:cols[i]]<-xx[[i]]
}
colnames(x)<-cn
}
}
x<-as.matrix(x)
## Missing values treatment
nas<-rowSums(is.na(x))
if (na.rm && sum(nas)>0){
# If domain has some non-NA values, use them for estimation:
if (length(design[nas==0,]$prob) > 0) {
design<-design[nas==0,]
if (length(nas)>length(design$prob))
x<-x[nas==0,,drop=FALSE]
else
x[nas>0,]<-0
}
# If domain has only NAs, cannot do anything (i.e. behave as na.rm=FALSE)
else {
na.rm <- FALSE
}
}
pweights<-1/design$prob
psum<-sum(pweights)
average<-colSums(x*pweights/psum)
## > DEFF STUDY 5/12/2021 < ##
## If z.svymean has been called on a subset get n from the domain index
## else compute it for the whole sample:
if (is.null(di <- attr(design, "domain.index"))) n <- NROW(x) else n <- length(di)
if ( ( abs(diff(range(pweights))) / abs(mean(pweights)) ) > 1E-6) {
if (isTRUE(deff)) {
average <- ( psum / (psum - 1) ) * ( (n - 1) / n ) * average
}
}
return(average)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.