##
## Recursive estimation of linearisation variances
## in multistage samples.
##
## Generic constructor for survey design objects.
##
## Dispatch is on the class of `data` (not on the first argument), so a
## method is selected by the kind of object holding the survey data.
## All arguments are passed on unchanged to the selected method.
svydesign <- function(ids,
                      probs = NULL,
                      strata = NULL,
                      variables = NULL,
                      fpc = NULL,
                      data = NULL,
                      nest = FALSE,
                      check.strata = !nest,
                      weights = NULL,
                      pps = FALSE,
                      ...) {
    UseMethod("svydesign", object = data)
}
## Convert a tibble (class "tbl_df") to a plain data frame; any other
## object is returned untouched.
detibble<-function(data) {
    is_tibble <- is.element("tbl_df", class(data))
    if (!is_tibble) {
        return(data)
    }
    as.data.frame(data)
}
## Default svydesign() method: assembles a "survey.design2" object from
## cluster ids, strata, sampling probabilities or weights, and optional
## finite population corrections.
##
## ids, probs, strata, variables, fpc, weights:
##     either a formula (evaluated against `data` with model.frame) or the
##     values themselves.
## data: data frame holding the variables; tibbles are converted first.
## nest: if TRUE, cluster ids are relabelled so they are nested in strata.
## check.strata: if TRUE, stop when first-stage clusters are not nested
##     in first-stage strata.
## pps: FALSE, a character method name, or some other object; see the
##     dispatch to pps_design() at the top of the body.
## calibrate.formula: if non-NULL the finished design is passed through
##     recalibrate().
## variance: "HT" or "YG"; only "HT" is accepted unless pps is "other".
##
## Returns a list with class c("survey.design2","survey.design") holding
## cluster, strata, has.strata, prob, allprob, call, variables, fpc, pps.
svydesign.default<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL,
data=NULL, nest=FALSE, check.strata=!nest,weights=NULL,pps=FALSE,
calibrate.formula=NULL,variance=c("HT","YG"), ...){
data<-detibble(data)
variance<-match.arg(variance)
## Character pps other than exactly "brewer" or "other", and any pps that
## is neither character nor logical, is handed to pps_design().
## NOTE(review): the test below uses `pps`, not the matched value `a`, so an
## abbreviation such as "brew" is also sent to pps_design() -- confirm intent.
if(is.character(pps)){
a<-match.arg(pps,c("brewer","overton","other"))
if (!(pps %in% c("brewer","other")))
return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc,
data=data,method=a,call=sys.call(-1),variance=variance,...))
} else if (!is.logical(pps)){
return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc,
data=data,method=pps,call=sys.call(-1),variance=variance,...))
}
if (!is.character(pps) || pps!="other"){
if (variance!="HT")
stop("Only variance='HT' supported for this design")
}
## less memory-hungry version for sparse tables
## (local replacement for base interaction(): pastes the values with "."
## and builds the factor from the combinations that occur; `drop` and the
## local `ls` are not used)
interaction<-function (..., drop = TRUE) {
args <- list(...)
narg <- length(args)
if (narg == 1 && is.list(args[[1]])) {
args <- args[[1]]
narg <- length(args)
}
ls<-sapply(args,function(a) length(levels(a)))
ans<-do.call("paste",c(lapply(args,as.character),sep="."))
ans<-factor(ans)
return(ans)
}
## Factory for na.action functions that stop with `message` if any row of
## the object is incomplete; objects with zero columns pass through.
na.failsafe<-function(message="missing values in object"){
function(object,...){
if (NCOL(object)==0)
object
else {
ok <- complete.cases(object)
if (all(ok))
object
else stop(message)
}
}
}
## ---- cluster ids ----
na.id<-na.failsafe("missing values in `id'")
if(inherits(ids,"formula")) {
## build the model.frame call here but evaluate it in the parent frame
mf<-substitute(model.frame(ids,data=data, na.action=na.id))
ids<-eval.parent(mf)
if (ncol(ids)==0) ## formula was ~1
ids<-data.frame(id=1:nrow(ids))
} else{
if (is.null(ids))
stop("Must provide ids= argument")
else
ids<-na.id(data.frame(ids))
}
## make ids factor if they are character
for(i in 1:ncol(ids)){
if (is.character(ids[[i]]))
ids[[i]]<-factor(ids[[i]])
}
## ---- probabilities / weights (mutually exclusive) ----
na.prob<-na.failsafe("missing values in `prob'")
if(inherits(probs,"formula")){
mf<-substitute(model.frame(probs,data=data,na.action=na.prob))
probs<-eval.parent(mf)
}
na.weight<-na.failsafe("missing values in `weights'")
if(inherits(weights,"formula")){
mf<-substitute(model.frame(weights,data=data,na.action=na.weight))
weights<-eval.parent(mf)
} else if (!is.null(weights))
weights<-na.weight(data.frame(weights))
if(!is.null(weights)){
if (!is.null(probs))
stop("Can't specify both sampling weights and probabilities")
else
## weights are stored as their reciprocals
probs<-as.data.frame(1/as.matrix(weights))
}
## ---- strata ----
na.strata<-na.failsafe("missing values in `strata'")
if (!is.null(strata)){
if(inherits(strata,"formula")){
mf<-substitute(model.frame(strata,data=data, na.action=na.strata))
strata<-eval.parent(mf)
}
if (!is.list(strata))
strata<-data.frame(strata=strata)
has.strata<-TRUE
for(i in 1:NCOL(strata)){ ##drop empty strata
if (is.factor(strata[[i]]))
strata[[i]]<-as.factor(as.character(strata[[i]]))
}
} else {
has.strata <-FALSE
## no strata supplied: a constant stratum column for every id column
strata<-na.strata(as.data.frame(matrix(1, nrow=NROW(ids), ncol=NCOL(ids))))
}
## ---- analysis variables ----
if (inherits(variables,"formula")){
mf<-substitute(model.frame(variables,data=data,na.action=na.pass))
variables <- eval.parent(mf)
} else if (is.null(variables)){
variables<-data
} else
variables<-do.call("data.frame",variables)
## ---- finite population correction ----
na.fpc<-na.failsafe("missing values in `fpc'")
if (inherits(fpc,"formula")){
mf<-substitute(model.frame(fpc,data=data,na.action=na.fpc))
fpc<-eval.parent(mf)
}
## check for only one PSU: probably a typo
if ((length(unique(ids[,1]))==1) && !(nest && has.strata)){
stop("Design has only one primary sampling unit")
}
## force subclusters nested in clusters
if (NCOL(ids)>1){
N<-ncol(ids)
for(i in 2:N){
ids[,i]<-do.call("interaction", ids[,1:i,drop=FALSE])
}
}
## force clusters nested in strata
if (nest && has.strata && NCOL(ids)){
N<-NCOL(ids)
NS<-NCOL(strata)
for(i in 1:N)
ids[,i]<-do.call("interaction",
c(strata[,1:min(i,NS),drop=FALSE], ids[,i,drop=FALSE]))
}
## check if clusters nested in strata
if (check.strata && nest)
warning("No point in check.strata=TRUE if nest=TRUE")
if(check.strata && !is.null(strata) && NCOL(ids)){
## for each first-stage cluster, the number of strata it appears in
sc<-(rowSums(table(ids[,1],strata[,1])>0))
if(any(sc>1)) stop("Clusters not nested in strata at top level; you may want nest=TRUE.")
}
## force substrata nested in clusters
N<-ncol(ids)
NS<-ncol(strata)
if (N>1){
for(i in 2:N)
strata[,i]<-interaction(strata[,min(i,NS)], ids[,i-1])
}
## PPS: valid choices currently are FALSE and "brewer"
if (is.logical(pps) && pps) stop("'pps' must be FALSE or a character string")
if (is.character(pps)) {
## from here on pps is only a flag
pps<-TRUE
}
## Finite population correction: specified per observation
## Also incorporates design sample sizes formerly in nPSU
if (!is.null(fpc) && !is.numeric(fpc) && !is.data.frame(fpc))
stop("fpc must be a matrix or dataframe or NULL")
fpc<-as.fpc(fpc,strata, ids, pps=pps)
## if FPC specified, but no weights, use it for weights
if (is.null(probs) && is.null(weights)){
if (is.null(fpc$popsize)){
## warn only when neither argument was written in the call
if (missing(probs) && missing(weights))
warning("No weights or probabilities supplied, assuming equal probability")
probs<-rep(1,nrow(ids))
} else {
probs<-1/weights(fpc, final=FALSE)
}
}
## recycle a single probability; make sure probs has a row per observation
if (is.numeric(probs) && length(probs)==1)
probs<-rep(probs, NROW(variables))
if (length(probs)==0) probs<-rep(1,NROW(variables))
if (NCOL(probs)==1) probs<-data.frame(probs)
rval<-list(cluster=ids)
rval$strata<-strata
rval$has.strata<-has.strata
## overall probability is the product across the columns of probs
rval$prob<- apply(probs,1,prod)
rval$allprob<-probs
## NOTE(review): this value is overwritten by sys.call(-1) a few lines below
rval$call<-match.call()
rval$variables<-variables
rval$fpc<-fpc
## record the call one frame up -- presumably the svydesign() generic call
rval$call<-sys.call(-1)
rval$pps<-pps
class(rval)<-c("survey.design2","survey.design")
if (!is.null(calibrate.formula)){
rval<-recalibrate(rval, calibrate.formula)
rval$call<-sys.call(-1)
}
rval
}
## Variance contribution of a single stratum at one sampling stage.
##
## x          matrix of observations for the stratum; an optional
##            "recentering" attribute supplies the centre used when the
##            stratum mean is not recomputed
## cluster    sampling-unit id for each row of x
## nPSU       design number of sampling units in the stratum
## fpc        population size for each row (NULL: no correction)
## lonely.psu policy for strata with a single sampling unit
## stratum, stage  labels used in messages
## cal        not used in this function
##
## Returns an NCOL(x) by NCOL(x) matrix.
onestrat<-function(x,cluster,nPSU,fpc, lonely.psu,stratum=NULL,stage=1,cal=cal){
    centre <- attr(x, "recentering")
    if (is.null(centre)) {
        centre <- 0
    }

    ## correction factor: 1 with no fpc or an infinite population
    fpc_factor <- if (is.null(fpc)) {
        rep(1, NROW(x))
    } else {
        ifelse(fpc==Inf, 1, (fpc-nPSU)/fpc)
    }
    multiplier <- if (nPSU>1) fpc_factor*nPSU/(nPSU-1) else fpc_factor

    ## self-representing stratum
    if (all(fpc_factor<0.0000001)) {
        return(matrix(0,NCOL(x),NCOL(x)))
    }

    ## one row (and one multiplier) per sampling unit
    multiplier <- multiplier[!duplicated(cluster)]
    totals <- rowsum(x,cluster)
    n_observed <- nrow(totals)
    if (n_observed<nPSU) {
        ##can't be PPS, so the multiplier must be a constant
        padding <- matrix(0,ncol=ncol(totals),nrow=nPSU-n_observed)
        totals <- rbind(totals,padding)
        multiplier <- rep(multiplier[1],NROW(totals))
    }

    if (lonely.psu!="adjust" || n_observed>1 ||
        (nPSU>1 & !getOption("survey.adjust.domain.lonely"))) {
        centre <- colMeans(totals)
    }
    totals <- sweep(x=totals, MARGIN=2, STATS=centre, FUN="-")

    if (n_observed==1 && nPSU>1 && getOption("survey.adjust.domain.lonely")){
        warning("Stratum (",stratum,") has only one PSU at stage ",stage)
        if (lonely.psu=="average" && getOption("survey.adjust.domain.lonely")) {
            multiplier <- NA
        }
    }

    if (nPSU>1) {
        return(crossprod(totals*sqrt(multiplier)))
    }
    ## single design PSU: outcome depends on the lonely.psu policy
    switch(lonely.psu,
           certainty=,
           remove=,
           adjust=crossprod(totals*sqrt(multiplier)),
           average=NA*crossprod(totals),
           fail= stop("Stratum (",stratum,") has only one PSU at stage ",stage),
           stop("Can't handle lonely.psu=",lonely.psu)
           )
}
## Variance for one sampling stage: onestrat() is applied to the rows of
## each stratum and the per-stratum matrices are summed.
##
## Strata whose matrix contains NA are left out of the sum, and the sum is
## rescaled by (number of strata)/(number of strata without NA).
## Returns an NCOL(x) by NCOL(x) matrix (all zero when x has no rows).
onestage<-function(x, strata, clusters, nPSU, fpc, lonely.psu=getOption("survey.lonely.psu"),stage=0, cal){
    n_vars <- NCOL(x)
    if (NROW(x)==0) {
        return(matrix(0,n_vars,n_vars))
    }

    ## For the 'adjust' option for lonely PSUs,
    ## recenter around mean from all PSUs in all strata
    use_adjust <- !is.null(lonely.psu) && lonely.psu == "adjust"
    recentering <- if (use_adjust) {
        psus_per_stratum <- tapply(X = nPSU, INDEX = as.numeric(strata), FUN = head, 1)
        colSums(x) / sum(psus_per_stratum)
    } else {
        0
    }

    per_stratum <- tapply(seq_len(NROW(x)), list(factor(strata)), function(rows){
        block <- x[rows,,drop=FALSE]
        attr(block, "recentering") <- recentering
        ## fpc[rows] rather than its first element, to allow pps(brewer)
        onestrat(block, clusters[rows],
                 nPSU[rows][1], fpc[rows],
                 lonely.psu=lonely.psu,stratum=strata[rows][1], stage=stage,cal=cal)
    })

    n_strata <- length(unique(strata))
    n_usable <- sum(sapply(per_stratum,function(m) !any(is.na(m))))
    stacked <- array(unlist(per_stratum),c(n_vars,n_vars,length(per_stratum)))
    apply(stacked,1:2,sum,na.rm=TRUE)*n_strata/n_usable
}
## Linearisation variance for a multistage design.
##
## x          matrix (or coercible) of weighted observations / influence
##            functions, one row per observation
## clusters   sampling-unit ids, one column per stage
## stratas    strata, one column per stage
## fpcs       list with elements sampsize, popsize and (optionally) pps
## postStrata optional list of post-stratification / raking / calibration
##            components; their fitted means are removed from x first
## lonely.psu policy for single-PSU strata, passed to the stage-level code
## one.stage  passed to the stage-level code as `one.stage`
##
## Returns the value of multistage() or multistage_rcpp().
svyrecvar<-function(x, clusters, stratas, fpcs, postStrata=NULL,
                    lonely.psu=getOption("survey.lonely.psu"),
                    one.stage=getOption("survey.ultimate.cluster")){
    x<-as.matrix(x)
    cal<-NULL

    ## The compiled code is only used when fpcs$pps is exactly FALSE
    ## (Brewer's method doesn't work with the current rcpp version) and the
    ## option is switched on; isTRUE() keeps an unset option from breaking
    ## the `if` further down.
    use_rcpp <- isFALSE(fpcs$pps) && isTRUE(getOption("survey.use_rcpp"))

    ## Remove post-stratum means, which may cut across clusters
    ## Also center the data using any "g-calibration" models
    if(!is.null(postStrata)){
        for (psvar in postStrata){
            if (inherits(psvar, "greg_calibration")) {
                if (psvar$stage==0){
                    ## G-calibration at population level
                    x<-as.matrix(qr.resid(psvar$qr,x/psvar$w)*psvar$w)
                } else {
                    ## G-calibration within clusters: handled stage by
                    ## stage in multistage(), so the compiled path is off
                    cal<-c(cal, list(psvar))
                    use_rcpp<-FALSE
                }
            } else if (inherits(psvar, "raking")){
                ## raking by iterative proportional fitting
                for(iterations in 1:10){
                    for(margin in psvar){
                        psw<-attr(margin, "weights")
                        x<- x - psw*apply(x/psw, 2, ave, margin)
                    }
                }
            } else {
                ## ordinary post-stratification
                psw<-attr(psvar, "weights")
                oldw<-attr(psvar, "oldweights")
                if (is.null(oldw)) oldw<-rep(1,length(psw))
                ## avoid 0/0 where both old and new weights are zero
                zeroes<-which(psw==0 & oldw==0)
                if (length(zeroes)) psw[zeroes]<-1
                psvar<-as.factor(psvar)
                psmeans<-rowsum(x*oldw/psw,psvar,reorder=TRUE)/as.vector(by(oldw,psvar,sum))
                x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
            }
        }
    }

    ## Pass on the caller's lonely.psu instead of re-reading the global
    ## option, so an explicit lonely.psu= argument takes effect.
    if (use_rcpp) {
        multistage_rcpp(x, clusters,stratas,fpcs$sampsize, fpcs$popsize,
                        lonely.psu=lonely.psu,
                        one.stage=one.stage,stage=1,cal=cal)
    } else {
        multistage(x, clusters,stratas,fpcs$sampsize, fpcs$popsize,
                   lonely.psu=lonely.psu,
                   one.stage=one.stage,stage=1,cal=cal)
    }
}
## Recursive multistage variance.
##
## The variance of the current stage comes from onestage() applied to the
## first column of clusters/stratas/nPSUs/fpcs.  Unless one.stage is TRUE,
## fpcs is NULL, or there is only one cluster column, the function then
## recurses into every first-column cluster with the remaining columns and
## adds each result, multiplied by nPSUs/fpcs for that cluster.
## Returns a matrix with row and column names taken from colnames(x).
multistage<-function(x, clusters, stratas, nPSUs, fpcs,
                     lonely.psu=getOption("survey.lonely.psu"),
                     one.stage=FALSE,stage,cal){
    n<-NROW(x)
    total_var <- onestage(x,stratas[,1], clusters[,1], nPSUs[,1],
                          fpcs[,1], lonely.psu=lonely.psu,stage=stage,cal=cal)

    go_deeper <- one.stage!=TRUE && !is.null(fpcs) && NCOL(clusters)>1
    if (go_deeper) {
        within_cluster <- function(index){
            ## residuals for G-calibration using population information
            ## only on clusters at this stage.
            for(cali in cal){
                if (cali$stage != stage) {
                    next
                }
                j<-match(clusters[index,1],cali$index)
                if (length(unique(j))!=1) {
                    stop("Internal problem in g-calibration data: stage",stage,
                         ", cluster", j)
                }
                j<-j[[1]]
                x[index,]<-as.matrix(qr.resid(cali$qr[[j]], x[index,,drop=FALSE]/cali$w[[j]])*cali$w[[j]])
            }
            lower <- multistage(x[index,,drop=FALSE], clusters[index,-1,drop=FALSE],
                                stratas[index,-1,drop=FALSE], nPSUs[index,-1,drop=FALSE],
                                fpcs[index,-1,drop=FALSE],
                                lonely.psu=lonely.psu,one.stage=one.stage-1,
                                stage=stage+1,cal=cal)
            lower*nPSUs[index[1],1]/fpcs[index[1],1]
        }
        sub_vars <- by(1:n, list(as.numeric(clusters[,1])), within_cluster)
        for(k in 1:length(sub_vars)) {
            total_var <- total_var + sub_vars[[k]]
        }
    }

    dimnames(total_var)<-list(colnames(x),colnames(x))
    total_var
}
## Ben Schneider's code to use C++. Should *agree* with multistage() except
## for one known bug in lonely.psu
##
## Wrapper that prepares inputs for the compiled arma_multistage() routine.
##
## x, clusters, stratas, nPSUs, fpcs, lonely.psu, one.stage, stage:
##     same meaning as for multistage().
## cal: accepted so the signature matches multistage(); not used here.
##
## Non-numeric cluster and stratum columns are recoded as integer codes
## and everything is passed as a matrix.  A NULL `fpcs` is replaced by a
## matrix of Inf with the dimensions of `nPSUs`.
## Returns the matrix from arma_multistage() with row and column names
## taken from colnames(x).
multistage_rcpp <- function(x, clusters, stratas, nPSUs, fpcs,
                            lonely.psu=getOption("survey.lonely.psu"),
                            one.stage=FALSE, stage, cal){
    ## validate the policy name before calling compiled code
    lonely.psu <- switch(lonely.psu,
                         certainty = 'certainty',
                         remove = 'remove',
                         adjust = 'adjust',
                         average = 'average',
                         fail = 'fail',
                         stop("Can't handle lonely.psu=",lonely.psu)
                         )
    ## isTRUE() turns an unset or non-logical option into FALSE, so a
    ## proper logical is always handed to the compiled routine
    use_singleton_method_for_domains <- isTRUE(getOption("survey.adjust.domain.lonely"))

    # Prepare the inputs to pass to Rcpp function
    if (is.data.frame(clusters)) {
        for (j in seq_len(ncol(clusters))) {
            if (!is.numeric(clusters[[j]]))
                clusters[[j]] <- as.numeric(as.factor(clusters[[j]]))
        }
    }
    clusters <- as.matrix(clusters)

    if (is.data.frame(stratas)) {
        for (j in seq_len(ncol(stratas))) {
            if (!is.numeric(stratas[[j]]))
                stratas[[j]] <- as.numeric(as.factor(stratas[[j]]))
        }
    }
    stratas <- as.matrix(stratas)

    if (is.null(fpcs)) {
        strata_pop_sizes <- matrix(Inf,
                                   nrow = nrow(nPSUs),
                                   ncol = ncol(nPSUs))
    } else {
        strata_pop_sizes <- as.matrix(fpcs)
    }
    strata_samp_sizes <- as.matrix(nPSUs)

    # Call the Rcpp function
    v <- arma_multistage(Y = as.matrix(x),
                         samp_unit_ids = clusters,
                         strata_ids = stratas,
                         strata_samp_sizes = strata_samp_sizes,
                         strata_pop_sizes = strata_pop_sizes,
                         singleton_method = lonely.psu,
                         use_singleton_method_for_domains = use_singleton_method_for_domains,
                         use_only_first_stage = one.stage,
                         stage = stage)
    dimnames(v) <- list(colnames(x),colnames(x))
    v
}
## fpc not given are zero: full sampling.
## Build a "survey_fpc" object (per-observation sample and population
## sizes, one column per stage).
##
## df      NULL, or a matrix/data frame of finite population corrections:
##         either all population sizes (>= 1) or all sampling fractions
##         (<= 1); mixing the two is an error
## strata  strata, one column per stage
## ids     sampling-unit ids, one column per stage
## pps     if TRUE the fpc must be sampling fractions and is checked for
##         constancy within clusters instead of within strata
##
## Returns list(popsize, sampsize[, pps]) of class "survey_fpc"; popsize
## is NULL (and pps is omitted) when df is NULL.
as.fpc<-function(df,strata,ids,pps=FALSE){
    count<-function(x) sum(!duplicated(x))

    ## sample size: number of distinct ids within each stratum, per stage
    sampsize<-matrix(ncol=ncol(ids),nrow=nrow(ids))
    for(i in seq_len(ncol(ids)))
        split(sampsize[,i],strata[,i])<-lapply(split(ids[,i],strata[,i]),count)

    if (is.null(df)){
        ## No fpc
        rval<-list(popsize=NULL, sampsize=sampsize)
        class(rval)<-"survey_fpc"
        return(rval)
    }

    fpc<-as.matrix(df)
    ## population sizes if any value exceeds 1; in that case every value
    ## must be >= 1, otherwise the two conventions have been mixed
    if (xor(ispopsize<-any(df>1), all(df>=1))){
        big<-which(fpc>=1,arr.ind=TRUE)
        small<-which(fpc<1,arr.ind=TRUE)
        cat("record",big[1,1]," stage",big[1,2],": fpc=", fpc[big[1,,drop=FALSE]],"\n")
        cat("record",small[1,1]," stage ",small[1,2],": fpc=", fpc[small[1,,drop=FALSE]],"\n")
        stop("Must have all fpc>=1 or all fpc<=1")
    }

    if (ispopsize){
        if(pps) stop("fpc must be specified as sampling fraction for PPS sampling")
        popsize<-fpc
    } else {
        popsize<-sampsize/(fpc)
    }

    if (any(popsize<sampsize)){
        toobig<-which(popsize<sampsize,arr.ind=TRUE)
        cat("record",toobig[1,1],"stage",toobig[1,2],": popsize=",popsize[toobig[1,,drop=FALSE]],
            " sampsize=", sampsize[toobig[1,,drop=FALSE]],"\n")
        stop("FPC implies >100% sampling in some strata")
    }

    if (!ispopsize && any(is.finite(popsize) & (popsize>1e10))){
        big<-which(popsize>1e10 & is.finite(popsize),arr.ind=TRUE)
        warning("FPC implies population larger than ten billion (record",big[1,1]," stage ",big[1,2],")")
    }

    ## Constancy check, stage by stage.  The PPS case loops over every
    ## stage too, rather than reusing the index left behind by the
    ## sample-size loop above (which only ever looked at the last stage).
    for(i in seq_len(ncol(popsize))){
        if (!pps){
            ## check that fpc is constant within strata.
            diff<-by(popsize[,i], list(strata[,i]), count)
            if (any(as.vector(diff)>1)){
                j<-which(as.vector(diff)>1)[1]
                warning("`fpc' varies within strata: stratum ",names(diff)[j], " at stage ",i)
            }
        } else {
            ## check that fpc is constant within clusters
            diff<-by(popsize[,i], list(ids[,i]), count)
            if (any(as.vector(diff)>1)){
                j<-which(as.vector(diff)>1)[1]
                warning("`fpc' varies within cluster: cluster ",names(diff)[j], " at stage ",i)
            }
        }
    }

    rval<-list(popsize=popsize, sampsize=sampsize,pps=pps)
    class(rval)<-"survey_fpc"
    rval
}

## Sampling weights implied by a "survey_fpc" object.
##
## final=TRUE  one weight per observation: product of population sizes
##             over stages divided by product of sample sizes
## final=FALSE matrix of per-stage ratios popsize/sampsize
## Stops when there is no population size or any exceeds 1e12.
"weights.survey_fpc"<-function(object,final=TRUE,...){
    if (is.null(object$popsize) || any(object$popsize>1e12))
        stop("Weights not supplied and can't be computed from fpc.")
    if (final) {
        pop<-apply(object$popsize,1,prod)
        samp<-apply(object$sampsize,1,prod)
        pop/samp
    } else {
        object$popsize/object$sampsize
    }
}
## Print method for "survey.design2" objects.
##
## Always prints a one-line description of the design and the stored call.
## design.summaries=TRUE adds a summary of the probabilities and stratum /
## population sizes; varnames=TRUE adds the column names of the design.
## Returns x invisibly.
print.survey.design2<-function(x,varnames=FALSE,design.summaries=FALSE,...){
    n_obs <- NROW(x$cluster)
    if (x$has.strata) cat("Stratified ")
    n_psu <- length(unique(x$cluster[,1]))
    ## every observation its own PSU means there is no clustering
    clustered <- (n_obs != n_psu)

    if (clustered) {
        cat(NCOL(x$cluster),"- level Cluster Sampling design")
    } else {
        cat("Independent Sampling design")
    }
    if (is.null(x$fpc$popsize)) {
        cat(" (with replacement)\n")
    } else {
        cat("\n")
    }
    if (clustered) {
        per_level <- lapply(x$cluster,function(i) length(unique(i)))
        cat(paste("With (",paste(unlist(per_level),collapse=", "),") clusters.\n",sep=""))
    }

    print(x$call)

    if (design.summaries){
        cat("Probabilities:\n")
        print(summary(x$prob))
        if(x$has.strata){
            if (NCOL(x$cluster)>1)
                cat("First-level ")
            cat("Stratum Sizes: \n")
            stratum_order <- order(unique(x$strata[,1]))
            size_table <- rbind(obs=table(x$strata[,1]),
                                design.PSU=x$fpc$sampsize[!duplicated(x$strata[,1]),1][stratum_order],
                                actual.PSU=table(x$strata[!duplicated(x$cluster[,1]),1]))
            print(size_table)
        }
        if (!is.null(x$fpc$popsize)){
            if (x$has.strata) {
                cat("Population stratum sizes (PSUs): \n")
                first_row <- !duplicated(x$strata[,1])
                pop_sizes <- x$fpc$popsize[first_row,1]
                names(pop_sizes) <- x$strata[first_row,1]
                pop_sizes <- pop_sizes[order(names(pop_sizes))]
                print(pop_sizes)
            } else {
                cat("Population size (PSUs):",x$fpc$popsize[1,1],"\n")
            }
        }
    }

    if (varnames){
        cat("Data variables:\n")
        print(colnames(x))
    }
    invisible(x)
}
## summary() method: returns the design itself, tagged with an extra
## leading class "summary.survey.design2".
summary.survey.design2<-function(object,...){
    tagged_class <- c("summary.survey.design2",class(object))
    class(object) <- tagged_class
    object
}
## Print method for the summary object: puts "survey.design2" in front of
## the class vector and prints with variable names and design summaries
## switched on.
print.summary.survey.design2<-function(x,...){
    design <- x
    class(design) <- c("survey.design2",class(x))
    print(design,varnames=TRUE,design.summaries=TRUE,...)
}
## Warn when `object` is a "survey.design" that still carries an nPSU
## component, i.e. has the old-style layout.
.svycheck<-function(object){
    old_style <- inherits(object,"survey.design") && !is.null(object$nPSU)
    if (old_style)
        warning("This is an old-style design object. Please use as.svydesign2 to update it.")
}
## Convert an old-style "survey.design" object to "survey.design2".
##
## Objects already of class "survey.design2" are returned unchanged;
## anything that is not a "survey.design" is an error.  The result gets a
## per-stage strata data frame, a "survey_fpc" component built from nPSU
## (and fpc$N when an fpc is present), and loses its nPSU component.
as.svydesign2<-function(object){
    if (inherits(object,"survey.design2"))
        return(object)
    if (!inherits(object,"survey.design"))
        stop("This function is for updating old-style survey.design objects")

    count<-function(x) length(unique(x))
    nc <- ncol(object$cluster)
    multi_stage <- nc>1

    ## stage 1 uses the original strata; later stages are stratified by
    ## the cluster of the previous stage
    strata<-data.frame(one=object$strata)
    if (multi_stage){
        for(i in 2:nc){
            strata<-cbind(strata,object$cluster[,i-1])
        }
    }

    sampsize<-matrix(ncol=nc,nrow=nrow(object$cluster))
    sampsize[,1]<-object$nPSU[match(object$strata, names(object$nPSU))]
    if (multi_stage){
        for(i in 2:nc){
            split(sampsize[,i],strata[,i])<-lapply(split(object$cluster[,i],strata[,i]),count)
        }
    }

    has_fpc <- !is.null(object$fpc)
    if (has_fpc){
        popsize<-sampsize
        popsize[,1]<-object$fpc$N[match(object$strata,object$fpc$strata)]
    } else {
        popsize<-NULL
    }
    if (multi_stage && has_fpc){
        warning("Assuming complete sampling at stages 2 -",nc)
    }

    fpc<-list(popsize=popsize,sampsize=sampsize)
    class(fpc)<-"survey_fpc"

    object$fpc<-fpc
    object$strata<-strata
    object$nPSU<-NULL
    class(object)<-c("survey.design2","survey.design")
    object
}
## TRUE-ish when the design carries a pps component other than FALSE;
## FALSE when there is no pps component.
is.pps<-function(x) if(is.null(x$pps)) FALSE else (x$pps!=FALSE)

## Subsetting method for "survey.design2" objects: x[i, ...].
##
## i    row subscript (logical, numeric, or anything else usable to index
##      the probability vector, e.g. names)
## ...  column subscript, applied to x$variables only
## drop if FALSE, rows are never physically removed
##
## For calibrated or PPS designs, or when drop=FALSE, the design keeps all
## rows and the rows that are not selected get probability Inf.  Otherwise
## every per-observation component is physically subset.  With `i`
## missing only the columns of x$variables are subset.
"[.survey.design2"<-function (x,i, ..., drop=TRUE){
    if (!missing(i)){
        if (is.calibrated(x) || is.pps(x) || !drop){
            ## Set weights to zero: no memory saving possible
            ## There should be an easier way to complement a subscript..
            if (is.logical(i))
                x$prob[!i]<-Inf
            else if (is.numeric(i) && length(i))
                x$prob[-i]<-Inf
            else {
                ## x$prob is a plain vector, so it takes a single subscript.
                ## Filling with `[]<-` keeps its names, which a character
                ## subscript needs in order to find its elements again.
                tmp<-x$prob[i]
                x$prob[]<-Inf
                x$prob[i]<-tmp
            }
            index<-is.finite(x$prob)
            psu<-!duplicated(x$cluster[index,1])
            tt<-table(x$strata[index,1][psu])
            if(any(tt==1) && getOption("survey.adjust.domain.lonely")){
                warning(sum(tt==1)," strata have only one PSU in this subset.")
            }
        } else {
            ## subset everything.
            if (!is.null(x$variables)) ## phase 2 of twophase design
                x$variables<-"[.data.frame"(x$variables,i,..1,drop=FALSE)
            x$cluster<-x$cluster[i,,drop=FALSE]
            x$prob<-x$prob[i]
            x$allprob<-x$allprob[i,,drop=FALSE]
            x$strata<-x$strata[i,,drop=FALSE]
            x$fpc$sampsize<-x$fpc$sampsize[i,,drop=FALSE]
            x$fpc$popsize<-x$fpc$popsize[i,,drop=FALSE]
        }
    } else {
        if(!is.null(x$variables))
            x$variables<-x$variables[,..1,drop=FALSE]
    }
    x
}
## Estimated population totals for a "survey.design2" object.
##
## x         formula, expression/symbol, data frame or matrix of variables
## design    the design object
## na.rm     drop observations with missing values
## deff      FALSE, TRUE or "replace": attach a design effect
## influence attach the weighted observations as attribute "influence"
##
## Returns an object of class "svystat" with attributes "var" and
## "statistic" (and optionally "influence" and "deff").
svytotal.survey.design2<-function(x,design, na.rm=FALSE, deff=FALSE,influence=FALSE,...){
    if (inherits(x,"formula")){
        ## do the right thing with factors: each variable in the formula is
        ## expanded on its own with a no-intercept model matrix
        frame <- model.frame(x,design$variables,na.action=na.pass)
        pieces <- lapply(attr(terms(x),"variables")[-1],
                         function(tt) model.matrix(eval(bquote(~0+.(tt))),frame))
        widths <- sapply(pieces,NCOL)
        x <- matrix(nrow=NROW(pieces[[1]]),ncol=sum(widths))
        offsets <- c(0,cumsum(widths))
        for(k in seq_along(pieces)){
            x[,offsets[k]+1:widths[k]] <- pieces[[k]]
        }
        colnames(x) <- do.call("c",lapply(pieces,colnames))
    } else if (typeof(x) %in% c("expression","symbol")) {
        x <- eval(x, design$variables)
    } else if (is.data.frame(x) && any(sapply(x,is.factor))) {
        ## factors in a data frame become one indicator column per level
        pieces <- lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
        widths <- sapply(pieces,NCOL)
        offsets <- c(0,cumsum(widths))
        labels <- character(sum(widths))
        for(k in seq_along(pieces)) {
            labels[offsets[k]+1:widths[k]] <- paste0(names(x)[k],levels(x[[k]]))
        }
        x <- matrix(nrow=NROW(pieces[[1]]),ncol=sum(widths))
        for(k in seq_along(pieces)){
            x[,offsets[k]+1:widths[k]] <- pieces[[k]]
        }
        colnames(x) <- labels
    }
    x <- as.matrix(x)

    if (na.rm){
        nas <- rowSums(is.na(x))
        design <- design[nas==0,]
        ## the subset either dropped rows or kept them with zero weight
        if (length(nas)>length(design$prob)) {
            x <- x[nas==0,,drop=FALSE]
        } else {
            x[nas>0,] <- 0
        }
    }

    N <- sum(1/design$prob)
    total <- colSums(x/as.vector(design$prob),na.rm=na.rm)
    class(total) <- "svystat"
    v <- svyrecvar(x/design$prob,design$cluster,
                   design$strata, design$fpc,
                   postStrata=design$postStrata)
    attr(total, "var") <- v
    attr(total,"statistic") <- "total"

    if (influence){
        weighted <- x/design$prob
        if (na.rm && (length(nas)>length(design$prob))) {
            ## put zero rows back for the observations that were dropped
            infl <- matrix(0,ncol=NCOL(x),nrow=length(nas))
            infl[nas==0,] <- weighted
            attr(total, "influence") <- infl
        } else {
            attr(total, "influence") <- weighted
        }
    }

    if (is.character(deff) || deff){
        nobs <- sum(weights(design)!=0)
        vsrs <- if (deff=="replace") {
            svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2/nobs
        } else {
            svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2*(N-nobs)/(N*nobs)
        }
        attr(total, "deff") <- v/vsrs
    }
    return(total)
}
## Estimated population means for a "survey.design2" object.
##
## x         formula, expression/symbol, data frame or matrix of variables
## design    the design object
## na.rm     drop observations with missing values
## deff      FALSE, TRUE or "replace": attach a design effect
## influence attach the centred, weighted observations as "influence"
##
## Returns an object of class "svystat" with attributes "var" and
## "statistic" (and optionally "influence" and "deff").
svymean.survey.design2<-function(x,design, na.rm=FALSE,deff=FALSE,influence=FALSE,...){
    if (inherits(x,"formula")){
        ## do the right thing with factors: each variable in the formula is
        ## expanded on its own with a no-intercept model matrix
        frame <- model.frame(x,design$variables,na.action=na.pass)
        pieces <- lapply(attr(terms(x),"variables")[-1],
                         function(tt) model.matrix(eval(bquote(~0+.(tt))),frame))
        widths <- sapply(pieces,NCOL)
        x <- matrix(nrow=NROW(pieces[[1]]),ncol=sum(widths))
        offsets <- c(0,cumsum(widths))
        for(k in seq_along(pieces)){
            x[,offsets[k]+1:widths[k]] <- pieces[[k]]
        }
        colnames(x) <- do.call("c",lapply(pieces,colnames))
    } else if (typeof(x) %in% c("expression","symbol")) {
        x <- eval(x, design$variables)
    } else if (is.data.frame(x) && any(sapply(x,is.factor))) {
        ## factors in a data frame become one indicator column per level
        pieces <- lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
        widths <- sapply(pieces,NCOL)
        offsets <- c(0,cumsum(widths))
        labels <- character(sum(widths))
        for(k in seq_along(pieces)) {
            labels[offsets[k]+1:widths[k]] <- paste0(names(x)[k],levels(x[[k]]))
        }
        x <- matrix(nrow=NROW(pieces[[1]]),ncol=sum(widths))
        for(k in seq_along(pieces)){
            x[,offsets[k]+1:widths[k]] <- pieces[[k]]
        }
        colnames(x) <- labels
    }
    x <- as.matrix(x)

    if (na.rm){
        nas <- rowSums(is.na(x))
        design <- design[nas==0,]
        ## the subset either dropped rows or kept them with zero weight
        if (length(nas)>length(design$prob)) {
            x <- x[nas==0,,drop=FALSE]
        } else {
            x[nas>0,] <- 0
        }
    }

    pweights <- 1/design$prob
    psum <- sum(pweights)
    average <- colSums(x*pweights/psum)
    ## from here on x holds deviations from the estimated mean
    x <- sweep(x,2,average)
    v <- svyrecvar(x*pweights/psum,design$cluster,design$strata, design$fpc,
                   postStrata=design$postStrata)
    attr(average,"var") <- v
    attr(average,"statistic") <- "mean"

    if (influence){
        scaled <- x*pweights/psum
        if (na.rm && (length(nas)>length(pweights))) {
            ## put zero rows back for the observations that were dropped
            infl <- matrix(0,ncol=NCOL(x),nrow=length(nas))
            infl[nas==0,] <- scaled
        } else {
            infl <- scaled
        }
        attr(average,"influence") <- infl
    }
    class(average) <- "svystat"

    if (is.character(deff) || deff){
        nobs <- sum(weights(design)!=0)
        if(deff=="replace"){
            vsrs <- svyvar(x,design,na.rm=na.rm)/(nobs)
        } else if(psum<nobs) {
            vsrs <- NA*v
            warning("Sample size greater than population size: are weights correctly scaled?")
        } else {
            vsrs <- svyvar(x,design,na.rm=na.rm)*(psum-nobs)/(psum*nobs)
        }
        attr(average, "deff") <- v/vsrs
    }
    return(average)
}
## Ratio estimation for "survey.design2" objects.
##
## numerator, denominator:
##     formula (evaluated against design$variables), expression/symbol, or
##     something as.matrix() accepts; `numerator` defaults to `formula`.
## design:    the design object.
## separate:  if TRUE, compute one ratio per first-stage stratum and return
##            a "svyratio_separate" object.
## na.rm:     drop observations with a missing numerator or denominator.
## covmat:    if TRUE, also store the covariance matrix of all ratios in
##            the `vcov` component.
## deff:      FALSE, TRUE or "replace": attach design effects.
## influence: attach an "influence" attribute (see the note near the end).
##
## Returns a list of class "svyratio" with components `ratio` and `var`
## (numerators in rows, denominators in columns), optionally `vcov`, and
## attributes "call", optionally "deff" and "influence".
svyratio.survey.design2<-function(numerator=formula, denominator, design, separate=FALSE,na.rm=FALSE,
formula,covmat=FALSE,deff=FALSE,influence=FALSE,...){
## ---- separate ratio estimator: recurse within each first-stage stratum
if (separate){
strats<-sort(unique(design$strata[,1]))
if (!design$has.strata)
warning("Separate and combined ratio estimators are the same for unstratified designs")
if(influence)
warning("influence functions not available for separate ratio estimators")
rval<-list(ratios=lapply(strats,
function(s) {
tmp<-svyratio(numerator, denominator,
subset(design, design$strata[,1] %in% s),
separate=FALSE,...)
attr(tmp,"call")<-bquote(Stratum==.(s))
tmp}))
names(rval$ratios)<-strats
class(rval)<-c("svyratio_separate")
rval$call<-sys.call()
rval$strata<-strats
return(rval)
}
## ---- resolve numerator and denominator to numeric matrices
if (inherits(numerator,"formula"))
numerator<-model.frame(numerator,design$variables,na.action=na.pass)
else if(typeof(numerator) %in% c("expression","symbol"))
numerator<-eval(numerator, design$variables)
if (inherits(denominator,"formula"))
denominator<-model.frame(denominator,design$variables,na.action=na.pass)
else if(typeof(denominator) %in% c("expression","symbol"))
denominator<-eval(denominator, design$variables)
numerator<-as.matrix(numerator)
denominator<-as.matrix(denominator)
nn<-NCOL(numerator)
nd<-NCOL(denominator)
all<-cbind(numerator,denominator)
## logical: TRUE for rows with any missing value
nas<-!complete.cases(all)
if ((na.rm==TRUE) && any(nas)){
design<-design[!nas,]
if (NROW(design$cluster) == NROW(all)){
## subset by zero weights
## (rows are still present, so the missing values are overwritten with
## placeholders: numerator 0, denominator 1)
all[nas,]<-1
numerator[nas,]<-0
denominator[nas,]<-1
} else {
## subset by actually dropping rows
all<-all[!nas,,drop=FALSE]
numerator<-numerator[!nas,,drop=FALSE]
denominator<-denominator[!nas,,drop=FALSE]
}
}
## totals of every column; each ratio is numerator total / denominator total
allstats<-svytotal(all,design)
rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
vars<-matrix(ncol=nd,nrow=nn)
if (deff=="replace" || deff) deffs<-matrix(ncol=nd,nrow=nn)
for(i in 1:nn){
for(j in 1:nd){
## linearised variable for ratio (i,j): residual from the fitted ratio,
## divided by the estimated denominator total
r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
vars[i,j]<-svyrecvar(r*1/design$prob, design$cluster, design$strata, design$fpc,
postStrata=design$postStrata)
if (deff=="replace" || deff){
deffs[i,j]<-deff(svytotal(r,design,deff=deff))
}
}
}
if (covmat){
## one column of linearised values per (numerator, denominator) pair
ii<-rep(1:nn,nd)
jj<-rep(1:nd,each=nn)
allr<-sweep(numerator[,ii]-t(as.vector(rval$ratio)*t(denominator[,jj,drop=FALSE])),
2, colSums(denominator[,jj,drop=FALSE]/design$prob),"/")
vcovmat<-svyrecvar(allr*1/design$prob, design$cluster, design$strata, design$fpc,
postStrata=design$postStrata)
## NOTE(review): columns are labelled with denominator names indexed by the
## numerator index `ii` -- confirm this is the intended labelling
colnames(vcovmat)<-colnames(denominator)[ii]
rval$vcov<-vcovmat
}
colnames(vars)<-colnames(denominator)
rownames(vars)<-colnames(numerator)
rval$var<-vars
if (deff=="replace" || deff)
attr(rval,"deff")<-deffs
attr(rval,"call")<-sys.call()
if (influence){
## NOTE(review): `r` is whatever the last (i,j) iteration of the loop above
## left behind, so these values belong to the last ratio only; `nas` is
## logical here, so `nas==0` selects the complete rows
if (na.rm && (length(nas)>length(design$prob))) {
infl<-matrix(0,ncol=NCOL(r),nrow=length(nas))
infl[nas==0,]<-r/design$prob
} else {
infl<-r/design$prob
}
attr(rval,"influence") <- infl
}
class(rval)<-"svyratio"
rval
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.