regcalibrate<-function(design, ...) UseMethod("regcalibrate")
is.calibrated<-function(design){ !is.null(design$postStrata)}
regcalibrate.pps<-function(design,formula, population,
stage=NULL, lambda=NULL, aggregate.stage=NULL,...){
if (!is.null(stage) && !(stage==0)) stop("'stage' not supported for pps designs")
if (!is.null(aggregate.stage)) stop("'aggregate.stage' not supported for pps designs")
regcalibrate.survey.design2(design,formula, population,
stage=NULL, lambda=NULL, aggregate.stage=NULL,...)
}
##
## unbounded linear calibration using qr decomposition: less sensitive to
## collinearity than Deville & Sarndal's Newton algorithm.
##
regcalibrate.survey.design2<-function(design, formula, population,
stage=NULL, lambda=NULL, aggregate.stage=NULL, sparse=FALSE,...){
if (is.null(stage))
stage<-if (is.list(population)) 1 else 0
if (!is.null(aggregate.stage)){
aggindex<-design$cluster[[aggregate.stage]]
}
if(stage==0){
## calibration to population totals
if(sparse){
mm<-sparse.model.matrix(formula, model.frame(formula, model.frame(design)))
}else{
mm<-model.matrix(formula, model.frame(formula, model.frame(design)))
}
ww<-weights(design)
if (is.null(lambda)){
sigma2<-rep(1,nrow(mm))
}else if(length(lambda) == nrow(mm)){ # for the heteroskedasticity parameter
sigma2<-drop(lambda)
}else{
sigma2<-drop(mm%*%lambda) # to keep the same functionality when variance = 1
}
if (!is.null(aggregate.stage)){
mm<-apply(mm,2,function(mx) ave(mx,aggindex))
ww<-ave(ww,aggindex)
sigma2<-ave(sigma2,aggindex)
}
whalf<-sqrt(ww)
sample.total<-colSums(mm*ww)
if(any(sample.total==0)){
## drop columsn where all sample and population are zero
zz<-(population==0) & (apply(mm,2,function(x) all(x==0)))
mm<-mm[,!zz]
population<-population[!zz]
sample.total<-sample.total[!zz]
}
if (!is.null(names(population))){
if (!all(names(sample.total) %in% names(population))){
warning("Sampling and population totals have different names.")
cat("Sample: "); print(names(sample.total))
cat("Popltn: "); print(names(population))
}
else if (!all(names(sample.total) == names(population))){
warning("Sample and population totals reordered to make names agree: check results.")
population <- population[match(names(sample.total), names(population))]
}
}
tqr<-qr(mm*whalf/sqrt(sigma2))
## not needed
##if (is.null(lambda) && !all(abs(qr.resid(tqr,whalf*sigma2)/sigma2) <1e-5))
## warning("Calibration models with constant variance must have an intercept")
g<-rep(1,NROW(mm))
Tmat<-crossprod(mm*whalf/sqrt(sigma2))
tT<-solve(Tmat,population-sample.total)
g<-drop(1+mm%*%tT/sigma2)
design$prob<-design$prob/g
caldata<- list(qr=tqr, w=g*whalf*sqrt(sigma2), stage=0, index=NULL)
} else {
## Calibration within clusters (Sarndal's Case C)
if (stage>NCOL(design$cluster))
stop("This design does not have stage",stage)
if (!is.null(aggregate.stage)){
stop("aggregate= not implemented for calibration within clusters")
}
if (!all(length(population[[1]])==sapply(population,length)))
stop("Population totals are not all the same length")
clusters<-unique(design$cluster[,stage])
nc<-length(clusters)
caldata<-list(qr=vector("list",nc), w=vector("list",nc),
stage=stage,index=as.character(clusters))
if(sparse){
mm<-sparse.model.matrix(formula, model.frame(formula, model.frame(design)))
}else{
mm<-model.matrix(formula, model.frame(formula, model.frame(design)))
}
if (is.null(lambda)){
sigma2<-rep(1,nrow(mm))
}else if(length(lambda) == nrow(mm)){ # for the heteroskedasticity parameter
sigma2<-drop(lambda)
}else{
sigma2<-drop(mm%*%lambda) # to keep the same functionality when variance = 1
}
if(NCOL(mm)!=length(population[[1]])){
stop("Population and sample totals are not the same length.")
}
if (any(colnames(mm)!=names(population[[1]]))){
warning("Sampling and population totals have different names.")
cat("Sample: "); print(colnames(mm))
cat("Popltn: "); print(names(population[[1]]))
}
stageweights<-1/apply(design$allprob[,1:stage,drop=FALSE],1,prod)
if (any(duplicated(design$cluster[!duplicated(stageweights),stage])))
stop("Weights at stage", stage, "vary within sampling units")
cwhalf<-sqrt(weights(design)/stageweights)
dwhalf<-sqrt(weights(design))
tqr<-qr(mm)
## not needed
## if (is.null(lambda) && !all(abs(qr.resid(tqr,sigma2)) <1e-3))
## stop("Calibration models with constant variance must have an intercept")
for (i in 1:length(clusters)){
cluster<-clusters[[i]]
these<-which(cluster == as.character(design$cluster[,stage]))
mmi<-mm[these,,drop=FALSE]
sample.total<-colSums(mmi*cwhalf[these]*cwhalf[these])
if(any(sample.total==0)){
## drop columsn where all sample and population are zero
zz<-(population[[i]]==0) & (apply(mmi,2,function(x) all(x==0)))
mmi<-mmi[,!zz,drop=FALSE]
population[[i]]<-population[[i]][!zz]
sample.total<-sample.total[!zz]
}
tqr<-qr(mmi*cwhalf[these]/sqrt(sigma2[these]))
Tmat<-crossprod(mmi*cwhalf[these]/sqrt(sigma2[these]))
tT<-solve(Tmat,population[[i]]-sample.total)
g<-drop(1+mmi%*%tT/sigma2[these])
design$prob[these]<-design$prob[these]/g
caldata$qr[[i]]<-tqr
caldata$w[[i]]<-g*stageweights[these]*sqrt(sigma2[these])*cwhalf[these]^2
}
}
class(caldata)<-"greg_calibration"
design$postStrata<-c(design$postStrata, list(caldata))
design$call<-sys.call(-1)
design
}
regcalibrate.svyrep.design<-function(design, formula, population,compress=NA,lambda=NULL,
aggregate.index=NULL,sparse=FALSE,...){
mf<-model.frame(formula, design$variables)
if(sparse){
mm<-sparse.model.matrix(formula, mf)
}else{
mm<-model.matrix(formula, mf)
}
ww<-design$pweights
if (is.null(lambda)){
sigma2<-rep(1,nrow(mm))
}else if(length(lambda) == nrow(mm)){ # for the heteroskedasticity parameter
sigma2<-drop(lambda)
}else{
sigma2<-drop(mm%*%lambda) # to keep the same functionality when variance = 1
}
repwt<-as.matrix(design$repweights)
if (!design$combined.weights)
repwt<-repwt*design$pweights
if (inherits(aggregate.index,"formula")){
if (length(aggregate.index)!=2)
stop("aggregate.index must be a one-sided formula")
aggregate.index<-model.frame(aggregate.index, design$variables)
if (NCOL(aggregate.index)>1)
stop("aggregate.index must specify a single variable")
aggregate.index<-aggregate.index[[1]]
}
if (!is.null(aggregate.index)){
if (sqrt(max(ave(ww,aggregate.index,FUN=var),na.rm=TRUE))>1e-2*mean(ww))
warning("Sampling weights are not constant within clusters defined by aggregate.index")
mm<-apply(mm,2,function(mx) ave(mx,aggregate.index))
ww<-ave(ww,aggregate.index)
sigma2<-ave(sigma2,aggregate.index)
repwt<-apply(repwt,2,function(wx) ave(wx, aggregate.index))
}
whalf<-sqrt(ww)
sample.total<-colSums(mm*ww)
if(any(sample.total==0)){
## drop columsn where all sample and population are zero
zz<-(population==0) & (apply(mm,2,function(x) all(x==0)))
mm<-mm[,!zz]
population<-population[!zz]
sample.total<-sample.total[!zz]
}
if(length(sample.total)!=length(population)){
stop("Population and sample totals are not the same length.")
}
if (!is.null(names(population)) && any(names(sample.total)!=names(population))){
warning("Sampling and population totals have different names.")
cat("Sample: "); print(colnames(mm))
cat("Popltn: "); print(names(population[[1]]))
}
Tmat<-crossprod(mm*whalf/sqrt(sigma2))
tT<-solve(Tmat,population-sample.total)
gtotal<-drop(1+mm%*%tT/sigma2)
design$pweights<-design$pweights*gtotal
for(i in 1:NCOL(repwt)){
whalf<-sqrt(repwt[,i])
Tmat<-crossprod(mm*whalf/sqrt(sigma2))
sample.total<-colSums(mm*whalf*whalf)
g<-drop(1+mm%*%solve(Tmat,population-sample.total)/sigma2)
repwt[,i]<-as.vector(design$repweights[,i])*g
}
if (!design$combined.weights)
repwt<-repwt/gtotal
if (compress ||
(is.na(compress && inherits(design$repweights,"repweights_compressed")))){
repwt<-compressWeights(repwt)
}
design$repweights<-repwt
design$call<-sys.call(-1)
design$degf<-NULL
design$degf<-degf(design)
design
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.