R/surveyrep.R

Defines functions dim.svyrep.design degf.twophase degf.survey.design2 degf.svyrep.design degf rake postStratify.svyrep.design postStratify svreptable print.svrepstat SE.svrepstat SE.default SE as.data.frame.svrepstat coef.svrepstat withReplicates.svyrep.design withReplicates.svrepstat withReplicates.svrepvar withReplicates logLik.svrepglm residuals.svrepglm vcov.svyratio svrepratio predict.svrepglm svyglm.svyrep.design svycoxph.svyrep.design svreptotal svrepmean as.matrix.svrepvar svrepvar svrVar weights.survey.design weights.svyrep.design update.svyrep.design subset.svyrep.design image.svyrep.design print.summary.svyrep.design summary.svyrep.design print.svyrep.design svrepdesign.default svrepdesign as.svrepdesign.default as.svrepdesign brrweights jknweights jk1weights hadamard hadamard.doubler

Documented in as.svrepdesign as.svrepdesign.default brrweights coef.svrepstat degf degf.survey.design2 degf.svyrep.design degf.twophase dim.svyrep.design hadamard image.svyrep.design jk1weights jknweights postStratify postStratify.svyrep.design predict.svrepglm print.summary.svyrep.design print.svyrep.design rake residuals.svrepglm SE SE.default SE.svrepstat subset.svyrep.design summary.svyrep.design svrepdesign svrepdesign.default svreptable svrVar svycoxph.svyrep.design svyglm.svyrep.design update.svyrep.design weights.survey.design weights.svyrep.design withReplicates withReplicates.svrepstat withReplicates.svrepvar withReplicates.svyrep.design

  
hadamard.doubler<-function(H){
    rbind(cbind(H,H),cbind(H,1-H))
  }
  
hadamard<- function(n){
    m<-n-(n %% 4)
    ## hadamard.list, hadamard.sizes in sysdata.rda
    precooked<- which(m < hadamard.sizes & m+4 >=hadamard.sizes)
    if (length(precooked))
      return(hadamard.list[[min(precooked)]])
    if (all(m<hadamard.sizes))
      return(hadamard.list[[1]])
    
    sizes<-hadamard.sizes*2^pmax(0,ceiling(log((n+1)/hadamard.sizes,2)) )
    bestfit<- which.min(sizes-n)
    H<-NULL
    if (sizes[bestfit]-n >4)
        H<-paley(n,sizes[bestfit])
    if (is.null(H)){
      ndoubles<-ceiling(log(sizes/hadamard.sizes, 2))[bestfit]
      H<-hadamard.list[[bestfit]]
      for(i in seq(length=ndoubles))
        H<-hadamard.doubler(H)
    }
    H
  }



jk1weights<-function(psu, fpc=NULL,
                     fpctype=c("population","fraction","correction"),
                     compress=TRUE){
  fpctype<-match.arg(fpctype)
  unq<-unique(psu)
  n<-length(unq)
  if (is.null(fpc))
    fpc<-1
  else {
    fpc<-unique(fpc)
    if (length(fpc)>1) stop("More than one fpc value given")
    if (fpc<0) stop("Negative finite population correction")
    if (fpctype=="population" && fpc<n) stop("Population size smaller than sample size. No can do.")
    fpc <-switch(fpctype, population=(fpc-n)/fpc, fraction=1-fpc, correction=fpc)
 }

  if (compress){
      if(fpc==0 && getOption("survey.drop.replicates")) ## exhaustively sampled strata do not need replicates.
         repweights<-matrix(ncol=0,nrow=length(psu))
      else {
          repweights<-matrix(n/(n-1),n,n)
          diag(repweights)<-0
      }
      repweights<-list(weights=repweights, index=match(psu,unq))
      class(repweights)<-c("repweights_compressed","repweights")
      rval<-list(type="jk1", repweights=repweights,scale=fpc*(n-1)/n)
      rval
  } else {
      if(fpc==0 && getOption("survey.drop.replicates")) ## exhaustively sampled strata do not need replicates.
          return(list(type="jk1",repweights=matrix(ncol=0,nrow=length(psu)), scale=0))
      repweights<-outer(psu, unq, "!=")*n/(n-1)
      class(repweights)<-"repweights"
      rval<-list(type="jk1", repweights=repweights,scale=(fpc*(n-1)/n))
      rval
  }
}




jknweights<-function(strata,psu, fpc=NULL,
                     fpctype=c("population","fraction","correction"),
                     compress=TRUE, lonely.psu=getOption("survey.lonely.psu")){

  sunq<-unique(strata)
  unq<-unique(psu)
  nstrat<-length(sunq)
  n<-length(strata)

  lonely.psu<-match.arg(lonely.psu, c("fail","certainty","remove","adjust","average"))
  
  fpctype<-match.arg(fpctype)
  
  if (is.null(fpc)){
      fpc<-rep(1,nstrat)
      names(fpc)<-as.character(sunq)
      fpctype<-"correction"
  } else if (length(fpc)==n){
      if (length(unique(fpc))>nstrat)
          stop("More distinct fpc values than strata")
      fpc<-sapply(sunq, function(ss) fpc[match(ss,strata)])
      names(fpc)<-as.character(sunq)
  } else if (length(fpc)==1) {
      fpc<-rep(fpc,nstrat)
      names(fpc)<-as.character(sunq)
  } else if (length(fpc)==nstrat){
      nn<-names(fpc)
      if (is.null(nn)) names(fpc)<-as.character(sunq)
      if (!all(names(fpc) %in% as.character(sunq)))
          stop("fpc has names that do not match the stratum identifiers")
  }


  if (compress){
    repweights<-matrix(1,ncol=length(unq),nrow=length(unq))
  } else {
    repweights<-matrix(1,ncol=length(unq), nrow=length(psu))
  }
  counter<-0
  rscales<-numeric(length(unq))
  
  for(ss in as.character(sunq)){
      thisfpc<-fpc[match(ss,names(fpc))]
      theseweights<-jk1weights(psu[strata %in% ss], fpc=thisfpc,
                               fpctype=fpctype,compress=compress)
      nc<-if (compress) NCOL(theseweights$repweights$weights) else NCOL(theseweights$repweights)
      if (nc==1 && thisfpc!=0){
        ## lonely PSUs
        if (lonely.psu=="fail")
          stop("Stratum",ss,"has only one PSU")
        if (lonely.psu=="remove")
          next
        if (lonely.psu=="certainty")
          next
        if (lonely.psu=="average")
          next
        if (lonely.psu=="adjust"){
          nc<-1
          if (compress)
            repweights[, counter+nc]<-ifelse(strata[!duplicated(psu)] %in% ss, 0, nstrat/(nstrat-1))
          else
            repweights[ counter+nc]<-ifelse(strata %in% ss, 0, nstrat/(nstrat-1))
          rscales[counter+nc]<-(nstrat-1)/nstrat
          counter<-counter+nc
          next
        }
      }
      
      if (compress)
        repweights[strata[!duplicated(psu)] %in% ss,counter+seq(length=nc)]<-theseweights$repweights$weights
      else
        repweights[strata %in% ss, counter+seq(length=nc)]<-theseweights$repweights
      
      rscales[counter+seq(length=nc)]<-theseweights$scale
      counter<-counter+nc
  }
  if (counter==0) stop("All strata were exhaustively sampled: you have the whole population")
  scale<-1
  if (counter<length(unq)){
      repweights<-repweights[,1:counter]
      rscales<-rscales[1:counter]
      if (lonely.psu=="average")
        scale<-scale*length(unq)/counter
  }
  if (compress){
    repweights<-list(weights=repweights,index=match(psu,unq))
    class(repweights)<- c("repweights_compressed","repweights")
  } else class(repweights)<-"repweights"
  list(type="jkn", repweights=repweights, rscales=rscales, scale=scale)
}



brrweights<-function(strata,psu, match=NULL, small=c("fail","split","merge"),
                     large=c("split","merge","fail"), fay.rho=0,
                     only.weights=FALSE, compress=TRUE,
                     hadamard.matrix=NULL){

  small<-match.arg(small)
  large<-match.arg(large)

  strata<-as.character(strata)
  
  ssize<-table(strata[!duplicated(psu)])
  if (any(ssize<2) && small=="fail")
    stop("Some strata have fewer than 2 PSUs")
  if (any(ssize>2) && large=="fail")
    stop("Some strata have more than 2 PSUs")

  unq<-which(!duplicated(psu))
  sunq<-strata[unq]
  psunq<-psu[unq]
  weights<-matrix(ncol=2,nrow=length(unq))
  weightstrata<-numeric(length(unq))
  
  if (length(match)==length(strata))
    match<-match[unq]
  if (is.null(match))
    match<-unq  ## default is to match by dataset order
  oo<-order(sunq,match)

  upto <- 0
  
  if(any(ssize==1)){
    smallstrata<-names(ssize)[ssize==1]
    if(small=="split"){
      weights[sunq %in% smallstrata,1]<- 0.5
      weights[sunq %in% smallstrata,2]<- 0.5
      weightstrata[sunq %in% smallstrata]<-1:length(smallstrata)
      upto<-length(smallstrata)
    } else {
      ##small=="merge"
      if (length(smallstrata) > 1){
        weights[oo,][sunq[oo] %in% smallstrata, 1]<-rep(0:1,length.out=length(smallstrata))
        weights[oo,][sunq[oo] %in% smallstrata, 2]<-rep(1:0,length.out=length(smallstrata))
        if(length(smallstrata) %% 2==0)
          weightstrata[oo][sunq[oo] %in% smallstrata]<-rep(1:(length(smallstrata) %/%2), 2)
        else
          weightstrata[oo][sunq[oo] %in% smallstrata]<-c(1,rep(1:(length(smallstrata) %/%2), 2))
        upto<-length(smallstrata) %/% 2
      } else stop("Can't merge with a single small stratum")
    }
  }

  if (any(ssize>2)){
    largestrata<-names(ssize)[ssize>2]
    if (large=="split"){
      if (any(ssize[largestrata] %%2 ==1))
        stop("Can't split with odd numbers of PSUs in a stratum")
      ## make substrata of size 2
      for(ss in largestrata){
        weights[oo,][sunq[oo] %in% ss, 1]<-rep(0:1,length.out=ssize[ss])
        weights[oo,][sunq[oo] %in% ss, 2]<-rep(1:0,length.out=ssize[ss])
        weightstrata[oo][sunq[oo] %in% ss]<-upto+rep(1:(ssize[ss] %/%2),each=2)
        upto<-upto+(ssize[ss] %/% 2)
      }
    } else {
      ## make two substrata.
      halfsize<-ssize[largestrata] %/%2
      otherhalfsize<-ssize[largestrata] - halfsize
      reps<-as.vector(rbind(halfsize,otherhalfsize))
      nlarge<-length(halfsize)
      weights[oo,][sunq[oo] %in% largestrata, 1]<-rep(rep(0:1,nlarge),reps) 
      weights[oo,][sunq[oo] %in% largestrata, 2]<-rep(rep(1:0,nlarge),reps)
      weightstrata[oo][sunq[oo] %in% largestrata]<-upto+rep(1:length(largestrata),ssize[largestrata])
      upto<-upto+length(largestrata)
    }
  }
  if(any(ssize==2)){
    goodstrata<-names(ssize)[ssize==2]
    weights[oo,][sunq[oo] %in% goodstrata, 1]<-rep(0:1,length(goodstrata))
    weights[oo,][sunq[oo] %in% goodstrata, 2]<-rep(1:0,length(goodstrata))
    weightstrata[oo][sunq[oo] %in% goodstrata]<-upto+rep(1:length(goodstrata),each=2)
    upto<-upto+length(goodstrata)
  }
  

  if (is.null(hadamard.matrix)){
    H<-hadamard(upto)
  } else {
    ## the user supplied hadamard.matrix
    ## Check that it is a binary matrix and satifies the
    ## Hadamard determinant property
    if (!is.matrix(hadamard.matrix) || nrow(hadamard.matrix)<upto+1)
      stop("hadamard.matrix must be a matrix of dimension at least nstrata+1")
    values<-unique(as.vector(hadamard.matrix))
    if(length(values)!=2)
      stop("hadamard.matrix has more than two different values")
    H<-ifelse(hadamard.matrix==values[1],0,1)
    if(!is.hadamard(H,full.orthogonal.balance=FALSE))
        stop("hadamard.matrix is not a Hadamard matrix")
  }
  ii<-1:upto
  jj<-1:length(weightstrata)
  sampler<-function(i){
    h<-H[1+ii, i]+1
    col<-h[match(weightstrata,ii)]
    wa<-weights[cbind(jj,col)]
    wb<-weights[cbind(jj,3-col)]
    if (compress)
      wa*(2-fay.rho)+wb*fay.rho
    else
      wa[match(psu,psunq)]*(2-fay.rho)+wb[match(psu,psunq)]*fay.rho
  }

  if (only.weights){
    repweights<-sapply(1:NCOL(H),sampler)
    if (!compress){
      class(repweights)<-"repweights"
      return(repweights)
    }
    repweights<-list(weights=repweights,index=match(psu,psunq))
    class(repweights)<-c("repweights_compressed","repweights")
    repweights
  }  else 
  list(weights=weights, wstrata=weightstrata, strata=sunq, psu=psunq,
       npairs=NCOL(H),sampler=sampler,compress=compress)

}
  

  


##
## Designs with replication weights rather than survey structure.
##

as.svrepdesign<- function(design,...) UseMethod("as.svrepdesign")
as.svrepdesign.default<-function(design,type=c("auto","JK1","JKn","BRR","bootstrap","subbootstrap","mrbbootstrap","Fay"),
                          fay.rho=0, fpc=NULL, fpctype=NULL,...,compress=TRUE, mse=getOption("survey.replicates.mse")){
    if(!is.null(design$postStrata)){
        stop("postStratify, rake, or calibrate the design *after* adding replicate weights")
        }
  type<-match.arg(type)

  if (type=="auto"){
    if (!design$has.strata)
      type<-"JK1"
    else
      type<-"JKn"
  }
  selfrep<-NULL
  if (type=="JK1" && design$has.strata)
    stop("Can't use JK1 for a stratified design")
  if (type %in% c("JKn","BRR","Fay") && !design$has.strata)
    stop("Must use JK1 or bootstrap for an unstratified design")
  
  if (is.null(fpc)) {
    fpctype<-"population"
    
    if (is.null(design$fpc) ||
        (inherits(design, "survey.design2") && is.null(design$fpc$popsize))){
      fpc<-NULL
    } else if (type %in% c("Fay","BRR")){
      warning("Finite population correction dropped in conversion")
    } else {
      if (inherits(design,"survey.design2")){
        fpc<-design$fpc$popsize
        if(NCOL(fpc)>1 && type!="mrbbootstrap"){
          fpc<-fpc[,1]
          warning("Finite population corrections after first stage have been dropped")
        }
        if (getOption("survey.drop.replicates")){
          selfrep<-design$fpc$popsize[,1]==design$fpc$sampsize[,1]
        } 
      } else{
        fpc<-design$fpc[,2]
        names(fpc)<-design$fpc[,1]
      }
    }
  } else {
    if (type %in% c("Fay","BRR","subbootstrap")) stop(paste("fpc information cannot be used for type=",type))
    if (is.null(fpctype)) stop("fpctype must be supplied if fpc is supplied")
  }
  if (type=="JK1"){
    ##JK1
    r<-jk1weights(design$cluster[,1], fpc=fpc,fpctype=fpctype, compress=compress)
    repweights<-r$repweights
    scale<-drop(r$scale)
    if (inherits(repweights,"repweights_compressed"))
      rscales<-rep(1, NCOL(repweights$weights))
    else
      rscales<-rep(1, NCOL(repweights))
    
    type<-"JK1"
    pweights<-1/design$prob
  } else if (type %in% c("BRR","Fay")){
    ##BRR
    if (inherits(design,"survey.design2"))
      repweights<-brrweights(design$strata[,1], design$cluster[,1],...,fay.rho=fay.rho,
                             compress=compress,only.weights=TRUE)
    else
      repweights<-brrweights(design$strata, design$cluster[,1],...,fay.rho=fay.rho,
                             compress=compress,only.weights=TRUE)
    pweights<-1/design$prob
    if (length(pweights)==1)
      pweights<-rep(pweights, NROW(design$variables))
    
    if (fay.rho==0)
      type<-"BRR"
    else
      type<-"Fay"

    rscales<-rep(1,ncol(repweights))
    scale<-1/(ncol(repweights)*(1-fay.rho)^2)
    
  } else if (type=="JKn"){
    ##JKn
    if (inherits(design,"survey.design2"))
      r<-jknweights(design$strata[,1],design$cluster[,1], fpc=fpc,
                    fpctype=fpctype, compress=compress)
    else
      r<-jknweights(design$strata,design$cluster[,1], fpc=fpc,
                    fpctype=fpctype, compress=compress)
    pweights<-1/design$prob
    repweights<-r$repweights
    scale<-r$scale
    rscales<-r$rscales
  } else if (type=="bootstrap"){
    ##bootstrap
    if (inherits(design,"survey.design2"))
      r<-bootweights(design$strata[,1],design$cluster[,1], fpc=fpc,
                     fpctype=fpctype, compress=compress,...)
    else
      r<-bootweights(design$strata,design$cluster[,1], fpc=fpc,
                     fpctype=fpctype, compress=compress,...)
    pweights<-1/design$prob
    repweights<-r$repweights
    scale<-r$scale
    rscales<-r$rscales
  }else if (type=="subbootstrap"){
    ##bootstrap
    if (inherits(design,"survey.design2"))
      r<-subbootweights(design$strata[,1],design$cluster[,1],compress=compress,...)
    else
      r<-subbootweights(design$strata,design$cluster[,1],compress=compress,...)
    pweights<-1/design$prob
    repweights<-r$repweights
    scale<-r$scale
    rscales<-r$rscales
  } else if (type=="mrbbootstrap"){
    if (inherits(design,"survey.design2"))
      r<-mrbweights(design$cluster,design$strata,design$fpc,...)
    else
      stop("MRB bootstrap not available for obsolete svydesign objects")
    pweights<-1/design$prob
    repweights<-r$repweights
    scale<-r$scale
    rscales<-r$rscales
  } else stop("Can't happen")

  rval<-list(repweights=repweights, pweights=pweights,
             type=type, rho=fay.rho,scale=scale, rscales=rscales,
             call=sys.call(), combined.weights=FALSE, selfrep=selfrep,mse=mse)
  rval$variables <- design$variables
  class(rval)<-"svyrep.design"
  rval$degf<-degf(rval)
  rval
}



svrepdesign<-function(variables, repweights, weights,data=NULL,degf=NULL,...) UseMethod("svrepdesign",data)

svrepdesign.default<-function(variables=NULL,repweights=NULL, weights=NULL,
                              data=NULL, degf=NULL,type=c("BRR","Fay","JK1", "JKn","bootstrap",
                                               "ACS","successive-difference","JK2","other"),
                              combined.weights=TRUE, rho=NULL, bootstrap.average=NULL,
                              scale=NULL,rscales=NULL,fpc=NULL, fpctype=c("fraction","correction"),
                              mse=getOption("survey.replicates.mse"),...)
{
  
  type<-match.arg(type)
  
  if(type=="Fay" && is.null(rho))
    stop("With type='Fay' you must supply the correct rho")
  
  if (type %in% c("JK1","JKn","ACS","successive-difference","JK2")  && !is.null(rho))
    warning("rho not relevant to JK1 design: ignored.")
  
  if (type %in% c("other")  && !is.null(rho))
    warning("rho ignored.")
  
  if(is.null(variables))
    variables<-data
    
  if(inherits(variables,"formula")){
    mf<-substitute(model.frame(variables, data=data,na.action=na.pass))
    variables<-eval.parent(mf)
  }

  variables<-detibble(variables)
  
  if(inherits(repweights,"formula")){
    mf<-substitute(model.frame(repweights, data=data))
    repweights<-eval.parent(mf)
  }

  if(is.character(repweights)){##regular expression
    wtcols<-grep(repweights,names(data))
    repweights<-data[,wtcols]
  }
  
  if (is.null(repweights))
    stop("You must provide replication weights")

  if (anyNA(repweights))
        stop("Missing values not allowed in 'repweights'")
   
   
  repweights<-detibble(repweights)
  
  if(inherits(weights,"formula")){
    mf<-substitute(model.frame(weights, data=data))
    weights<-eval.parent(mf)
     if (anyNA(weights))
        stop("Missing values not allowed in 'weights'")
    weights<-drop(as.matrix(weights))
  }

  if (is.null(weights)){
    warning("No sampling weights provided: equal probability assumed")
    weights<-rep(1,NROW(repweights))
  }

    if (!is.null(degf)){
        if (!is.numeric(degf)) stop("degf must be NULL or numeric")
        if (degf>ncol(repweights)) warning(paste0("degf (", degf,") is larger than number of replicates (",ncol(repweights),")"))
        if (degf<=1) warning("degf is <=1")
        }

  repwtmn<-mean(apply(repweights,2,mean))
  wtmn<-mean(weights)
  probably.combined.weights<-(repwtmn>5) & (wtmn/repwtmn<5)
  probably.not.combined.weights<-(repwtmn<5) & (wtmn/repwtmn>5)
  if (combined.weights & probably.not.combined.weights)
    warning(paste("Data do not look like combined weights: mean replication weight is", repwtmn," and mean sampling weight is",wtmn))
  if (!combined.weights & probably.combined.weights)
    warning(paste("Data look like combined weights: mean replication weight is", repwtmn," and mean sampling weight is",wtmn))

  if (!is.null(rscales) && !(length(rscales) %in% c(1, ncol(repweights)))){
    stop(paste("rscales has length ",length(rscales),", should be ncol(repweights)",sep=""))
  }

    if (type %in% c("ACS", "successive-difference")){
        if(!is.null(scale) | !is.null(rscales))
            warning(paste("with type",type,"scale= and rscales= are not needed and will be ignored"))
    }
  
    if (type == "BRR"){
        ## the default, so check it hasn't been accidentally defaulted to
        if (!is.null(scale)) warning("type='BRR' does not use 'scale=' argument")
        if (!is.null(rho))  warning("type='BRR' does not use 'rho=' argument, you may want type='Fay'")
      scale<-1/ncol(repweights)
      }
  if (type=="Fay")
    scale <-1/(ncol(repweights)*(1-rho)^2)

  if (type=="bootstrap"){
    if(is.null(bootstrap.average))
      bootstrap.average<-1
    if (is.null(scale))
      scale<-bootstrap.average/(ncol(repweights)-1)
    if (is.null(rscales))
      rscales<-rep(1,ncol(repweights))
    }

  if (type=="JK1" && is.null(scale)) {
    if(!combined.weights){
      warning("scale (n-1)/n not provided: guessing from weights")
      scale<-1/max(repweights[,1])
    } else {
      probably.n = ncol(repweights)
      scale<- (probably.n-1)/probably.n
      warning("scale (n-1)/n not provided: guessing n=number of replicates")
    }
  }

  if (type =="JKn" && is.null(rscales))
    if (!combined.weights) {
      warning("rscales (n-1)/n not provided:guessing from weights")
      rscales<-1/apply(repweights,2,max)
    } else stop("Must provide rscales for combined JKn weights")

    if (type %in% c("ACS","successive-difference")){
        rscales<-rep(1, ncol(repweights))
        scale<-4/ncol(repweights)
    }

    if (type =="JK2"){
        warning(paste("with type",type,"scale= and rscales= are not needed and will be ignored"))
        rscales<-rep(1, ncol(repweights))
        scale<-1
    }
    

  if (type=="other" && (is.null(rscales) || is.null(scale))){
    if (is.null(rscales)) rscales<-rep(1,NCOL(repweights))
    if (is.null(scale)) scale<-1
    warning("scale or rscales not specified, set to 1")
  }
  if (is.null(rscales)) rscales<-rep(1,NCOL(repweights))

  if (!is.null(fpc)){
      if (missing(fpctype)) stop("Must specify fpctype")
      fpctype<-match.arg(fpctype)
      if (type %in% c("BRR","Fay","JK2","ACS","successive-difference")) stop("fpc not available for this type")
      if (type %in% "bootstrap") stop("Separate fpc not needed for bootstrap")
      if (length(fpc)!=length(rscales)) stop("fpc is wrong length")
      if (any(fpc>1) || any(fpc<0)) stop("Illegal fpc value")
      fpc<-switch(fpctype,correction=fpc,fraction=1-fpc)
      rscales<-rscales*fpc
  }

    if (is.null(scale)) scale<-1
  
  rval<-list(type=type, scale=scale, rscales=rscales,  rho=rho,call=sys.call(),
             combined.weights=combined.weights)
  rval$variables<-variables
  rval$pweights<-weights
  if (!inherits(repweights,"repweights"))
    class(rval)<-"repweights"
  rval$repweights<-repweights
  class(rval)<-"svyrep.design"
    if (!is.null(degf))
        rval$degf<-degf
    else
        rval$degf<-degf(rval)
  rval$mse<-mse
  rval
  
}


print.svyrep.design<-function(x,...){
  cat("Call: ")
  print(x$call)
  if (x$type=="Fay")
    cat("Fay's variance method (rho=",x$rho,") ")
  if (x$type=="BRR")
    cat("Balanced Repeated Replicates ")
  if (x$type=="JK1")
    cat("Unstratified cluster jacknife (JK1) ")
  if (x$type=="JKn")
    cat("Stratified cluster jackknife (JKn) ")
  if (x$type=="bootstrap")
    cat("Survey bootstrap ")
  if (x$type=="mrbbootstrap")
    cat("Multistage rescaled bootstrap ")
  if (x$type=="subbootstrap")
    cat("(n-1) bootstrap ")
  nweights<-ncol(x$repweights)
  cat("with", nweights,"replicates")
  if (!is.null(x$mse) && x$mse) cat(" and MSE variances")
  cat(".\n")
  invisible(x)
}

summary.svyrep.design<-function(object,...){
  class(object)<-c("summary.svyrep.design", class(object))
  object
}

print.summary.svyrep.design<-function(x,...){
  class(x)<-class(x)[-1]
  print(x)
  cat("Variables: \n")
  print(colnames(x))
}



image.svyrep.design<-function(x, ..., col=grey(seq(.5,1,length=30)),
                              type.=c("rep","total")){
  type<-match.arg(type.)
  m<-as.matrix(x$repweights)
  if (type=="total"){
    m<-m*x$pweights
  } 
  
  image(1:NCOL(m), 1:NROW(m), t(m),  col=col, xlab="Replicate", ylab="Observation",...)
  invisible(NULL)
}

"[.svyrep.design"<-function(x, i, j, drop=FALSE){
  if (!missing(i)){
    pwt<-x$pweights
    if (is.data.frame(pwt)) pwt<-pwt[[1]]
    x$pweights<-pwt[i]
    x$repweights<-x$repweights[i,,drop=FALSE]
    if(!is.null(x$selfrep))
        x$selfrep<-x$selfrep[i]
    if (!missing(j))
      x$variables<-x$variables[i,j, drop=FALSE]
    else
      x$variables<-x$variables[i,,drop=FALSE]
    x$degf<-NULL
    x$degf<-degf(x)
  } else {
    x$variables<-x$variables[,j,drop=FALSE]
  }
  x
}


subset.svyrep.design<-function(x,subset,...){
        e <- substitute(subset)
        r <- eval(e, x$variables, parent.frame())
        r <- r & !is.na(r) 
        x<-x[r,]
	x$call<-sys.call(-1)
	x
}

update.svyrep.design<-function(object,...){

  dots<-substitute(list(...))[-1]
  newnames<-names(dots)
  
  for(j in seq(along=dots)){
    object$variables[,newnames[j]]<-eval(dots[[j]],object$variables, parent.frame())
  }
  
  object$call<-sys.call(-1)
  object 
}

weights.svyrep.design<-function(object,type=c("replication","sampling","analysis"),...){
  type<-match.arg(type)
  switch(type,
         replication= as.matrix(object$repweights),
         sampling=object$pweights,
         analysis=if(object$combined.weights) as.matrix(object$repweights) else as.matrix(object$repweights)*object$pweights)
}

weights.survey.design<-function(object,...){
  return(1/object$prob)
}


svrVar<-function(thetas, scale, rscales,na.action=getOption("na.action"),mse=getOption("survey.replicates.mse"),coef){
  thetas<-get(na.action)(thetas)
  naa<-attr(thetas,"na.action")
  if (!is.null(naa)){
    rscales<-rscales[-naa]
    if (length(rscales))
      warning(length(naa), " replicates gave NA results and were discarded.")
    else
      stop("All replicates contained NAs")
  }
  if (is.null(mse)) mse<-FALSE
 
  if (length(dim(thetas))==2){
    if (mse) {
      meantheta<-coef
    } else {
      meantheta<-colMeans(thetas[rscales>0,,drop=FALSE])
    }
    v<-crossprod( sweep(thetas,2, meantheta,"-")*sqrt(rscales))*scale
  }  else {
    if (mse){
      meantheta<-coef
    } else {
      meantheta<-mean(thetas[rscales>0])
    }
    v<- sum( (thetas-meantheta)^2*rscales)*scale
  }
  attr(v,"na.replicates")<-naa
  attr(v,"means")<-meantheta
  return(v)
}


svyvar.svyrep.design<-svrepvar<-function(x, design, na.rm=FALSE, rho=NULL,
                                         return.replicates=FALSE,...,estimate.only=FALSE){

  if (!exists(".Generic",inherits=FALSE))
    .Deprecated("svyvar")
  
  if (inherits(x,"formula"))
    x<-model.frame(x,design$variables,na.action=na.pass)
  else if(typeof(x) %in% c("expression","symbol"))
    x<-eval(x, design$variables)

  wts<-design$repweights
  scale<-design$scale
  rscales<-design$rscales
  if (!is.null(rho)) .NotYetUsed("rho")
  
  x<-as.matrix(x)
  
  if (na.rm){
    nas<-rowSums(is.na(x))
    if(any(nas>0)){
      design<-design[nas==0,]
      x<-x[nas==0,,drop=FALSE]
      wts<-wts[nas==0,,drop=FALSE]
    }
  }
  
  if (design$combined.weights)
    pw<-1
  else
    pw<-design$pweights

  n<-NROW(x)
  p<-NCOL(x)
  v<-function(w){
    xbar<-colSums(as.vector(w)*pw*x)/sum(as.vector(w)*pw)
    xdev<-sweep(x,2,xbar,"-")
    x1<-matrix(rep(xdev,p),ncol=p*p)
    x2<-xdev[,rep(1:p,each=p),drop=FALSE]
    (n/(n-1))*colSums(x1*x2*as.vector(w)*pw)/sum(as.vector(w)*pw)
  }

  if (design$combined.weights)
    rval<-v(design$pweights)
  else
    rval<-v(rep(1,length(design$pweights)))

  rval<-matrix(rval, ncol=p)
  dimnames(rval)<-list(colnames(x),colnames(x))
  if (estimate.only) return(rval)
  
  repvars<-apply(wts,2, v)
  
  repvars<-drop(t(repvars))
  attr(rval,"var")<-svrVar(repvars, scale, rscales,mse=design$mse, coef=rval)
  attr(rval, "statistic")<-"variance"
  if (return.replicates){
    attr(repvars,"scale")<-design$scale
    attr(repvars,"rscales")<-design$rscales
    attr(repvars,"mse")<-design$mse
    rval<-list(variance=rval, replicates=repvars)
  }
  class(rval)<-c("svrepvar","svrepstat")
  rval

}


print.svrepvar<-function (x,  covariance=FALSE, ...) 
{
    if (is.list(x)) x<-x[[1]]
    vv <- as.matrix(attr(x, "var"))
    if (covariance){
      nms<-outer(rownames(x),colnames(x),paste,sep=":")
      m<-cbind(as.vector(x), sqrt(diag(vv)))
      rownames(m)<-nms
    } else{
      ii <- which(diag(sqrt(length(x)))>0)
      m <- cbind(x[ii], sqrt(diag(vv))[ii])
      if(length(ii)==1) rownames(m)<-rownames(x)
    }
    colnames(m) <- c(attr(x, "statistic"), "SE")
    printCoefmat(m)
}

as.matrix.svrepvar<-function(x,...) if (is.list(x)) unclass(x[[1]]) else unclass(x)


svymean.svyrep.design<-svrepmean<-function(x,design, na.rm=FALSE, rho=NULL,
                                           return.replicates=FALSE,deff=FALSE,...)
{
  if (!exists(".Generic",inherits=FALSE))
    .Deprecated("svymean")
  if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
  
  if (inherits(x,"formula")){
    ## do the right thing with factors
    mf<-model.frame(x,design$variables,na.action=na.pass)
    xx<-lapply(attr(terms(x),"variables")[-1],
               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
    cols<-sapply(xx,NCOL)
    x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
    scols<-c(0,cumsum(cols))
    for(i in 1:length(xx)){
      x[,scols[i]+1:cols[i]]<-xx[[i]]
    }
    colnames(x)<-do.call("c",lapply(xx,colnames))
  } else {
      if(typeof(x) %in% c("expression","symbol"))
          x<-eval(x, design$variables)
      else {
          if(is.data.frame(x) && any(sapply(x,is.factor))){
              xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
              cols<-sapply(xx,NCOL)
              scols<-c(0,cumsum(cols))
              cn<-character(sum(cols))
              for(i in 1:length(xx))
                  cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
              x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
              for(i in 1:length(xx)){
                  x[,scols[i]+1:cols[i]]<-xx[[i]]
              }
              colnames(x)<-cn
          }
      }
  } 
  
  x<-as.matrix(x)
  
  if (na.rm){
    nas<-rowSums(is.na(x))
    design<-design[nas==0,]
    x<-x[nas==0,,drop=FALSE]
  }
  
  wts<-design$repweights
  scale<-design$scale
  rscales<-design$rscales
  if (!is.null(rho)) .NotYetUsed("rho")
  
  if (!design$combined.weights)
    pw<-design$pweights
  else
    pw<-1
  
  rval<-colSums(design$pweights*x)/sum(design$pweights)

  if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
    v<-matrix(0,length(rval),length(rval))
    repmeans<-NULL
  } else {
  if (inherits(wts, "repweights_compressed")){
    repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
    for(i in 1:ncol(wts$weights)){
      wi<-wts$weights[wts$index,i]
      repmeans[i,]<-t(colSums(wi*x*pw)/sum(pw*wi))
    }
  } else {
    repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts))
    for(i in 1:ncol(wts)){
      repmeans[i,]<-t(colSums(wts[,i]*x*pw)/sum(pw*wts[,i]))
    }
  }
  repmeans<-drop(repmeans)
  v <- svrVar(repmeans, scale, rscales,mse=design$mse, coef=rval)
}
  attr(rval,"var") <-v
  attr(rval, "statistic")<-"mean"
  if (return.replicates){
    attr(repmeans,"scale")<-design$scale
    attr(repmeans,"rscales")<-design$rscales
    attr(repmeans,"mse")<-design$mse
    rval<-list(mean=rval, replicates=repmeans)
  }
  if (is.character(deff) || deff){
      nobs<-length(design$pweights)
      npop<-sum(design$pweights)
      vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))/length(design$pweights)
      if (deff!="replace")
        vsrs<-vsrs*(npop-nobs)/npop
      attr(rval,"deff") <- v/vsrs
  }
  class(rval)<-"svrepstat"
  rval
}



svytotal.svyrep.design<-svreptotal<-function(x,design, na.rm=FALSE, rho=NULL,
                                             return.replicates=FALSE, deff=FALSE,...)
{
 if (!exists(".Generic",inherits=FALSE))
    .Deprecated("svytotal")
 if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
 
 if (inherits(x,"formula")){
     ## do the right thing with factors
     mf<-model.frame(x,design$variables,na.action=na.pass)
     xx<-lapply(attr(terms(x),"variables")[-1],
                function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
    cols<-sapply(xx,NCOL)
     x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
     scols<-c(0,cumsum(cols))
     for(i in 1:length(xx)){
         x[,scols[i]+1:cols[i]]<-xx[[i]]
     }
     colnames(x)<-do.call("c",lapply(xx,colnames))
 }  else{
      if(typeof(x) %in% c("expression","symbol"))
          x<-eval(x, design$variables)
      else {
          if(is.data.frame(x) && any(sapply(x,is.factor))){
              xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
              cols<-sapply(xx,NCOL)
              scols<-c(0,cumsum(cols))
              cn<-character(sum(cols))
              for(i in 1:length(xx))
                  cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
              x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
              for(i in 1:length(xx)){
                  x[,scols[i]+1:cols[i]]<-xx[[i]]
              }
              colnames(x)<-cn
          }
      }
  }
  
 
 
  x<-as.matrix(x)
 
 if (na.rm){
     nas<-rowSums(is.na(x))
     design<-design[nas==0,]
     x<-x[nas==0,,drop=FALSE]
 }
  
 wts<-design$repweights
 scale<-design$scale
 rscales<-design$rscales
 if (!is.null(rho)) .NotYetUsed("rho")
 
 if (!design$combined.weights)
     pw<-design$pweights
  else
    pw<-1
 
 rval<-colSums(design$pweights*x)

 if (is.character(deff) || deff){
     nobs<-length(design$pweights)
     npop<-sum(design$pweights)
     vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))*sum(design$pweights)^2/nobs
     if (deff!="replace")
         vsrs<-vsrs*(npop-nobs)/npop
 }

 if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
   v<-matrix(0,nrow=NROW(rval),ncol=NROW(rval))
   repmeans<-NULL
 } else {
   if (inherits(wts, "repweights_compressed")){
     if (getOption("survey.drop.replicates") && !is.null(design$selfrep)){
       wts$index<-wts$index[!design$selfrep]
       x<-x[!design$selfrep,,drop=FALSE]
       pw<-pw[!design$selfrep]
     }
     repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
     for(i in 1:ncol(wts$weights)){
       wi<-wts$weights[wts$index,i]
       repmeans[i,]<-t(colSums(wi*x*pw))
     }
 } else {
   if (getOption("survey.drop.replicates") && !is.null(design$selfrep)){
     wts<-wts[!design$selfrep,,drop=FALSE]
     x<-x[!design$selfrep,,drop=FALSE]
     pw<-pw[!design$selfrep]
   }
   repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts))
   for(i in 1:ncol(wts)){
     repmeans[i,]<-t(colSums(wts[,i]*x*pw))
   }
 }
   repmeans<-drop(repmeans)
   v <- svrVar(repmeans, scale, rscales,mse=design$mse,coef=rval)
 }
attr(rval,"var") <- v
attr(rval, "statistic")<-"total"
if (return.replicates){
  attr(repmeans,"scale")<-design$scale
  attr(repmeans,"rscales")<-design$rscales
  attr(repmeans,"mse")<-design$mse 
  rval<-list(mean=rval, replicates=repmeans)
}
 
if (is.character(deff) || deff)
  attr(rval,"deff") <- v/vsrs
class(rval)<-"svrepstat"
rval
}



svycoxph.svyrep.design<-function(formula, design, subset=NULL,rescale=NULL,...,return.replicates=FALSE,na.action,
                                 multicore=getOption("survey.multicore")){
  subset<-substitute(subset)
  subset<-eval(subset, design$variables, parent.frame())
  if (!is.null(subset))
    design<-design[subset,]
  if (multicore && !requireNamespace(parallel,quietly=TRUE))
    multicore<-FALSE
  
  data<-design$variables
  
  
  g<-match.call()
  g$design<-NULL
  g$return.replicates<-NULL
  g$weights<-quote(.survey.prob.weights)
  g[[1]]<-quote(coxph)
  g$x<-TRUE
  
  scale<-design$scale
  rscales<-design$rscales

  if (is.null(rescale))
      pwts<-design$pweights/mean(design$pweights)
  else if (rescale)
      pwts<-design$pweights/sum(design$pweights)
  
  if (is.data.frame(pwts)) pwts<-pwts[[1]]
  
  if (!all(all.vars(formula) %in% names(data))) 
    stop("all variables must be in design= argument")
  .survey.prob.weights<-pwts
  full<-with(data,eval(g))
  if (inherits(full, "coxph.penal"))
      warning("svycoxph does not support penalised terms")
 
  nas<-attr(full$model, "na.action")
  
  betas<-matrix(ncol=length(coef(full)),nrow=ncol(design$repweights))
  
  wts<-design$repweights
  
  if (!design$combined.weights){
    pw1<-pwts
    rwt<-pw1/mean(pw1)
  } else{
    rwt<-1/mean(as.vector(wts[,1]))
    pw1<-rwt
  }
  
  if (length(nas))
    wts<-wts[-nas,]
  beta0<-coef(full)

  ## coxph doesn't allow zero weights
  EPSILON<-1e-10

  if(full$method %in% c("efron","breslow")){
    if (attr(full$y,"type")=="right")
      fitter<-coxph.fit
    else if(attr(full$y,"type")=="counting")
      fitter<-survival::agreg.fit
    else stop("invalid survival type")
  } else fitter<-survival::agexact.fit
                 
##  g$init<-beta0
## for(i in 1:ncol(wts)){
##    .survey.prob.weights<-as.vector(wts[,i])*pw1+EPSILON
##    betas[i,]<-with(data,coef(eval(g)))
##  }
  if (multicore){
    betas<-do.call(rbind, parallel::mclapply(1:ncol(wts), function(i){
      fitter(full$x, full$y, full$strata, full$offset,
             coef(full), coxph.control(),
             as.vector(wts[,i])*pw1+EPSILON,
             full$method,  names(full$resid))$coef
    }))
  }else{
    for(i in 1:ncol(wts)){
      betas[i,]<-fitter(full$x, full$y, full$strata, full$offset,
                        coef(full), coxph.control(),
                        as.vector(wts[,i])*pw1+EPSILON,
                        full$method,  names(full$resid))$coef
      
    }
  }
  
  if (length(nas))
    design<-design[-nas,]
  
  v<-svrVar(betas,scale, rscales, mse=design$mse, coef=beta0)
  
  full$var<-v
  if (return.replicates){
    attr(betas,"scale")<-design$scale
    attr(betas,"rscales")<-design$rscales
    attr(betas,"mse")<-design$mse
    full$replicates<-betas
  }
  full$naive.var<-NULL
  full$wald.test<-coef(full)%*%solve(full$var,coef(full))
  full$loglik<-c(NA,NA)
  full$rscore<-NULL
  full$score<-NA
  full$degf.residual<-degf(design)+1-length(coef(full)[!is.na(coef(full))])
  
  class(full)<-c("svrepcoxph","svycoxph",class(full))
  full$call<-match.call()
  full$printcall<-sys.call(-1)
  full$survey.design<-design
  
  full
}

svrepglm<-svyglm.svyrep.design<-function(formula, design, subset=NULL,family=stats::gaussian(),start=NULL, rescale=NULL, ...,
                                         rho=NULL, return.replicates=FALSE, na.action,
                                         multicore=getOption("survey.multicore")){

  if (!exists(".Generic",inherits=FALSE))
    .Deprecated("svyglm")
  
  subset<-substitute(subset)
    subset<-eval(subset, design$variables, parent.frame())
    if (any(is.na(subset)))
        stop("subset must not contain NA values")

  if (!is.null(subset))
    design<-design[subset,]

  if(multicore && !requireNamespace("parallel",quietly=TRUE))
    multicore<-FALSE
  
  data<-design$variables
  

  g<-match.call()
  formula<-eval.parent(formula)
  environment(formula)<-environment()
  g$formula<-formula
  g$data<-quote(data)
  g$design<-NULL
  g$var<-g$rho<-g$return.replicates<-g$multicore<-NULL
  g$weights<-quote(.survey.prob.weights)
  g[[1]]<-quote(glm)      
  g$model<-TRUE
  g$x<-TRUE
    g$y<-TRUE
    g$rescale<-NULL
  
      scale<-design$scale
      rscales<-design$rscales
      if (!is.null(rho)) .NotYetUsed(rho)

      if (is.null(rescale))
          pwts<-design$pweights/mean(design$pweights)
      else if (rescale)  ## old behaviour
          pwts<-design$pweights/sum(design$pweights)
      else
          pwts<-design$pweights ## no rescaling

      if (is.data.frame(pwts)) pwts<-pwts[[1]]
      
      if (!all(all.vars(formula) %in% names(data))) 
	stop("all variables must be in design= argument")
      .survey.prob.weights<-pwts
      full<-with(data,eval(g))
  
      full$naive.cov<-summary(full)$cov.unscaled
  
      nas<-attr(full$model, "na.action")

      if(getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){

          v<-matrix(0,ncol=length(coef(full)),nrow=length(coef(full)))
          betas<-NULL

      } else {
          betas<-matrix(ncol=length(coef(full)),
                        nrow=ncol(design$repweights))
          
          if (!design$combined.weights)
              pw1<-pwts
          else
              pw1<-rep(1,length(pwts))
          wts<-design$repweights
          if (length(nas)){
              wts<-wts[-nas,]
              pw1<-pw1[-nas]
          }
          XX<-full$x
          YY<-full$y
          beta0<-coef(full)
          if (!all(is.finite(beta0))) stop(paste("Infinite/NA values in estimate (",paste(beta0,collapse=","),")"))
          if(is.null(full$offset))
          offs<-rep(0,nrow(XX))
          else
              offs<-full$offset
          incpt<-as.logical(attr(terms(full),"intercept"))
          fam<-full$family
          contrl<-full$control
          if (multicore){
            betas<-do.call(rbind,parallel::mclapply(1:ncol(wts), function(i){
              wi<-as.vector(wts[,i])*pw1
              glm.fit(XX, YY, weights = wi/sum(wi),
                      start =beta0,
                      offset = offs,
                      family = fam, control = contrl,
                      intercept = incpt)$coefficients
              
            }))
          } else {
            for(i in 1:ncol(wts)){
              wi<-as.vector(wts[,i])*pw1
              betas[i,]<-glm.fit(XX, YY, weights = wi/sum(wi),
                                 start =beta0,
                                 offset = offs,
                                 family = fam, control = contrl,
                                 intercept = incpt)$coefficients
            }
          }
          v<-svrVar(betas,scale, rscales,mse=design$mse,coef=beta0)
  }

  full$x<-NULL
  full$df.residual<-degf(design)+1-length(coef(full)[!is.na(coef(full))])
  
  if (length(nas))
      design<-design[-nas,]

  full$cov.unscaled<-v
  if (return.replicates){
    attr(betas,"scale")<-design$scale
    attr(betas,"rscales")<-design$rscales
    attr(betas,"mse")<-design$mse
    full$replicates<-betas
  }  
  class(full)<-c("svrepglm", "svyglm", class(full))
    
  full$call<-sys.call(-1)
  if(!("formula" %in% names(full$call))) {
    if (is.null(names(full$call)))
      i<-1
    else
      i<-min(which(names(full$call)[-1]==""))
    names(full$call)[i+1]<-"formula"
  }
  full$survey.design<-design
  full
}


print.summary.svyglm<-function (x, digits = max(3, getOption("digits") - 3),
                                symbolic.cor = x$symbolic.cor, 
    signif.stars = getOption("show.signif.stars"), ...) 
{
  ##if (!exists("printCoefmat")) printCoefmat<-print.coefmat

  cat("\nCall:\n")
    cat(paste(deparse(x$call), sep = "\n", collapse = "\n"), 
        "\n\n", sep = "") 

    cat("Survey design:\n")
    print(x$survey.design$call)
   
        if (!is.null(df <- x$df) && (nsingular <- df[3] - df[1])) 
            cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n", 
                sep = "")
        else cat("\nCoefficients:\n")
        coefs <- x$coefficients
        if (!is.null(aliased <- is.na(x$coefficients[,1])) && any(aliased)) {
            cn <- names(aliased)
            coefs <- matrix(NA, length(aliased), 4, dimnames = list(cn, 
                colnames(coefs)))
            coefs[!aliased, ] <- x$coefficients
        }
        printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 
            na.print = "NA", ...)
    if (x$df.resid<=0)
        cat("\nZero or negative residual df; p-values not defined\n")
    cat("\n(Dispersion parameter for ", x$family$family, " family taken to be ", 
        format(x$dispersion), ")\n\n",  "Number of Fisher Scoring iterations: ", 
        x$iter, "\n", sep = "")
    correl <- x$correlation
    if (!is.null(correl)) {
        p <- NCOL(correl)
        if (p > 1) {
            cat("\nCorrelation of Coefficients:\n")
            if (is.logical(symbolic.cor) && symbolic.cor) {
                print(symnum(correl, abbr.colnames = NULL))
            }
            else {
                correl <- format(round(correl, 2), nsmall = 2, 
                  digits = digits)
                correl[!lower.tri(correl)] <- ""
                print(correl[-1, -p, drop = FALSE], quote = FALSE)
            }
        }
    }
    cat("\n")
    invisible(x)
}



predict.svrepglm <- function(object, newdata=NULL, total=NULL,
                           type = c("link", "response","terms"),
                           se.fit=(type!="terms"),
                           vcov=FALSE, return.replicates=!is.null(object$replicates),...){
    if(is.null(newdata))
      newdata<-model.frame(object$survey.design)
    type<-match.arg(type)
    if (type=="terms")
      return(predterms(object,se=se.fit,...))
    tt<-delete.response(terms(formula(object)))
    mf<-model.frame(tt,data=newdata)
    mm<-model.matrix(tt,mf)
    if (!is.null(total) && attr(tt,"intercept")){
        mm[,attr(tt,"intercept")]<-mm[,attr(tt,"intercept")]*total
    }
    eta<-drop(mm %*% coef(object))
    d<-drop(object$family$mu.eta(eta))
    eta<-switch(type, link=eta, response=object$family$linkinv(eta))
    if(se.fit){
        if(vcov){
            vv<-mm %*% vcov(object) %*% t(mm)
            attr(eta,"var")<-switch(type,
                                    link=vv,
                                    response=d*(t(vv*d)))
        } else {
            ## FIXME make this more efficient
            vv<-drop(rowSums((mm %*% vcov(object)) * mm))
            attr(eta,"var")<-switch(type,
                                    link=vv,
                                    response=drop(d*(t(vv*d))))
        }
    }
    attr(eta,"statistic")<-type

    if (return.replicates){
      if (is.null(object$replicates)) {
        warning("replicates are not present in the fit")
      } else{
        pred.replicates<-t(apply(object$replicates,1, function(beta){
          etai<-drop(mm %*% beta)
          switch(type, link=etai, response=object$family$linkinv(etai))
        }))
        attr(pred.replicates,"scale")<-attr(object$replicates,"scale")
        attr(pred.replicates,"rscales")<-attr(object$replicates,"rscales")
        attr(pred.replicates,"mse")<-attr(object$replicates,"mse")
        eta<-list(eta,replicates=pred.replicates)
      }
    }

    class(eta)<-"svrepstat"
    eta
  }
    

    

svyratio.svyrep.design<-svrepratio<-function(numerator=formula,denominator, design,
                                             na.rm=FALSE,formula,covmat=FALSE,
                                             return.replicates=FALSE,deff=FALSE,...){

  if (!exists(".Generic"))
    .Deprecated("svyratio")
  
  if (!inherits(design, "svyrep.design")) stop("design must be a svyrepdesign object")
  
  if (inherits(numerator,"formula"))
    numerator<-model.frame(numerator,design$variables, na.action=na.pass)
  else if(typeof(numerator) %in% c("expression","symbol"))
    numerator<-eval(numerator, design$variables)
  if (inherits(denominator,"formula"))
    denominator<-model.frame(denominator,design$variables, na.action=na.pass)
  else if(typeof(denominator) %in% c("expression","symbol"))
    denominator<-eval(denominator, design$variables)
  
  nn<-NCOL(numerator)
  nd<-NCOL(denominator)
  
  all<-cbind(numerator,denominator)
  nas<-!complete.cases(all)
  if (na.rm==TRUE){
      design<-design[!nas,]
      all<-all[!nas,,drop=FALSE]
      numerator<-numerator[!nas,,drop=FALSE]
      denominator<-denominator[!nas,,drop=FALSE]
  }
  allstats<-svymean(all, design, return.replicates=TRUE)
  
  rval<-list(ratio=outer(allstats$mean[1:nn], allstats$mean[nn+1:nd], "/"))
  
  if (is.null(allstats$replicates)){
    ##only self-representing strata.
    vars<-matrix(0,nrow=nn,ncol=nd)
  }else {
    vars<-matrix(nrow=nn,ncol=nd)
    if (deff) deffs<-matrix(nrow=nn,ncol=nd)
    for(i in 1:nn){
      for(j in 1:nd){
        vars[i,j]<-svrVar(allstats$replicates[,i]/allstats$replicates[,nn+j],
                          design$scale, design$rscales,mse=design$mse,coef=rval$ratio[i,j])
        if (deff=="replace" || deff)
          deffs[i,j]<-deff(svytotal(numerator[,i]-rval$ratio[i,j]*denominator[,j],design,deff=deff))
      }
    }
  }
  if (covmat){
      if (is.null(allstats$replicates))
          vcovmat<-matrix(0,nn*nd,nn*nd)
      else
          vcovmat<-as.matrix(svrVar(allstats$replicates[,rep(1:nn,nd)]/allstats$replicates[,nn+rep(1:nd,each=nn)],
                          design$scale, design$rscales,mse=design$mse,coef=as.vector(rval$ratio)))
      rownames(vcovmat)<-names(numerator)[rep(1:nn,nd)]
      colnames(vcovmat)<-names(denominator)[rep(1:nd,each=nn)]
      rval$vcov<-vcovmat
  }
  if (return.replicates) {
    reps<-allstats$replicates[, rep(1:nn, nd)]/allstats$replicates[, nn + rep(1:nd, each = nn)]
    attr(reps,"scale")<-design$scale
    attr(reps,"rscales")<-design$rscales
    attr(reps,"mse")<-design$mse
    rval$replicates<-reps
  }
  rval$var<-vars
  attr(rval,"call")<-sys.call()
  if (deff=="replace" || deff) attr(rval,"deff")<-deffs
  class(rval)<-"svyratio"
  rval
}

vcov.svyratio <- function(object, ...){
    covmat<-object$vcov
    if (is.null(covmat)){
        covmat<-matrix(NaN,length(object$var),length(object$var))
        diag(covmat)<-as.vector(object$var)
    }
    nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
    dimnames(covmat)<-list(nms,nms)
    covmat
}

residuals.svrepglm<-function(object,type = c("deviance", "pearson", "working", 
    "response", "partial"),...){
	type<-match.arg(type)
	if (type=="pearson"){
   	   y <- object$y
	   mu <- object$fitted.values
    	   wts <- object$prior.weights
	   r<-(y - mu) * sqrt(wts)/(sqrt(object$family$variance(mu))*sqrt(object$survey.design$pweights/mean(object$survey.design$pweights)))
	   if (is.null(object$na.action)) 
        	r
    	   else 
	        naresid(object$na.action, r)
	} else 
		NextMethod()

}

logLik.svrepglm<-function(object,...){
   stop("svrepglm not fitted by maximum likelihood.")
}

vcov.svyrep.design<-function (object, replicates, centre, ...) 
{
    if (object$mse) 
        svrVar(replicates, scale = object$scale, rscales = object$rscales, 
            mse = object$mse, coef=centre)
    else svrVar(replicates, scale = object$scale, rscales = object$rscales, 
        mse = object$mse)
}



withReplicates<-function(design, theta,  ..., return.replicates=FALSE){
  UseMethod("withReplicates",design)
}

withReplicates.svrepvar<-function(design, theta, ...,return.replicates=FALSE){
  if (is.null(reps<-design$replicates)) stop("object does not contain replicate estimates")

  p<-sqrt(NCOL(reps))
    if (is.function(theta)){
        full<-theta(design[[1]],...)
        thetas<-drop(t(apply(reps,1,
                             function(rr) theta(matrix(rr,p,p), ...))))
    } else{
        full<-eval(theta, list(.replicate=design[[1]]))
        thetas<-drop(t(apply(reps,1,
                             function(rr) eval(theta, list(.replicate=matrix(rr,p,p))))))
    }

  v<-svrVar(thetas, attr(reps,"scale"), attr(reps,"rscales"), mse=attr(reps,"mse"), coef=full)
  
  attr(full,"var")<-v
  attr(full,"statistic")<-"theta"
  
  if (return.replicates){
    attr(thetas,"scale")<-attr(reps,"scale")
    attr(thetas,"rscales")<-attr(reps,"rscales")
    attr(thetas,"mse")<-attr(reps,"mse")
    rval<-list(theta=full, replicates=thetas)
  }  else {
    rval<-full
  }
  class(rval)<-"svrepstat"
  rval
  
  
}

withReplicates.svrepstat<-function(design, theta, ..., return.replicates=FALSE){
  if (is.null(reps<-design$replicates)) stop("object does not contain replicate estimates")

  reps<-as.matrix(reps)
  
  if (is.function(theta)){
        full<-theta(design[[1]],...)
        thetas<-drop(t(apply(reps,1,theta, ...)))
      } else{
        full<-eval(theta, list(.replicate=design[[1]]))
        thetas<-drop(t(apply(reps,1,
                             function(rr) eval(theta, list(.replicate=rr)))))
      }
  
  v<-svrVar(thetas, attr(reps,"scale"), attr(reps,"rscales"), mse=attr(reps,"mse"), coef=full)
  
  attr(full,"var")<-v
  attr(full,"statistic")<-"theta"
  
  if (return.replicates){
    attr(thetas,"scale")<-attr(reps,"scale")
    attr(thetas,"rscales")<-attr(reps,"rscales")
    attr(thetas,"mse")<-attr(reps,"mse")
    rval<-list(theta=full, replicates=thetas)
  }  else {
    rval<-full
  }
  class(rval)<-"svrepstat"
  rval 
}


withReplicates.svyrep.design<-function(design, theta, rho=NULL,...,
                         scale.weights=FALSE,
                         return.replicates=FALSE){
    wts<-design$repweights
    scale<-design$scale
    rscales<-design$rscales
    if (!is.null(rho)) .NotYetUsed("rho")
    
    if (scale.weights)
      pwts<-design$pweights/sum(design$pweights)
    else
      pwts<-design$pweights
    
  if (inherits(wts,"repweights_compressed")){
      if (scale.weights)
          wts$weights<-sweep(wts$weights,2,drop(colSums(wts$weights)),"/")
  } else {
      if (scale.weights)
          wts<-sweep(wts,2, drop(colSums(wts)),"/")
  }

    rpwts<-if (design$combined.weights) 1 else pwts
    data<-design$variables
    
    if (is.function(theta)){
        full<-theta(pwts,data,...)
        thetas<-drop(t(apply(wts,2,
                             function(ww) theta(as.vector(ww)*rpwts, data, ...))))
    } else{
        .weights<-pwts
        full<-with(data, eval(theta))
        thetas<-drop(t(apply(wts,2,
                             function(.weights) {.weights<-as.vector(.weights)*rpwts
                                                 with(data, eval(theta))})))
    }
      
  v<-svrVar(thetas, scale, rscales,mse=design$mse, coef=full)

    attr(full,"var")<-v
    attr(full,"statistic")<-"theta"

    if (return.replicates)
      rval<-list(theta=full, replicates=thetas)
    else
      rval<-full
    class(rval)<-"svrepstat"
    rval
  }

coef.svrepstat<-function(object,...){
  if (is.list(object)) object<-object[[1]]
  attr(object,"statistic")<-NULL
  attr(object,"deff")<-NULL
  attr(object,"var")<-NULL
  unclass(object)
}

vcov.svrepstat<-function (object, ...) 
{
  nms <- names(coef(object))
  if (is.list(object)) 
    object <- object[[1]]
  v <- as.matrix(attr(object, "var"))
  
  if (length(object) == NCOL(v)) {
    dimnames(v) <- list(nms, nms)
    v
  }
  else if (length(object) == length(v)) {
    dnms <- dimnames(coef(object))
    vmat <- matrix(NA, nrow = length(object), ncol = length(object))
    diag(vmat) <- as.vector(v)
    nms <- as.vector(outer(dnms[[1]], dnms[[2]], paste, sep = ":"))
    dimnames(vmat) <- list(nms, nms)
    vmat
  }
}



as.data.frame.svrepstat<-function(x,...){
  if (is.list(x)) {
    x<-x[[1]]
    class(x)<-"svrepstat"
  }
  rval<-data.frame(statistic=coef(x),SE=SE(x))
  names(rval)[1]<-attr(x,"statistic")
  if (!is.null(attr(x,"deff")))
    rval<-cbind(rval,deff=deff(x))
  rval
}

SE<-function(object,...){
  UseMethod("SE")
}

SE.default<-function(object,...){
  sqrt(diag(vcov(object,...)))
}

SE.svrepstat<-function(object,...){
  if (is.list(object)){
    object<-object[[1]]
  }
  vv<-as.matrix(attr(object,"var"))
  if (!is.null(dim(object)) && length(object)==length(vv))
    sqrt(vv)
  else
    sqrt(diag(vv))
}

print.svrepstat<-function(x,...){
  if (is.list(x)){
    x<-x[[1]]
  }
  vv<-attr(x,"var")
  deff<-attr(x, "deff")
  if (!is.null(dim(x)) && length(x)==length(vv)){
    cat("Statistic:\n")
    prmatrix(x)
    cat("SE:\n")
    print(sqrt(vv))
    if (!is.null(deff)){
      cat("Design Effect:\n")
      printCoefmat()
    }
  } else if(length(x)==NCOL(vv)){
    m<-cbind(x,sqrt(diag(as.matrix(vv))))
    if (is.null(deff))
      colnames(m)<-c(attr(x,"statistic"),"SE")
    else {
      m<-cbind(m,deff(x))
      colnames(m)<-c(attr(x,"statistic"),"SE","DEff")
    }
    printCoefmat(m)
  } else if (length(x)==length(vv)){
      m<-cbind(x,sqrt((vv)))
      if (is.null(deff))
          colnames(m)<-c(attr(x,"statistic"),"SE")
      else {
          m<-cbind(m,deff(x))
          colnames(m)<-c(attr(x,"statistic"),"SE","DEff")
      }
      printCoefmat(m)
  } else {
      stop("incorrect structure of svrepstat object")
  }

  naa<-attr(vv,"na.replicates")
  if (!is.null(naa))
    cat("Note: NA results discarded for",length(naa),"replicates (",naa,")\n")
}

summary.svrepglm<-function (object, correlation = FALSE, df.resid=NULL,...) 
{
    Qr <- object$qr
    est.disp <- TRUE
    if (is.null(df.resid))
      df.r <- object$df.residual
    else
      df.r<-df.resid
    presid<-resid(object,"pearson")
    dispersion<- sum(  object$survey.design$pweights*presid^2,na.rm=TRUE)/sum(object$survey.design$pweights)
    coef.p <- coef(object)
    covmat<-vcov(object)
    dimnames(covmat) <- list(names(coef.p), names(coef.p))
    var.cf <- diag(covmat)
    s.err <- sqrt(var.cf)
    tvalue <- coef.p/s.err
    dn <- c("Estimate", "Std. Error")
    if (!est.disp) {
        pvalue <- 2 * pnorm(-abs(tvalue))
        coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
        dimnames(coef.table) <- list(names(coef.p), c(dn, "z value", 
            "Pr(>|z|)"))
    }
    else if (df.r > 0) {
        pvalue <- 2 * pt(-abs(tvalue), df.r)
        coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
        dimnames(coef.table) <- list(names(coef.p), c(dn, "t value", 
            "Pr(>|t|)"))
    }
    else {
        coef.table <- cbind(coef.p, Inf)
        dimnames(coef.table) <- list(names(coef.p), dn)
    }
    ans <- c(object[c("call", "terms", "family", "deviance", 
        "aic", "contrasts", "df.residual", "null.deviance", "df.null", 
        "iter")], list(deviance.resid = residuals(object, type = "deviance"), 
        aic = object$aic, coefficients = coef.table, dispersion = dispersion, 
        df = c(object$rank, df.r,NCOL(Qr$qr)), cov.unscaled = covmat, 
        cov.scaled = covmat))
    if (correlation) {
        dd <- sqrt(diag(covmat))
        ans$correlation <- covmat/outer(dd, dd)
    }

    ans$aliased<-is.na(ans$coef)
    ans$survey.design<-list(call=object$survey.design$call,
                            type=object$survey.design$type)
    class(ans) <- c("summary.svyglm","summary.glm")
    return(ans)
}


svytable.svyrep.design<-svreptable<-function(formula, design,
                                             Ntotal=sum(weights(design, "sampling")),
                                             round=FALSE,...){

  if (!exists(".Generic",inherits=FALSE))
    .Deprecated("svytable")
  
   weights<-design$pweights
   if (is.data.frame(weights)) weights<-weights[[1]]
   ## unstratified or unadjusted.
   if (is.null(Ntotal) || length(Ntotal)==1){
       ff<-eval(substitute(lhs~rhs,list(lhs=quote(weights), rhs=formula[[2]])))
       tbl<-xtabs(ff, data=design$variables,...)
       if (!is.null(Ntotal)) {
           tbl<-tbl*(sum(as.numeric(Ntotal))/sum(tbl))  ## avoid integer overflow when Ntotal >sqrt(maxint)
       }
       if (round)
           tbl<-round(tbl)
       class(tbl) <- c("svytable", class(tbl))
       attr(tbl, "call")<-match.call()
       return(tbl)
   }
   ## adjusted and stratified
   ff<-eval(substitute(lhs~strata+rhs,list(lhs=quote(weights),
                                           rhs=formula[[2]],
                                           strata=quote(design$strata))))
   tbl<-xtabs(ff, data=design$variables,...)
   ss<-match(sort(unique(design$strata)), Ntotal[,1])
   dm<-dim(tbl)
   layer<-prod(dm[-1])
   tbl<-sweep(tbl,1,Ntotal[ss, 2]/apply(tbl,1,sum),"*")
   tbl<-apply(tbl, 2:length(dm), sum)
   if (round)
       tbl<-round(tbl)
   class(tbl)<-c("svytable", "xtabs","table")
   attr(tbl, "call")<-match.call()

   tbl
}


postStratify<-function(design,strata, population, partial=FALSE,...){
  UseMethod("postStratify")
}



postStratify.svyrep.design<-function(design, strata, population,
                                     partial=FALSE,compress=NULL,...){

  if(inherits(strata,"formula")){
    mf<-substitute(model.frame(strata, data=design$variables,na.action=na.fail))
    strata<-eval.parent(mf)
  }
  strata<-as.data.frame(strata)
  if (is.null(compress))
    compress<-inherits(design$repweights, "repweights_compressed")

  sampletable<-xtabs(design$pweights~.,data=strata)
  sampletable<-as.data.frame(sampletable)

  if (inherits(population,"table"))
    population<-as.data.frame(population)
  else if (is.data.frame(population))
    population$Freq <- as.vector(population$Freq)
  else
    stop("population must be a table or dataframe")

  if (!all(names(strata) %in% names(population)))
    stop("Stratifying variables don't match")
  nn<- names(population) %in% names(strata)
  if (sum(!nn)!=1)
    stop("stratifying variables don't match")

  names(population)[which(!nn)]<-"Pop.Freq"
  
  both<-merge(sampletable, population, by=names(strata), all=TRUE)

  samplezero <- both$Freq %in% c(0, NA)
  popzero <- both$Pop.Freq %in% c(0, NA)
  both<-both[!(samplezero & popzero),]
  
  if (any(onlysample<- popzero & !samplezero)){
    print(both[onlysample,])
    stop("Strata in sample absent from population. This Can't Happen")
  }
  if (any(onlypop <- samplezero & !popzero)){
    if (partial){
      both<-both[!onlypop,]
      warning("Some strata absent from sample: ignored")
    } else {
      print(both[onlypop,])
      stop("Some strata absent from sample: use partial=TRUE to ignore them.")
    }
  } 

  reweight<-both$Pop.Freq/both$Freq
  both$label <- do.call("interaction", list(both[,names(strata)]))
  designlabel <- do.call("interaction", strata)
  index<-match(designlabel, both$label)

  oldpw<-design$pweights
  design$pweights<-design$pweights*reweight[index]

  
  if (design$combined.weights){
    replicateFreq<- rowsum(as.matrix(design$repweights),
                           match(designlabel, both$label),
                           reorder=TRUE)
    repreweight<-  both$Pop.Freq/replicateFreq
    design$repweights <- as.matrix(design$repweights)*repreweight[index,]
  } else { 
    replicateFreq<- rowsum(as.matrix(design$repweights)*oldpw,
                           match(designlabel, both$label),
                           reorder=TRUE)
    repreweight<- both$Pop.Freq/replicateFreq
    design$repweights <- as.matrix(design$repweights)* (repreweight/reweight)[index,]
  }

  if (compress) design$repweights<-compressWeights(design$repweights)
  
  design$call<-sys.call(-1)
  if(!is.null(design$degf)){
    design$degf<-NULL
    design$degf<-degf(design)
  }
  design
}


rake<-function(design, sample.margins, population.margins,
               control=list(maxit=10, epsilon=1, verbose=FALSE),
                 compress=NULL){

    if (!missing(control)){
        control.defaults<-formals(rake)$control
        for(n in names(control.defaults))
            if(!(n %in% names(control)))
                control[[n]]<-control.defaults[[n]]
    }

    is.rep<-inherits(design,"svyrep.design")

    if (is.rep && is.null(compress))
      compress<-inherits(design$repweights,"repweights_compressed")

    if (is.rep) design$degf<-NULL
    
    if (length(sample.margins)!=length(population.margins))
        stop("sample.margins and population.margins do not match.")

    nmar<-length(sample.margins)
    
    if (control$epsilon<1) 
        epsilon<-control$epsilon*sum(weights(design,"sampling"))
    else
        epsilon<-control$epsilon

    
    
    strata<-lapply(sample.margins, function(margin)
                   if(inherits(margin,"formula")){
                     mf<-model.frame(margin, data=design$variables,na.action=na.fail)
                   }
                   )
    

    allterms<-unlist(lapply(sample.margins,all.vars))
    ff<-formula(paste("~", paste(allterms,collapse="+"),sep=""))
    oldtable<-svytable(ff, design)
    if (control$verbose)
        print(oldtable)

    oldpoststrata<-design$postStrata
    iter<-0
    converged<-FALSE
    while(iter < control$maxit){
        ## we don't want to accumulate more poststrata with each iteration
        design$postStrata<-NULL
        
        for(i in 1:nmar){
            design<-postStratify(design, strata[[i]],
                                 population.margins[[i]],
                                 compress=FALSE)
        }
        newtable<-svytable(ff, design)
        if (control$verbose)
            print(newtable)

        delta<-max(abs(oldtable-newtable))
        if (delta<epsilon){
            converged<-TRUE
            break
        }
        oldtable<-newtable
        iter<-iter+1
    }

    ## changed in 3.6-3 to allow the projections to be iterated
    ## in svyrecvar
    rakestrata<-design$postStrata
    if(!is.null(rakestrata)){
      class(rakestrata)<-"raking"
      design$postStrata<-c(oldpoststrata, list(rakestrata))
    }
    
    design$call<-sys.call()

    if (is.rep && compress)
      design$repweights<-compressWeights(design$repweights)
    if(is.rep)
      design$degf<-degf(design)
    
    if(!converged)
        warning("Raking did not converge after ", iter, " iterations.\n")

    return(design)
        
}




## degrees of freedom for repweights design
degf<-function(design,...) UseMethod("degf")

degf.svyrep.design<-function(design,tol=1e-5,...){
  if (!inherits(design,"svyrep.design"))
    stop("Not a survey design with replicate weights")
  rval<-design$degf ##cached version
  if(is.null(rval))
    rval<-qr(weights(design,"analysis"), tol=1e-5)$rank-1
  rval
}

degf.survey.design2<-function(design,...){
  inset<- weights(design,"sampling")!=0
  length(unique(design$cluster[inset, 1])) - length(unique(design$strata[inset, 1]))
}

degf.twophase<-function(design,...){
  degf(design$phase2)
}

dim.svyrep.design<-function(x) dim(x$variables)
bschneidr/fastsurvey documentation built on March 13, 2024, 11:12 a.m.