R/survey.R

Defines functions svytable.survey.design svytable cv.svyratio predict.svyratio_separate predict.svyratio print.svyratio print.svyratio_separate svyratio.survey.design svyratio SE.svyratio coef.svyratio as.matrix.svyvar svyvar.survey.design svyvar svytotal.survey.design svytotal cv.default cv deff.default deff SE.svystat influence.svystat vcov.svystat coef.svystat as.data.frame.svystat print.svystat svymean.survey.design svymean svyCprod postStratify.survey.design print.summary.survey.design summary.survey.design subset.survey.design update.survey.design na.exclude.survey.design na.omit.survey.design na.fail.survey.design dim.survey.design print.survey.design oldsvydesign dimnames.twophase dimnames.svyrep.design dimnames.survey.design make.formula

Documented in coef.svyratio coef.svystat cv cv.default cv.svyratio deff deff.default dimnames.survey.design dimnames.svyrep.design dimnames.twophase dim.survey.design make.formula na.exclude.survey.design na.fail.survey.design na.omit.survey.design postStratify.survey.design predict.svyratio predict.svyratio_separate print.svyratio print.svyratio_separate SE.svyratio subset.survey.design svyCprod svymean svymean.survey.design svyratio svyratio.survey.design svytable svytable.survey.design svytotal svytotal.survey.design svyvar svyvar.survey.design update.survey.design vcov.svystat

make.formula<-function(names) formula(paste("~",paste(names,collapse="+")))

dimnames.survey.design<-function(x) dimnames(x$variables)
dimnames.svyrep.design<-function(x) dimnames(x$variables)
dimnames.twophase<-function(x) dimnames(x$phase1$sample$variables)

oldsvydesign<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL,
                    data=NULL, nest=FALSE, check.strata=!nest,weights=NULL){
 
    .Deprecated("svydesign")
  
    ## less memory-hungry version for sparse tables
    interaction<-function (..., drop = TRUE) {
        args <- list(...)
        narg <- length(args)
        if (narg == 1 && is.list(args[[1]])) {
            args <- args[[1]]
            narg <- length(args)
        }
        
        ls<-sapply(args,function(a) length(levels(a)))
        ans<-do.call("paste",c(lapply(args,as.character),sep="."))
        ans<-factor(ans)
        return(ans)
        
    }


    na.failsafe<-function(object,...){
      if (NCOL(object)==0)
        object
      else na.fail(object)
    }
    
     if(inherits(ids,"formula")) {
	 mf<-substitute(model.frame(ids,data=data,na.action=na.failsafe))   
	 ids<-eval.parent(mf)
	} else if (!is.null(ids))
            ids<-na.fail(data.frame(ids))

     if(inherits(probs,"formula")){
	mf<-substitute(model.frame(probs,data=data,na.action=na.failsafe))
	probs<-eval.parent(mf)
	}
     
     if(inherits(weights,"formula")){
       mf<-substitute(model.frame(weights,data=data,na.action=na.failsafe))
       weights<-eval.parent(mf)
     } else if (!is.null(weights))
         weights<-na.fail(data.frame(weights))
     
     if(!is.null(weights)){
       if (!is.null(probs))
         stop("Can't specify both sampling weights and probabilities")
       else
         probs<-1/weights
     }

      

    if (!is.null(strata)){
      if(inherits(strata,"formula")){
        mf<-substitute(model.frame(strata,data=data, na.action=na.failsafe))
        strata<-eval.parent(mf)
      }
      if(is.list(strata))
        strata<-na.fail(do.call("interaction", strata))
      if (!is.factor(strata))
        strata<-factor(strata)
      has.strata<-TRUE
    } else {
      strata<-factor(rep(1,NROW(ids)))
      has.strata <-FALSE
    }
    
    if (inherits(variables,"formula")){
        mf<-substitute(model.frame(variables,data=data,na.action=na.pass))
        variables <- eval.parent(mf)
    } else if (is.null(variables)){
        variables<-data
    } else
        variables<-data.frame(variables)

    
     if (inherits(fpc,"formula")){
       mf<-substitute(model.frame(fpc,data=data,na.action=na.failsafe))
       fpc<-eval.parent(mf)
       if (length(fpc))
         fpc<-fpc[,1]
     }
     
    if (is.null(ids) || NCOL(ids)==0)
	ids<-data.frame(.id=seq(length=NROW(variables)))

     ## force subclusters nested in clusters
     if (nest && NCOL(ids)>1){
      N<-ncol(ids)
      for(i in 2:(N)){
          ids[,i]<-do.call("interaction", ids[,1:i,drop=TRUE])
      }
    }
     ## force clusters nested in strata
     if (nest && has.strata && NCOL(ids)){
       N<-NCOL(ids)
       for(i in 1:N)
         ids[,i]<-do.call("interaction", list(strata, ids[,i]))
     }

    ## check if clusters nested in strata 
     if (check.strata && nest)
      warning("No point in check.strata=TRUE if nest=TRUE")
    if(check.strata && !is.null(strata) && NCOL(ids)){
       sc<-rowSums(table(ids[,1],strata)>0)
       if(any(sc>1)) stop("Clusters not nested in strata")
    }

    ## Put degrees of freedom (# of PSUs in each stratum) in object, to 
    ## allow subpopulations
    if (NCOL(ids)){
        nPSU<-table(strata[!duplicated(ids[,1])])
    }


    if (!is.null(fpc)){

       if (NCOL(ids)>1){
         if (all(fpc<1))
           warning("FPC is not currently supported for multi-stage sampling")
         else
           stop("Can't compute FPC from population size for multi-stage sampling")
       }
       
       ## Finite population correction: specified per observation
       if (is.numeric(fpc) && length(fpc)==NROW(variables)){
         tbl<-by(fpc,list(strata),unique)
         if (any(sapply(tbl,length)!=1))
           stop("fpc not constant within strata")
         fpc<-data.frame(strata=factor(rownames(tbl),levels=levels(strata)),
                         N=as.vector(tbl))
       }
       ## Now reduced to fpc per stratum
       nstr<-table(strata[!duplicated(ids[[1]])])
       
       if (all(fpc[,2]<=1)){
         fpc[,2]<- nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2]
       } else if (any(fpc[,2]<nstr[match(as.character(fpc[,1]), names(nstr))]))
         stop("Over 100% sampling in some strata")
       
     }

    ## if FPC specified, but no weights, use it for weights
    if (is.null(probs) && is.null(weights) && !is.null(fpc)){
      pstr<-nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2]
      probs<-pstr[match(as.character(strata),as.character(fpc[,1]))]
      probs<-as.vector(probs)
    }


    certainty<-rep(FALSE,length(unique(strata)))
    names(certainty)<-as.character(unique(strata))
    if (any(nPSU==1)){
      ## lonely PSUs: are they certainty PSUs?
      if (!is.null(fpc)){
        certainty<- fpc$N < 1.01
        names(certainty)<-as.character(fpc$strata)
      } else if (all(as.vector(probs)<=1)){
        certainty<- !is.na(match(as.character(unique(strata)),as.character(strata)[probs > 0.99]))
        names(certainty)<-as.character(unique(strata))
      } else {
        warning("Some strata have only one PSU and I can't tell if they are certainty PSUs")
      }
      
    } 
    
    if (is.numeric(probs) && length(probs)==1)
        probs<-rep(probs, NROW(variables))
    
    if (length(probs)==0) probs<-rep(1,NROW(variables))
    
    if (NCOL(probs)==1) probs<-data.frame(probs)

    rval<-list(cluster=ids)
    rval$strata<-strata
    rval$has.strata<-has.strata
    rval$prob<- apply(probs,1,prod) 
    rval$allprob<-probs
    rval$call<-match.call()
    rval$variables<-variables
    rval$fpc<-fpc
    rval$certainty<-certainty
    rval$call<-sys.call()
    rval$nPSU<-nPSU
    class(rval)<-"survey.design"
    rval
  }

print.survey.design<-function(x,varnames=FALSE,design.summaries=FALSE,...){
  .svycheck(x)
  n<-NROW(x$cluster)
  if (x$has.strata) cat("Stratified ")
  un<-length(unique(x$cluster[,1]))
  if(n==un){
    cat("Independent Sampling design\n")
    is.independent<-TRUE
  } else {
    cat(NCOL(x$cluster),"- level Cluster Sampling design\n")
    nn<-lapply(x$cluster,function(i) length(unique(i)))
    cat(paste("With (",paste(unlist(nn),collapse=", "),") clusters.\n",sep=""))
    is.independent<-FALSE
  }
  print(x$call)
  if (design.summaries){
    cat("Probabilities:\n")
    print(summary(x$prob))
    if(x$has.strata){
      cat("Stratum sizes: \n")
      a<-rbind(obs=table(x$strata),
	       design.PSU=x$nPSU,
               actual.PSU=if(!is.independent || !is.null(x$fpc))
               table(x$strata[!duplicated(x$cluster[,1])]))
      print(a)
    }
    if (!is.null(x$fpc)){
      if (x$has.strata) {
        cat("Population stratum sizes (PSUs): \n")
        print(x$fpc)
      } else {
        cat("Population size (PSUs):",x$fpc[,2],"\n")
      }
    }
  }
  if (varnames){
    cat("Data variables:\n")
    print(names(x$variables))
  }
  invisible(x)
}

"[.survey.design"<-function (x,i, ...){
  
  if (!missing(i)){ 
    if (is.calibrated(x)){
      tmp<-x$prob[i,]
      x$prob<-rep(Inf, length(x$prob))
      x$prob[i,]<-tmp
    } else {
      x$variables<-"[.data.frame"(x$variables,i,...,drop=FALSE)
      x$cluster<-x$cluster[i,,drop=FALSE]
      x$prob<-x$prob[i]
      x$allprob<-x$allprob[i,,drop=FALSE]
      x$strata<-x$strata[i]
    }
  } else {
    x$variables<-x$variables[,...,drop=FALSE]
  }
  
  x
}

"[<-.survey.design"<-function(x, ...,value){
  if (inherits(value, "survey.design"))
    value<-value$variables
  x$variables[...]<-value
  x
}

dim.survey.design<-function(x){
	dim(x$variables)
}

na.fail.survey.design<-function(object,...){
	tmp<-na.fail(object$variables,...)
	object
}

na.omit.survey.design<-function(object,...){
  tmp<-na.omit(object$variables,...)
  omit<-attr(tmp,"na.action")
  if (length(omit)){
    object<-object[-omit,]
    object$variables<-tmp
    attr(object,"na.action")<-omit
  }
  object
}

na.exclude.survey.design<-function(object,...){
	tmp<-na.exclude(object$variables,...)
	exclude<-attr(tmp,"na.action")
	if (length(exclude)){
           object<-object[-exclude,]
	   object$variables<-tmp
	   attr(object,"na.action")<-exclude
	}
	object
}


update.survey.design<-function(object,...){

  dots<-substitute(list(...))[-1]
  newnames<-names(dots)
  
  for(j in seq(along=dots)){
    object$variables[,newnames[j]]<-eval(dots[[j]],object$variables, parent.frame())
  }
  
  object$call<-sys.call(-1)
  object 
}


subset.survey.design<-function(x,subset,...){
        e <- substitute(subset)
        r <- eval(e, x$variables, parent.frame())
        r <- r & !is.na(r) 
        x<-x[r,]
	x$call<-sys.call(-1)
	x
}

summary.survey.design<-function(object,...){
  class(object)<-"summary.survey.design"
  object
}

print.summary.survey.design<-function(x,...){
  y<-x
  class(y)<-"survey.design"
  print(y,varnames=TRUE,design.summaries=TRUE,...)
}	
     
postStratify.survey.design<-function(design, strata, population, partial=FALSE,...){

  if(inherits(strata,"formula")){
    mf<-substitute(model.frame(strata, data=design$variables,na.action=na.fail))
    strata<-eval.parent(mf)
  }
  strata<-as.data.frame(strata)

  sampletable<-xtabs(I(1/design$prob)~.,data=strata)
  sampletable<-as.data.frame(sampletable)

  if (inherits(population,"table"))
    population<-as.data.frame(population)
  else if (is.data.frame(population))
    population$Freq <- as.vector(population$Freq) ##allows Freq computed by tapply()
  else
    stop("population must be a table or dataframe")

  if (!all(names(strata) %in% names(population)))
    stop("Stratifying variables don't match")
  nn<- names(population) %in% names(strata)
  if (sum(!nn)!=1)
    stop("stratifying variables don't match")

  names(population)[which(!nn)]<-"Pop.Freq"
  
  both<-merge(sampletable, population, by=names(strata), all=TRUE)

  samplezero <- both$Freq %in% c(0, NA)
  popzero <- both$Pop.Freq %in% c(0, NA)
  both<-both[!(samplezero & popzero),]
  
  if (any(onlysample<- popzero & !samplezero)){
    print(both[onlysample,])
    stop("Strata in sample absent from population. This Can't Happen")
  }
  if (any(onlypop <- samplezero & !popzero)){
    if (partial){
      both<-both[!onlypop,]
      warning("Some strata absent from sample: ignored")
    } else {
      print(both[onlypop,])
      stop("Some strata absent from sample: use partial=TRUE to ignore them.")
    }
  } 

  reweight<-both$Pop.Freq/both$Freq
  both$label <- do.call("interaction", list(both[,names(strata)]))
  designlabel <- do.call("interaction", strata)
  index<-match(designlabel, both$label)

  attr(index,"oldweights")<-1/design$prob
  design$prob<-design$prob/reweight[index]
  attr(index,"weights")<-1/design$prob
  design$postStrata<-c(design$postStrata,list(index))
  
  ## Do we need to iterate here a la raking to get design strata
  ## and post-strata both balanced?
  design$call<-sys.call(-1)
  
  design
}


svyCprod<-function(x, strata, psu, fpc, nPSU, certainty=NULL, postStrata=NULL,
                   lonely.psu=getOption("survey.lonely.psu")){

  x<-as.matrix(x)
  n<-NROW(x)

  ## Remove post-stratum means, which may cut across PSUs
  if(!is.null(postStrata)){
    for (psvar in postStrata){
      if (inherits(psvar, "greg_calibration") || inherits(psvar, "raking"))
        stop("rake() and calibrate() not supported for old-style design objects")
      psw<-attr(psvar,"weights")
      psmeans<-rowsum(x/psw,psvar,reorder=TRUE)/as.vector(table(factor(psvar)))
      x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
    }
  }

  ##First collapse over PSUs

  if (is.null(strata)){
    strata<-rep("1",n)
    if (!is.null(nPSU))
        names(nPSU)<-"1"
  }
  else
    strata<-as.character(strata) ##can't use factors as indices in for()'

  if (is.null(certainty)){
    certainty<-rep(FALSE,length(strata))
    names(certainty)<-strata
  }
  
  if (!is.null(psu)){
    x<-rowsum(x, psu, reorder=FALSE)
    strata<-strata[!duplicated(psu)]
    n<-NROW(x)
  }
  
  if (!is.null(nPSU)){
      obsn<-table(strata)
      dropped<-nPSU[match(names(obsn),names(nPSU))]-obsn
      if(sum(dropped)){
        xtra<-matrix(0,ncol=NCOL(x),nrow=sum(dropped))
        strata<-c(strata,rep(names(dropped),dropped))
      	if(is.matrix(x))
	   x<-rbind(x,xtra)
        else
	   x<-c(x,xtra)
        n<-NROW(x)
      }
  } else obsn<-table(strata)

  if(is.null(strata)){
      x<-t(t(x)-colMeans(x))
  } else {
      strata.means<-drop(rowsum(x,strata, reorder=FALSE))/drop(rowsum(rep(1,n),strata, reorder=FALSE))
      if (!is.matrix(strata.means))
          strata.means<-matrix(strata.means, ncol=NCOL(x))
      x<- x- strata.means[ match(strata, unique(strata)),,drop=FALSE]
  }
  
  p<-NCOL(x)
  v<-matrix(0,p,p)
  
  ss<-unique(strata)
  for(s in ss){
      this.stratum <- strata %in% s
      
      ## original number of PSUs in this stratum 
      ## before missing data/subsetting
      this.n <-nPSU[match(s,names(nPSU))]
      
      this.df <- this.n/(this.n-1)	
      
      if (is.null(fpc))
          this.fpc <- 1
      else{
          this.fpc <- fpc[,2][ fpc[,1]==as.character(s)]
          this.fpc <- (this.fpc - this.n)/this.fpc
      }
      
      xs<-x[this.stratum,,drop=FALSE]

      this.certain<-certainty[names(certainty) %in% s]
      
      ## stratum with only 1 design cluster leads to undefined variance
      lonely.psu<-match.arg(lonely.psu, c("remove","adjust","fail",
                                          "certainty","average"))
      if (this.n==1 && !this.certain){
        this.df<-1
        if (lonely.psu=="fail")
          stop("Stratum ",s, " has only one sampling unit.")
        else if (lonely.psu!="certainty")
          warning("Stratum ",s, " has only one sampling unit.")
        if (lonely.psu=="adjust")
          xs<-strata.means[match(s,ss),,drop=FALSE]
      } else if (obsn[match(s,names(obsn))]==1 && !this.certain){
        ## stratum with only 1 cluster left after subsetting 
        warning("Stratum ",s," has only one PSU in this subset.")
        if (lonely.psu=="adjust")
          xs<-strata.means[match(s,ss),,drop=FALSE]
      }
      ## add it up
      if (!this.certain)
        v<-v+crossprod(xs)*this.df*this.fpc
    }
  if (lonely.psu=="average"){
    v<- v/(1-mean(obsn==1 & !certainty))
  }
  v
}



svymean<-function(x, design,na.rm=FALSE,...){
  .svycheck(design)
  UseMethod("svymean",design)
}

svymean.survey.design<-function(x,design, na.rm=FALSE,deff=FALSE, influence=FALSE,...){

  if (!inherits(design,"survey.design"))
    stop("design is not a survey design")
  
  if (inherits(x,"formula")){
    ## do the right thing with factors
    mf<-model.frame(x,design$variables,na.action=na.pass)
    xx<-lapply(attr(terms(x),"variables")[-1],
               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
    cols<-sapply(xx,NCOL)
    x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
    scols<-c(0,cumsum(cols))
    for(i in 1:length(xx)){
      x[,scols[i]+1:cols[i]]<-xx[[i]]
    }
    colnames(x)<-do.call("c",lapply(xx,colnames))
  }
  else if(typeof(x) %in% c("expression","symbol"))
    x<-eval(x, design$variables)
  
  x<-as.matrix(x)
  
  if (na.rm){
    nas<-rowSums(is.na(x))
            design<-design[nas==0,]
    x<-x[nas==0,,drop=FALSE]
  }
  
  pweights<-1/design$prob
  psum<-sum(pweights)
  average<-colSums(x*pweights/psum)
  x<-sweep(x,2,average)
  v<-svyCprod(x*pweights/psum,design$strata,design$cluster[[1]], design$fpc,
              design$nPSU,design$certainty, design$postStrata)
  attr(average,"var")<-v
    attr(average,"statistic")<-"mean"
  if(influence)
    attr(average, "influence")<-x*pweights/psum
  class(average)<-"svystat"
  if (is.character(deff) || deff){
    nobs<-NROW(design$cluster)
    vsrs<-svyvar(x,design,na.rm=na.rm)/nobs
    vsrs<-vsrs*(psum-nobs)/psum
    attr(average, "deff")<-v/vsrs
  }
  
  return(average)
}


print.svystat<-function(x,...){
    vv<-attr(x,"var")
    if (is.matrix(vv))
        m<-cbind(x,sqrt(diag(vv)))
    else
        m<-cbind(x,sqrt(vv))
    hasdeff<-!is.null(attr(x,"deff"))
    if (hasdeff) {
        m<-cbind(m,deff(x))
        colnames(m)<-c(attr(x,"statistic"),"SE","DEff")
    } else {
        colnames(m)<-c(attr(x,"statistic"),"SE")
    }
    printCoefmat(m)
}

as.data.frame.svystat<-function(x,...){
  rval<-data.frame(statistic=coef(x),SE=SE(x))
  names(rval)[1]<-attr(x,"statistic")
  if (!is.null(attr(x,"deff")))
    rval<-cbind(rval,deff=deff(x))
  rval
}

coef.svystat<-function(object,...){
  attr(object,"statistic")<-NULL
  attr(object,"deff")<-NULL
  attr(object,"var")<-NULL
  unclass(object)
}

vcov.svystat<-function(object,...){
  as.matrix(attr(object,"var"))
}

influence.svystat<-function(model,...){
  attr(model,"influence")
}

SE.svystat<-function(object,...){
 v<-vcov(object)
 if (!is.matrix(v) || NCOL(v)==1) sqrt(v) else sqrt(diag(v))
}

deff <- function(object,quietly=FALSE,...) UseMethod("deff")

deff.default <- function(object, quietly=FALSE,...){
  rval<-attr(object,"deff")
  if (is.null(rval)) { 
    if(!quietly)
      warning("object has no design effect information")
  } else rval<-diag(as.matrix(rval))
  rval
}

cv<-function(object,...) UseMethod("cv")

cv.default<-function(object, warn=TRUE, ...){
  rval<-SE(object)/coef(object)
  if (warn && any(coef(object)<0,na.rm=TRUE)) warning("CV may not be useful for negative statistics")
  rval
}


svytotal<-function(x,design,na.rm=FALSE,...){
  .svycheck(design)
  UseMethod("svytotal",design)
}
svytotal.survey.design<-function(x,design, na.rm=FALSE, deff=FALSE,influence=FALSE,...){

  if (!inherits(design,"survey.design"))
    stop("design is not a survey design")
  
    if (inherits(x,"formula")){
    ## do the right thing with factors
    mf<-model.frame(x,design$variables,na.action=na.pass)
    xx<-lapply(attr(terms(x),"variables")[-1],
               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
    cols<-sapply(xx,NCOL)
    x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
    scols<-c(0,cumsum(cols))
    for(i in 1:length(xx)){
      x[,scols[i]+1:cols[i]]<-xx[[i]]
    }
    colnames(x)<-do.call("c",lapply(xx,colnames))
  } else if(typeof(x) %in% c("expression","symbol"))
      x<-eval(x, design$variables)

  x<-as.matrix(x)
  
  if (na.rm){
    nas<-rowSums(is.na(x))
    design<-design[nas==0,]
    x<-x[nas==0,,drop=FALSE]
  }

  N<-sum(1/design$prob)
  m <- svymean(x, design, na.rm=na.rm)
  total<-m*N
  attr(total, "var")<-v<-svyCprod(x/design$prob,design$strata,
                                  design$cluster[[1]], design$fpc,
                                  design$nPSU,design$certainty,design$postStrata)
    attr(total,"statistic")<-"total"
    if (influence){
        attr(total,"influence")<-x/design$prob
        }
  if (is.character(deff) || deff){
    vsrs<-svyvar(x,design)*sum(weights(design)^2)
    vsrs<-vsrs*(N-NROW(design$cluster))/N
    attr(total,"deff")<-v/vsrs
  }
  return(total)
}

svyvar<-function(x, design, na.rm=FALSE,...){
  .svycheck(design)
  UseMethod("svyvar",design)
}
svyvar.survey.design<-function(x, design, na.rm=FALSE,...){
    
	if (inherits(x,"formula"))
            x<-model.frame(x,model.frame(design),na.action=na.pass)
	else if(typeof(x) %in% c("expression","symbol"))
            x<-eval(x, design$variables)
        
	n<-sum((weights(design,"sampling")!=0) & (rowSums(is.na(as.matrix(x)))==0))
	xbar<-svymean(x,design, na.rm=na.rm)
    if(NCOL(x)==1) {
        if(n==1){
            v<-NA
            attr(v,"statistic")<-NA
            attr(v,"var")<-NA
            class(v)<-"svystat"
            return(v)
        }
            x<-x-xbar
            v<-svymean(x*x*n/(n-1),design, na.rm=na.rm)
            attr(v,"statistic")<-"variance"
            return(v)
	}
	x<-t(t(x)-xbar)
	p<-NCOL(x)
	a<-matrix(rep(x,p),ncol=p*p)
	b<-x[,rep(1:p,each=p)]
        ## Kish uses the n-1 divisor, so it affects design effects
	v<-svymean(a*b*n/(n-1),design, na.rm=na.rm)
	vv<-matrix(v,ncol=p)
        dimnames(vv)<-list(names(xbar),names(xbar))
        attr(vv,"var")<-attr(v,"var")
        attr(vv,"statistic")<-"variance"
        class(vv)<-c("svyvar","svystat")
        vv
    }

print.svyvar<-function (x,  covariance=FALSE, ...) 
{
    if(!is.matrix(x)) NextMethod()
    
    vv <- attr(x, "var")
    if (covariance){
      nms<-outer(rownames(x),colnames(x),paste,sep=":")
      m<-cbind(as.vector(x), sqrt(diag(vv)))
      rownames(m)<-nms
    } else{
      ii <- which(diag(sqrt(length(x)))>0)
      m <- cbind(x[ii], sqrt(diag(vv))[ii])
    }
    colnames(m) <- c(attr(x, "statistic"), "SE")
    printCoefmat(m)
}

as.matrix.svyvar<-function(x,...) unclass(x)

coef.svyratio<-function(object,...,drop=TRUE){
  if (!drop) return(object$ratio)
  cf<-as.vector(object$ratio)
  nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
  names(cf)<-nms
  cf
}
 
SE.svyratio<-function(object,...,drop=TRUE){
  if(!drop) return(sqrt(object$var))
  se<-as.vector(sqrt(object$var))
  nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
  names(se)<-nms
  se
}

svyratio<-function(numerator,denominator, design,...){
  .svycheck(design)
  UseMethod("svyratio",design)
}

svyratio.survey.design<-function(numerator, denominator, design,...){

    if (inherits(numerator,"formula"))
		numerator<-model.frame(numerator,design$variables)
    else if(typeof(numerator) %in% c("expression","symbol"))
        numerator<-eval(numerator, design$variables)
    if (inherits(denominator,"formula"))
		denominator<-model.frame(denominator,design$variables)
    else if(typeof(denominator) %in% c("expression","symbol"))
        denominator<-eval(denominator, design$variables)

    nn<-NCOL(numerator)
    nd<-NCOL(denominator)

    all<-cbind(numerator,denominator)
    allstats<-svytotal(all,design) 
    rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))


    vars<-matrix(ncol=nd,nrow=nn)
    for(i in 1:nn){
      for(j in 1:nd){
        r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
        vars[i,j]<-svyCprod(r*1/design$prob, design$strata, design$cluster[[1]], design$fpc,
                            design$nPSU, design$certainty,design$postStrata)
      }
    }
    colnames(vars)<-names(denominator)
    rownames(vars)<-names(numerator)
    rval$var<-vars
    rval$call<-sys.call()
    class(rval)<-"svyratio"
    rval
    
  }

print.svyratio_separate<-function(x,...){
  cat("Stratified ratio estimate: ")
  if (!is.null(x$call))
    print(x$call)
  else if (!is.null(attr(x,"call")))
    print(attr(x$call))
  for(r in x$ratios) {
    print(r)
  }
  invisible(x)
}

print.svyratio<-function(x,...){
  cat("Ratio estimator: ")
  if (!is.null(x$call))
    print(x$call)
  else if(!is.null(attr(x,"call")))
    print(attr(x,"call"))
  cat("Ratios=\n")
  print(x$ratio)
  cat("SEs=\n")
  print(sqrt(x$var))
  invisible(NULL)
}

predict.svyratio<-function(object, total, se=TRUE,...){
  if (se)
    return(list(total=object$ratio*total,se=sqrt(object$var)*total))
  else
    return(object$ratio*total)
}

predict.svyratio_separate<-function(object, total, se=TRUE,...){

  if (length(total)!=length(object$ratios))
    stop("Number of strata differ in ratio object and totals.")
  if (!is.null(names(total)) && !is.null(levels(object$strata))){
    if (!setequal(names(total), levels(object$strata)))
      warning("Names of strata differ in ratio object and totals")
    else if (!all(names(total)==levels(object$strata))){
      warning("Reordering supplied totals to make their names match the ratio object")
      total<-total[match(names(total),levels(object$strata))]
    }
  }
  totals<-mapply(predict, object=object$ratios, total=total,se=se,...,SIMPLIFY=FALSE)

  if(se){
    rval<-totals[[1]]$total
    v<-totals[[1]]$se^2
    for(ti in totals[-1]) {
      rval<-rval+ti$total
      v<-v+ti$se^2
    }
    list(total=rval,se=sqrt(v))
  } else {
    rval<-totals[[1]]
    for (ti in totals[-1]) rval<-rval+ti
    rval
  }

}


cv.svyratio<-function(object,...){
  sqrt(object$var)/object$ratio
}

svytable<-function(formula, design, ...){
    UseMethod("svytable",design)
}

svytable.survey.design<-function(formula, design, Ntotal=NULL, round=FALSE,...){
  
  if (!inherits(design,"survey.design")) stop("design must be a survey design")
  weights<-1/design$prob
  
  ## unstratified or unadjusted
  if (length(Ntotal)<=1 || !design$has.strata){
    if (length(formula)==3)
      tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~.(formula[[3]]), data=model.frame(design),...))
       else
         tblcall<-bquote(xtabs(weights~.(formula[[2]]), data=model.frame(design),...))
    tbl<-eval(tblcall)
    if (!is.null(Ntotal)) {
      if(length(formula)==3)
        tbl<-tbl/sum(Ntotal)
      else
        tbl<-tbl*sum(Ntotal)/sum(tbl)
    }
    if (round)
      tbl<-round(tbl)
    attr(tbl,"call")<-match.call()
    class(tbl)<-c("svytable",class(tbl))
    return(tbl)
  }
  ## adjusted and stratified
  if (length(formula)==3)
    tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~design$strata[,1]+.(formula[[3]]), data=model.frame(design),...))
  else
    tblcall<-bquote(xtabs(weights~design$strata[,1]+.(formula[[2]]), data=model.frame(design),...))
  
  tbl<-eval(tblcall)
  
  ss<-match(sort(unique(design$strata[,1])), Ntotal[,1])
  dm<-dim(tbl)
  layer<-prod(dm[-1])
  tbl<-sweep(tbl,1,Ntotal[ss, 2]/apply(tbl,1,sum),"*")
  tbl<-apply(tbl, 2:length(dm), sum)
  if (round)
    tbl<-round(tbl)
  class(tbl)<-c("svytable","xtabs", "table")
  attr(tbl, "call")<-match.call()
  tbl
}

svycoxph<-function(formula,design,subset=NULL,rescale=TRUE,...){
  .svycheck(design)
  UseMethod("svycoxph",design)
}

svycoxph.survey.design<-function(formula,design, subset=NULL, rescale=TRUE, ...){
    subset<-substitute(subset)
    subset<-eval(subset, model.frame(design),parent.frame())
    if (!is.null(subset))
        design<-design[subset,]

    if(any(weights(design)<0)) stop("weights must be non-negative")
    
    data<-model.frame(design)
    
    g<-match.call()
    g$formula<-eval.parent(g$formula)
    g$design<-NULL
    g$var<-NULL
    g$rescale <- NULL
    if (is.null(g$weights))
      g$weights<-quote(.survey.prob.weights)
    else
      g$weights<-bquote(.survey.prob.weights*.(g$weights))
    g[[1]]<-quote(coxph)
    g$data<-quote(data)
    g$subset<-quote(.survey.prob.weights>0)
    g$model <- TRUE
    
    ##need to rescale weights for stability
    ## unless the user doesn't want to
    if (rescale)
        data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob)
    else
        data$.survey.prob.weights<-1/design$prob
    if (!all(all.vars(formula) %in% names(data))) 
        stop("all variables must be in design= argument")
    g<-with(list(data=data), eval(g))

    if (inherits(g, "coxph.penal"))
        warning("svycoxph does not support penalised terms")
    
    g$call<-match.call()
    g$call[[1]]<-as.name(.Generic)
    g$printcall<-sys.call(-1)
    g$printcall[[1]]<-as.name(.Generic)
    class(g)<-c("svycoxph", class(g))
    g$survey.design<-design
    
    nas<-g$na.action
    if (length(nas))
        design<-design[-nas,]

    ## if there are betas...
    if (length(coef(g))>0){
        dbeta.subset<-resid(g,"dfbeta",weighted=TRUE)
        if (nrow(design)==NROW(dbeta.subset)){
            dbeta<-as.matrix(dbeta.subset)
        } else {
            dbeta<-matrix(0,ncol=NCOL(dbeta.subset),nrow=nrow(design))
            dbeta[is.finite(design$prob),]<-dbeta.subset
        }
        
        if (!is.null(g$naive.var)) ## newer versions of survival switch to robust=TRUE with non-integer weights
            g$inv.info<-g$naive.var
        else
            g$inv.info<-g$var
        if (inherits(design,"survey.design2"))
            g$var<-svyrecvar(dbeta, design$cluster,
                             design$strata, design$fpc,
                             postStrata=design$postStrata)
        else if (inherits(design, "twophase"))
            g$var<-twophasevar(dbeta, design)
        else if(inherits(design, "twophase2"))
            g$var<-twophase2var(dbeta, design)
        else if(inherits(design, "pps"))
            g$var<-ppsvar(dbeta,design)
        else
            g$var<-svyCprod(dbeta, design$strata,
                            design$cluster[[1]], design$fpc,design$nPSU,
                            design$certainty,design$postStrata)
    
        g$wald.test<-coef(g)%*%solve(g$var,coef(g))
    } else g$var<-matrix(ncol=0,nrow=0)
    
    g$ll<-g$loglik
    g$loglik<-NULL
    g$rscore<-NULL
    g$score<-NA
    g$degf.resid<-degf(design)-length(coef(g)[!is.na(coef(g))])+1
    
    g
}


model.frame.svycoxph<-function(formula,...){
    f<-formula$call
    env <- environment(formula(formula))
    if (is.null(env)) 
        env <- parent.frame()
    f[[1]]<-as.name("model.frame")
    f$data<-quote(data)
    f$design<-NULL
    f$method<-f$control<-f$singular.ok<-f$model<-f$x<-f$y<-f$iter<-NULL
    f$formula<-formula(formula)
    if (is.null(f$weights))
        f$weights<-quote(.survey.prob.weights)
    else 
        f$weights<-bquote(.survey.prob.weights*.(f$weights))
    design<-formula$survey.design
    data<-model.frame(design)
    data$.survey.prob.weights<-(1/design$prob)/sum(1/design$prob)
    with(list(data=data), eval(f))
}

## model.matrix.svycoxph<-function (object, data = NULL, contrast.arg = object$contrasts, 
##     ...) 
## {
##     if (!is.null(object[["x"]])) 
##         object[["x"]]
##     else {
##         if (is.null(data)) 
##             data <- model.frame(object, ...)
##         else data <- model.frame(object, data = data, ...)
##         Terms <- object$terms
##         attr(Terms, "intercept") <- 1
##         strats <- attr(Terms, "specials")$strata
##         cluster <- attr(Terms, "specials")$cluster
##         dropx <- NULL
##         if (length(cluster)) {
##             tempc <- untangle.specials(Terms, "cluster", 1:10)
##             ord <- attr(Terms, "order")[tempc$terms]
##             if (any(ord > 1)) 
##                 stop("Cluster can not be used in an interaction")
##             dropx <- tempc$terms
##         }
##         if (length(strats)) {
##             temp <- untangle.specials(Terms, "strata", 1)
##             dropx <- c(dropx, temp$terms)
##         }
##         if (length(dropx)) {
##             newTerms <- Terms[-dropx]
##             X <- model.matrix(newTerms, data, contrasts = contrast.arg)
##         }
##         else {
##             newTerms <- Terms
##             X <- model.matrix(Terms, data, contrasts = contrast.arg)
##         }
##         X
##     }
## }

print.svycoxph<-function(x,...){
    print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...)
##    x$call<-x$printcall
    NextMethod()
}

summary.svycoxph<-function(object,...){
    print(object$survey.design,varnames=FALSE, design.summaries=FALSE,...)
##    object$call<-object$printcall
    NextMethod()
}

survfit.svycoxph<-function(formula,...){
    stop("No survfit method for survey models")
}
extractAIC.svycoxph<-function(fit,...){
    stop("No AIC for survey models")
}



svyglm<-function(formula, design,subset=NULL,family=stats::gaussian(),start=NULL, ...){
  .svycheck(design)
  UseMethod("svyglm",design)
}

svyglm.survey.design<-function(formula,design,subset=NULL, family=stats::gaussian(),start=NULL,
                               rescale=TRUE,..., deff=FALSE, influence=FALSE){

      subset<-substitute(subset)
      subset<-eval(subset, model.frame(design), parent.frame())
    if (!is.null(subset)){
        if (any(is.na(subset)))
            stop("subset must not contain NA values")
        design<-design[subset,]
      }
      data<-model.frame(design)

      g<-match.call()
    g$formula<-eval.parent(g$formula)
    g$influence<-NULL
      g$design<-NULL
      g$var <- NULL
    g$rescale <- NULL
    g$deff<-NULL
    g$subset <- NULL  ## done it already
      g$family<-family
      if (is.null(g$weights))
        g$weights<-quote(.survey.prob.weights)
      else 
          g$weights<-bquote(.survey.prob.weights*.(g$weights))
      g$data<-quote(data)
      g[[1]]<-quote(glm)      

      ##need to rescale weights for stability in binomial
      ## (unless the user doesn't want to)
      if (rescale)
          data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob)
      else
          data$.survey.prob.weights<-(1/design$prob)

      if(any(is.na(data$.survey.prob.weights)))
        stop("weights must not contain NA values")
      if (!all(all.vars(formula) %in% names(data))) 
	stop("all variables must be in design= argument")
    g<-with(list(data=data), eval(g))
    summ<-summary(g)
      g$naive.cov<-summ$cov.unscaled
      
      nas<-g$na.action
      if (length(nas))
         design<-design[-nas,]

      g$cov.unscaled<-svy.varcoef(g,design)
      g$df.residual <- degf(design)+1-length(coef(g)[!is.na(coef(g))])
      
      class(g)<-c("svyglm",class(g))
      g$call<-match.call()
      g$call[[1]]<-as.name(.Generic)
      if(!("formula" %in% names(g$call))) {
        if (is.null(names(g$call)))
          i<-1
        else
          i<-min(which(names(g$call)[-1]==""))
        names(g$call)[i+1]<-"formula"
      }

    if(deff){
        vsrs<-summ$cov.scaled*mean(data$.survey.prob.weights)
        attr(g,"deff")<-g$cov.unscaled/vsrs
    }

    if (influence){
        estfun<- model.matrix(g)*naa_shorter(nas, resid(g,"working"))*g$weights
        if (g$rank<NCOL(estfun)){
            estfun<-estfun[,g$qr$pivot[1:g$rank]]
        }
        if ( length(nas) && (NROW(data)>NROW(estfun))){
            estfun1<-matrix(0,ncol=ncol(estfun),nrow=nrow(data))
            estfun1[-nas,]<-estfun
            estfun<-estfun1
            }
        attr(g, "influence")<-estfun%*%g$naive.cov
    }

    g$survey.design<-design 
    g
}

print.svyglm<-function(x,...){
  print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...)
  NextMethod()

}

coef.svyglm<-function(object,complete=FALSE,...,na.rm=!complete) {
  beta<-object$coefficients
  if (!na.rm || length(beta)==object$rank)
    beta
  else
    beta[object$qr$pivot[1:object$rank]]
}

vcov.svyglm<-function(object,...) {
  v<-object$cov.unscaled
  dimnames(v)<-list(names(coef(object)),names(coef(object)))
  v
}


svy.varcoef<-function(glm.object,design){
    Ainv<-summary(glm.object)$cov.unscaled
    nas<-glm.object$na.action
    estfun<-model.matrix(glm.object)*naa_shorter(nas, resid(glm.object,"working"))*glm.object$weights
    if (glm.object$rank<NCOL(estfun)){
      estfun<-estfun[,glm.object$qr$pivot[1:glm.object$rank]]
    }
    naa<-glm.object$na.action
    ## the design may still have rows with weight zero for missing values
    ## if there are weights or calibration. model.matrix will have removed them
    if (length(naa) && (NROW(estfun)!=nrow(design) )){
      if ((length(naa)+NROW(estfun))!=nrow(design) )
        stop("length mismatch: this can't happen.")
      n<-nrow(design)     
      inx <- (1:n)[-naa]
      ee <- matrix(0,nrow=n,ncol=NCOL(estfun))
      ee[inx,]<-estfun
      estfun<-ee
    }

    if (inherits(design,"survey.design2"))
      svyrecvar(estfun%*%Ainv,design$cluster,design$strata,design$fpc,postStrata=design$postStrata)
    else if (inherits(design, "twophase"))
      twophasevar(estfun%*%Ainv, design)
    else if (inherits(design, "twophase2"))
      twophase2var(estfun%*%Ainv, design)
    else if (inherits(design, "pps"))
      ppsvar(estfun%*%Ainv, design)
    else
      vcov(svytotal(estfun%*%Ainv/weights(design,"sampling"), design))
  }

residuals.svyglm<-function(object,type = c("deviance", "pearson", "working", 
    "response", "partial"),...){
	type<-match.arg(type)
	if (type=="pearson"){
   	   y <- object$y
	   mu <- object$fitted.values
    	   wts <- object$prior.weights
           pwts<- 1/object$survey.design$prob
           pwts<- pwts/mean(pwts)
           ## missing values in calibrated/post-stratified designs
           ## the rows will still be in the design object but not in the model
           if (length(naa<-object$na.action) && (length(pwts)!=length(wts))){
             if(length(naa)+length(wts) != length(pwts))
               stop("length mismatch: this can't happen.")
             inx<-(1:length(pwts))[-naa]
           } else inx<-1:length(pwts)
           r<-numeric(length(pwts))
	   r[inx]<-(y - mu) * sqrt(wts/pwts[inx])/(sqrt(object$family$variance(mu)))
	   if (is.null(object$na.action)) 
        	r
    	   else 
	        naresid(object$na.action, r)
	} else 
		NextMethod()

}

summary.svyglm<-function (object, correlation = FALSE, df.resid=NULL,...) 
{
    Qr <- object$qr
    est.disp <- TRUE
    if (is.null(df.resid))
      df.r <- object$df.residual
    else
      df.r<-df.resid
    
    dispersion<-svyvar(resid(object,"pearson"), object$survey.design,
                       na.rm=TRUE)
    
    coef.p <- coef(object)
    covmat<-vcov(object)
    dimnames(covmat) <- list(names(coef.p), names(coef.p))
    var.cf <- diag(covmat)
    s.err <- sqrt(var.cf)
    tvalue <- coef.p/s.err
    dn <- c("Estimate", "Std. Error")
    if (!est.disp) {
        pvalue <- 2 * pnorm(-abs(tvalue))
        coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
        dimnames(coef.table) <- list(names(coef.p), c(dn, "z value", 
            "Pr(>|z|)"))
    }
    else if (df.r > 0) {
        pvalue <- 2 * pt(-abs(tvalue), df.r)
        coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
        dimnames(coef.table) <- list(names(coef.p), c(dn, "t value", 
            "Pr(>|t|)"))
    }
    else {
       coef.table <- cbind(coef.p, s.err, tvalue, NaN)
       dimnames(coef.table) <- list(names(coef.p), c(dn, "t value", 
                                                      "Pr(>|t|)"))
    }
    ans <- c(object[c("call", "terms", "family", "deviance", 
        "aic", "contrasts", "df.residual", "null.deviance", "df.null", 
        "iter")], list(deviance.resid = residuals(object, type = "deviance"), 
        aic = object$aic, coefficients = coef.table, dispersion = dispersion, 
        df = c(object$rank, df.r,NCOL(Qr$qr)), cov.unscaled = covmat, 
        cov.scaled = covmat))
    if (correlation) {
        dd <- sqrt(diag(covmat))
        ans$correlation <- covmat/outer(dd, dd)
    }
    ans$aliased<-is.na(coef(object,na.rm=FALSE))
    ans$survey.design<-list(call=object$survey.design$call)
    class(ans) <- c("summary.svyglm","summary.glm")
    return(ans)
}


confint.svyglm<-function(object,parm,level=0.95,method=c("Wald","likelihood"),ddf=NULL,...){
    method<-match.arg(method)
    if(method=="Wald"){        
        if (is.null(ddf))
            ddf <- object$df.residual
        if (ddf<=0) {
            ci<-confint.default(object,parm=parm,level=.95,...)*NaN
        } else {
            tlevel <- 1 - 2*pnorm(qt((1 - level)/2, df = ddf))
            ci<-confint.default(object,parm=parm,level=tlevel,...)
        }
        a <- (1 - level)/2
        a <- c(a, 1 - a)
        pct <- format.perc(a, 3)
        colnames(ci)<-pct
        return(ci)
    }
  pnames <- names(coef(object))
  if (missing(parm)) 
    parm <- seq_along(pnames)
  else if (is.character(parm))
    parm <- match(parm, pnames, nomatch = 0)
  lambda<-diag(object$cov.unscaled[parm,parm,drop=FALSE])/diag(object$naive.cov[parm,parm,drop=FALSE])
  if(is.null(ddf)) ddf<-object$df.residual
  if (ddf==Inf){
      alpha<-pnorm(qnorm((1-level)/2)*sqrt(lambda))/2
  }  else if (ddf<=0) {
      stop("zero or negative denomintor df")
  } else {
      alpha<-pnorm(qt((1-level)/2,df=ddf)*sqrt(lambda))/2
  }
  rval<-vector("list",length(parm))
  for(i in 1:length(parm)){
    temp<-MASSprofile_glm(fitted=object,which=parm[i],alpha=alpha[i],...)
    rval[[i]]<-confint_profile(temp,parm=parm[i],level=level, unscaled_level=2*alpha[i],...)
  }
  
  names(rval)<-pnames[parm]
  if (length(rval)==1)
    rval<-rval[[1]]
  else
    rval<-do.call(rbind,rval)
  attr(rval,"levels")<-level
  rval
}


##
## based on MASS:::confint.profile.glm
## which is GPL and (c) Bill Venables and Brian D Ripley.
##
confint_profile <- function (object, parm = seq_along(pnames), level = 0.95, unscaled_level, ...) 
{
    of <- attr(object, "original.fit")
    pnames <- names(coef(of))
    if (is.character(parm)) 
        parm <- match(parm, pnames, nomatch = 0L)
    a <- (1-level)/2
    a <- c(a, 1 - a)
    pct <- paste(round(100 * a, 1), "%")
    ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(pnames[parm], 
        pct))
    cutoff <- c(qnorm(unscaled_level),qnorm(unscaled_level, lower.tail=FALSE))
    for (pm in parm) {
        pro <- object[[pnames[pm]]]
        if (is.null(pro)) 
            next
        if (length(pnames) > 1L) 
            sp <- spline(x = pro[, "par.vals"][, pm], y = pro[, 
                1])
        else sp <- spline(x = pro[, "par.vals"], y = pro[, 1])
        ci[pnames[pm], ] <- approx(sp$y, sp$x, xout = cutoff)$y
    }
    drop(ci)
}

##
## MASS:::profile.glm with very slight changes to avoid rounding error in 1-alpha
## original is GPL and (c) Bill Venables and Brian D Ripley.
##

MASSprofile_glm<-function (fitted, which = 1:p, alpha = 0.01, maxsteps = 10, del = zmax/5, 
    trace = FALSE, ...) 
{
    Pnames <- names(B0 <- coef(fitted))
    nonA <- !is.na(B0)
    pv0 <- t(as.matrix(B0))
    p <- length(Pnames)
    if (is.character(which)) 
        which <- match(which, Pnames)
    summ <- summary(fitted)
    std.err <- summ$coefficients[, "Std. Error", drop = FALSE]
    mf <- model.frame(fitted)
    Y <- model.response(mf)
    n <- NROW(Y)
    O <- model.offset(mf)
    if (!length(O)) 
        O <- rep(0, n)
    W <- model.weights(mf)
    if (length(W) == 0L) 
        W <- rep(1, n)
    OriginalDeviance <- deviance(fitted)
    DispersionParameter <- summ$dispersion
    X <- model.matrix(fitted)
    fam <- family(fitted)
    switch(fam$family, binomial = , poisson = , `Negative Binomial` = {
        zmax <- sqrt(qchisq(alpha, 1,lower.tail=FALSE))
        profName <- "z"
    }, gaussian = , quasi = , inverse.gaussian = , quasibinomial = , 
        quasipoisson = , {
            zmax <- sqrt(qf(alpha, 1, n - p,lower.tail=FALSE))
            profName <- "tau"
        })
    prof <- vector("list", length = length(which))
    names(prof) <- Pnames[which]
    for (i in which) {
        if (!nonA[i]) 
            next
        zi <- 0
        pvi <- pv0
        a <- nonA
        a[i] <- FALSE
        Xi <- X[, a, drop = FALSE]
        pi <- Pnames[i]
        for (sgn in c(-1, 1)) {
            if (trace) 
                message("\nParameter: ", pi, " ", c("down", "up")[(sgn + 
                  1)/2 + 1])
            step <- 0
            z <- 0
            LP <- X[, nonA, drop = FALSE] %*% B0[nonA] + O
            while ((step <- step + 1) < maxsteps && abs(z) < 
                zmax) {
                bi <- B0[i] + sgn * step * del * std.err[Pnames[i], 
                  1]
                o <- O + X[, i] * bi
                fm <- glm.fit(x = Xi, y = Y, weights = W, etastart = LP, 
                  offset = o, family = fam, control = fitted$control)
                LP <- Xi %*% fm$coefficients + o
                ri <- pv0
                ri[, names(coef(fm))] <- coef(fm)
                ri[, pi] <- bi
                pvi <- rbind(pvi, ri)
                zz <- (fm$deviance - OriginalDeviance)/DispersionParameter
                if (zz > -0.001) 
                  zz <- max(zz, 0)
                else stop("profiling has found a better solution, so original fit had not converged")
                z <- sgn * sqrt(zz)
                zi <- c(zi, z)
            }
        }
        si <- order(zi)
        prof[[pi]] <- structure(data.frame(zi[si]), names = profName)
        prof[[pi]]$par.vals <- pvi[si, , drop = FALSE]
    }
    val <- structure(prof, original.fit = fitted, summary = summ)
    class(val) <- c("profile.glm", "profile")
    val
}


###

svymle<-function(loglike, gradient=NULL, design, formulas,
    start=NULL, control=list(),
    na.action="na.fail", method=NULL,lower=NULL,upper=NULL,
    influence=FALSE,...){
  if(is.null(method))
    method<-if(is.null(gradient)) "Nelder-Mead" else "newuoa"
  if (!inherits(design,"survey.design")) 
    stop("design is not a survey.design")
  weights<-weights(design)
  wtotal<-sum(weights)
  
  if (!(method %in% c("bobyqa","newuoa")) && is.null(control$fnscale))
    control$fnscale <- -wtotal/length(weights)
  if (inherits(design, "twophase")) {
    data<-design$phase1$sample$variables
  } else { 
    data<-design$variables
  }
  
  ## Get the response variable
  nms<-names(formulas)
  if (nms[1]==""){
    if (inherits(formulas[[1]],"formula")) {
      y<-eval.parent(model.frame(formulas[[1]],data=data,na.action=na.pass))
    } else {
      y<-eval(y,data,parent.frame())
    }
    formulas[1]<-NULL
    if (FALSE && NCOL(y)>1) stop("Y has more than one column")
  }   else {
    ## one formula must have response
    has.response<-sapply(formulas,length)==3
    if (sum(has.response)!=1) stop("Need a response variable")
    ff<-formulas[[which(has.response)]]
    ff[[3]]<-1
    y<-eval.parent(model.frame(ff,data=data,na.action=na.pass))
    formulas[[which(has.response)]]<-delete.response(terms(formulas[[which(has.response)]]))
    nms<-c("",nms)
  }
  
  if(length(which(nms==""))>1) stop("Formulas must have names")
  
  mf<-vector("list",length(formulas))
  vnms <- unique(do.call(c, lapply(formulas, all.vars)))
  uformula <- make.formula(vnms)
  mf <- model.frame(uformula, data=data,na.action=na.pass)
  mf <- cbind(`(Response)`=y, mf)
  mf<-mf[,!duplicated(colnames(mf)),drop=FALSE]
  
  mf<-get(na.action)(mf)  
  nas<-attr(mf,"na.action")
  if (length(nas))
    design<-design[-nas,]
  weights<-1/design$prob
  wtotal<-sum(weights)
  
  Y<-mf[,1]
  
#  mm <- lapply(formulas,model.matrix, data=mf)
  
  mmFrame <- lapply(formulas,model.frame, data=mf)
  mm = mapply(model.matrix, object = formulas, data=mmFrame, SIMPLIFY=FALSE)
  mmOffset <- lapply(mmFrame, model.offset)
  
  # add a vector of zeros if there is no offset provided
  noOffset = which(unlist(lapply(mmOffset, length))==0)
  for(D in noOffset) {
    mmOffset[[D]] =  rep(0, NROW(mm[[1]]))
  }
  
  ## parameter names
  parnms<-lapply(mm,colnames)
  for(i in 1:length(parnms))
    parnms[[i]]<-paste(nms[i+1],parnms[[i]],sep=".")
  parnms<-unlist(parnms)
  
  # maps position in theta to model matrices
  np<-c(0,cumsum(sapply(mm,NCOL)))
  
  
  objectivefn<-function(theta,...){
    args<-vector("list",length(nms))
    args[[1]]<-Y
    for(i in 2:length(nms))
      args[[i]]<-mm[[i-1]]%*%theta[(np[i-1]+1):np[i]] + 
          mmOffset[[i-1]]
    names(args)<-nms
    args<-c(args, ...)
    sum(do.call("loglike",args)*weights)
  }
  
  if (is.null(gradient)) {
    grad<-NULL
  } else {  
    fnargs<-names(formals(loglike))[-1]
    grargs<-names(formals(gradient))[-1]
    if(!identical(fnargs,grargs))
      stop("loglike and gradient have different arguments.")
    reorder<-na.omit(match(grargs,nms[-1]))
    grad<-function(theta,...){
      args<-vector("list",length(nms))
      args[[1]]<-Y
      for(i in 2:length(nms))
        args[[i]]<-drop(
            mm[[i-1]]%*%theta[(np[i-1]+1):np[i]] +
                mmOffset[[i-1]])
      names(args)<-nms
      args<-c(args,...)
      rval<-NULL
      tmp<-do.call("gradient",args)
      for(i in reorder){
        rval<-c(rval, colSums(as.matrix(tmp[,i]*weights*mm[[i]])))
      }
      drop(rval)
    }
  }
  
  theta0<-numeric(np[length(np)])
  if (is.list(start))
    st<-do.call("c",start)
  else
    st<-start
  
  if (length(st)==length(theta0)) {
    theta0<-st
  } else {
    stop("starting values wrong length")
  }
  
  if (method=="nlm"){
    ff<-function(theta){
      rval<- -objectivefn(theta)
      if (is.na(rval)) rval<- -Inf
      attr(rval,"gradient")<- -grad(theta)
      rval
    }
    rval<-nlm(ff, theta0,hessian=TRUE)
    if (rval$code>3) warning("nlm did not converge")
    rval$par<-rval$estimate
  } else if (method == "newuoa"){
      rval<-minqa::uobyqa(theta0, function(par,...) -objectivefn(par,...), control=control, ...)
      if(rval$ier>0) warning(rval$msg)
      rval$hessian <- numDeriv::hessian(objectivefn, rval$par)
  } else if (method == "bobyqa"){
      if (is.list(lower)) lower<-do.call(c, lower)
      if (is.list(upper)) upper<-do.call(c,upper)
      rval<-minqa::bobyqa(theta0, function(par,...) -objectivefn(par,...), control=control,lower=lower,upper=upper, ...)
      if(rval$ier>0) warning(rval$msg)
      rval$hessian <- numDeriv::hessian(objectivefn,rval$par)
  }else {
    rval<-optim(theta0, objectivefn, grad,control=control,
        hessian=TRUE,method=method,...)
    if (rval$conv!=0) warning("optim did not converge")
  }
  
  
  
  
  names(rval$par)<-parnms
  dimnames(rval$hessian)<-list(parnms,parnms)
  
  if (is.null(gradient)) {
    rval$invinf<-solve(-rval$hessian)
    rval$scores<-NULL
    rval$sandwich<-NULL
  }  else {
    theta<-rval$par
    args<-vector("list",length(nms))
    args[[1]]<-Y
    for(i in 2:length(nms))
      args[[i]]<-drop(mm[[i-1]]%*%theta[(np[i-1]+1):np[i]]+ mmOffset[[i-1]])
    names(args)<-nms
    args<-c(args,...)
    deta<-do.call("gradient",args)
    rval$scores<-NULL
    for(i in reorder)
      rval$scores<-cbind(rval$scores,deta[,i]*weights*mm[[i]])
    
    rval$invinf<-solve(-rval$hessian)
    dimnames(rval$invinf)<-list(parnms,parnms)
    
    db<-rval$scores%*%rval$invinf
    if (inherits(design,"survey.design2"))
      rval$sandwich<-svyrecvar(db,design$cluster,design$strata, design$fpc, 
          postStrata=design$postStrata)
    else if (inherits(design, "twophase"))
      rval$sandwich<-twophasevar(db,design)
    else
      rval$sandwich<-svyCprod(db,design$strata,design$cluster[[1]],
          design$fpc, design$nPSU,
          design$certainty, design$postStrata)
    dimnames(rval$sandwich)<-list(parnms,parnms)
  }

  if (influence){
      if (nas && (NROW(data)>NROW(db))){
          db1<-matrix(0,nrow=NROW(data),ncol=NCOL(db))
          db1[-nas,]<-db
          db<-db1
      }
      attr(rval,"influence")<-db
      }
  rval$call<-match.call()
  rval$design<-design
  class(rval)<-"svymle"
  rval
  
}


coef.svymle<-function(object,...) object$par

vcov.svymle<-function(object,stderr=c("robust","model"),...) {
    stderr<-match.arg(stderr)
    if (stderr=="robust"){
	rval<-object$sandwich
	if (is.null(rval)) {
		p<-length(coef(object))
		rval<-matrix(NA,p,p)
	}
    } else {
        rval<-object$invinf*mean(1/object$design$prob)
    }
    rval
}


print.svymle<-function(x,...){
  cat("Survey-sampled mle: \n")
  print(x$call)
  cat("Coef:  \n")
  print(x$par)
}

summary.svymle<-function(object,stderr=c("robust","model"),...){
    cat("Survey-sampled mle: \n")
    print(object$call)
    stderr<-match.arg(stderr)
    tbl<-data.frame(Coef=coef(object),SE=sqrt(diag(vcov(object,stderr=stderr))))
    tbl$p.value<-format.pval(2*(1-pnorm(abs(tbl$Coef/tbl$SE))), digits=3,eps=0.001)
    print(tbl)
    print(object$design)
}

model.frame.survey.design<-function(formula,...,drop=TRUE){
  formula$variables
}
model.frame.svyrep.design<-function(formula,...){
  formula$variables
}
model.frame.survey.design2<-function(formula,...){
  formula$variables
}

.onLoad<-function(...){
  if (is.null(getOption("survey.lonely.psu")))
    options(survey.lonely.psu="fail")
  if (is.null(getOption("survey.ultimate.cluster")))
    options(survey.ultimate.cluster=FALSE)
  if (is.null(getOption("survey.want.obsolete")))
    options(survey.want.obsolete=FALSE)
  if (is.null(getOption("survey.adjust.domain.lonely")))
    options(survey.adjust.domain.lonely=FALSE)
  if (is.null(getOption("survey.drop.replicates")))
      options(survey.drop.replicates=TRUE)
  if (is.null(getOption("survey.multicore")))
    options(survey.multicore=FALSE)
  if (is.null(getOption("survey.replicates.mse")))
      options(survey.replicates.mse=FALSE)
  if (is.null(getOption("survey.use_rcpp")))
      options(survey.use_rcpp=TRUE)
}


predterms<-function(object,se=FALSE,terms=NULL){
  tt<-terms(object)
  n <- length(object$residuals)
  p <- object$rank
  p1 <- seq_len(p)
  piv <- object$qr$pivot[p1]
  beta<-coef(object)
  X<-mm<-model.matrix(object)
  aa <- attr(mm, "assign")
  ll <- attr(tt, "term.labels")
  hasintercept <- attr(tt, "intercept") > 0L
  if (hasintercept) 
    ll <- c("(Intercept)", ll)
  aaa <- factor(aa, labels = ll)
  asgn <- split(order(aa), aaa)
  if (hasintercept) {
    asgn$"(Intercept)" <- NULL
    }
  avx <- colMeans(mm)
  termsconst <- sum(avx[piv] * beta[piv])
  nterms <- length(asgn)
  ip <- matrix(ncol = nterms, nrow = NROW(X))
  if (nterms > 0) {
    predictor <- matrix(ncol = nterms, nrow = NROW(X))
    dimnames(predictor) <- list(rownames(X), names(asgn))
    
    if (hasintercept) 
      X <- sweep(X, 2L, avx, check.margin = FALSE)
    unpiv <- rep.int(0L, NCOL(X))
    unpiv[piv] <- p1
    for (i in seq.int(1L, nterms, length.out = nterms)) {
      iipiv <- asgn[[i]]
      ii <- unpiv[iipiv]
      iipiv[ii == 0L] <- 0L
      predictor[, i] <- if (any(iipiv > 0L)) 
        X[, iipiv, drop = FALSE] %*% beta[iipiv]
      else 0
      if (se){
        ip[,i]<-if (any(iipiv > 0L)) 
          rowSums(as.matrix(X[, iipiv, drop = FALSE] %*% vcov(object)[ii,ii]) * X[, iipiv, drop = FALSE]) else 0
      }
    }
    if (!is.null(terms)) {
      predictor <- predictor[, terms, drop = FALSE]
      if (se) 
        ip <- ip[, terms, drop = FALSE]
    }
  }
  else {
    predictor <- ip <- matrix(0, n, 0)
  }
  attr(predictor, "constant") <- if (hasintercept) 
    termsconst
  else 0
  if(se)
          dimnames(ip)<-dimnames(predictor)
  if (se) list(fit=predictor,se.fit=sqrt(ip)) else predictor
}


predict.svyglm <- function(object, newdata=NULL, total=NULL,
                           type = c("link", "response","terms"),
                           se.fit=(type!="terms"),
                           vcov=FALSE,...){
    if(is.null(newdata))
      newdata<-model.frame(object$survey.design)
    type<-match.arg(type)
    if (type=="terms")
      return(predterms(object,se=se.fit,...))
    tt<-delete.response(terms(formula(object)))
    mf<-model.frame(tt,data=newdata, xlev=object$xlevels)
    mm<-model.matrix(tt,mf,contrasts.arg = object$contrasts)
    if (!is.null(total) && attr(tt,"intercept")){
        mm[,attr(tt,"intercept")]<-mm[,attr(tt,"intercept")]*total
    }
    eta<-drop(mm %*% coef(object))
    d<-drop(object$family$mu.eta(eta))
    eta<-switch(type, link=eta, response=object$family$linkinv(eta))
    if(se.fit){
        if(vcov){
            vv<-mm %*% vcov(object) %*% t(mm)
            attr(eta,"var")<-switch(type,
                                    link=vv,
                                    response=d*(t(vv*d)))
        } else {
            ## FIXME make this more efficient
            vv<-drop(rowSums((mm %*% vcov(object)) * mm))
            attr(eta,"var")<-switch(type,
                                    link=vv,
                                    response=drop(d*(t(vv*d))))
        }
    }
    attr(eta,"statistic")<-type
    class(eta)<-"svystat"
    eta
    }
    
bschneidr/fastsurvey documentation built on March 13, 2024, 11:12 a.m.