# svyPVpack: A package for complex surveys including plausible values

# Documented in svyPVprob

#' Proportion estimation
#' 
#' This function was created to estimate the proportions of weighted
#' observations within each group.
#' 
#' 
#' @param by A formula statement is expected which splits the data into several
#' subsets.
#' @param svydat A survey design (\code{svydesign} as well as
#' \code{svrepdesign}) which was generated by the \code{survey} package.
#' @param pvs Either a character vector of variable names (these variables
#' must exist within the survey) or \code{NULL} (the default). If plausible
#' values (or any other variable names) are provided by the argument
#' \code{pvs}, the weighted ratio of group members is computed after listwise
#' deletion of the observations which contain any NA in these variables.
#' @param colN If TRUE the colnames will equal the grouping variable names from
#' the by statement. If FALSE, which is the default, the names will be Group1
#' up to Group k.
#' @return The function returns a data.frame with the following columns
#' \item{Group1..k}{The first k columns show the different levels of the k
#' subsetting groups.} \item{Number.of.cases}{Shows the unweighted number of
#' cases (NA's excluded) within each group.} \item{Sum.of.weights}{Shows the
#' sum of weights (NA's excluded) within each group.} \item{Proportion}{Shows
#' the (weighted) estimated proportion of persons within the categories.}
#' \item{Proportion.SE}{Shows the Standard Errors of the proportion estimate.}
#' @author Manuel Reif
#' @seealso \code{\link{svyPVprob}}
#' @references Lumley, T. (2010). \emph{Complex Surveys}. Hoboken, NJ: Wiley.
#' 
#' Saerndal, C.-E. & Swensson, B. & Wretman, J. (1992). \emph{Model Assisted
#' Survey Sampling}. New York: Springer.
#' 
#' Chaudhuri, A. & Stenger, H. (2005). \emph{Survey Sampling. Theory and
#' Methods}. Boca Raton, FL: Chapman & Hall/CRC.
#' @keywords proportion
#' @examples
#' 
#' 
#' data(svy_example1)
#' 
#' erg_p <- svyPVprob(by = ~ sex, svydat=svy.exrep, pvs=c("plaus1","plaus2","plaus3"))
#' 
#' erg_p
#' 
#' 
#' @export svyPVprob
svyPVprob <-
function(by, svydat, pvs = NULL, colN = FALSE)
{
  # Estimate the (weighted) proportion of observations within each group
  # defined by the formula `by`, and return them together with the number of
  # cases and the sum of weights per group as one data.frame.
  #
  # by     : formula naming the grouping variables (read via all.vars()).
  # svydat : survey design; its `variables` element must contain the grouping
  #          variables.
  # pvs    : NULL or a character vector of variable names, passed on unchanged
  #          to opv_perc().
  # colN   : if TRUE the grouping columns of the result are named after the
  #          variables in `by`; otherwise they are named Group1 ... Groupk.
  #
  # Stops with an error if a `by` variable is missing from the design or if
  # none of the `by` variables has more than one category.
  #
  # NOTE(review): opv_perc(), additional_comp() and fALL() are defined
  # elsewhere in the package; the comments on them below follow the original
  # in-line notes and should be confirmed against those helpers.

  by_vars   <- all.vars(by)
  group_idx <- seq_along(by_vars)  # safe even if `by` names no variable

  # ---- check input --------------------------------------------------------
  missing_vars <- setdiff(by_vars, colnames(svydat$variables))
  if (length(missing_vars) > 0) {
    stop("by variable(s) not found in svydat: ",
         paste(missing_vars, collapse = ", "), call. = FALSE)
  }

  # drop = FALSE keeps a data.frame even when `by` names a single variable
  group_cols <- as.data.frame(svydat$variables[, by_vars, drop = FALSE])
  n_categories <- vapply(group_cols, function(col) length(unique(col)),
                         numeric(1))

  # As in the original check, the call is rejected only when *all* grouping
  # variables are constant (this also covers a formula without variables).
  if (all(n_categories == 1)) {
    stop("by variable must contain more than 1 category.", call. = FALSE)
  }

  # ---- compute group ratios -----------------------------------------------
  cellp <- opv_perc(by = by, svydat = svydat, pvs = pvs)

  # ---- additional information ---------------------------------------------
  # ADC is expected to provide the elements `Ncases` and `Sumweights`, each
  # with one column per grouping variable followed by one value column.
  ADC <- additional_comp(by = by, svydat = svydat)

  group_names <- paste0("Group", group_idx)
  colnames(ADC$Ncases)     <- c(group_names, "Number.of.cases")
  colnames(ADC$Sumweights) <- c(group_names, "Sum.of.weights")

  pmV <- data.frame(ADC$Ncases,
                    "Sum.of.weights" = ADC$Sumweights[, length(by_vars) + 1L])

  # merge the outcomes on their common columns, keeping the incoming row order
  pm <- merge(pmV, cellp, sort = FALSE)

  # To keep the ordering of the factor levels the same as in the data set
  # (mainly important for plots), the grouping columns are replaced by the
  # re-ordered versions returned by fALL().
  pm[, group_idx] <- fALL(by_vars, pm, svydat)

  if (colN) {
    colnames(pm)[group_idx] <- by_vars
  }

  pm
}