svyPVpack: A package for complex surveys including plausible values

Documented in svyPVbenchmark

#' Estimate the proportion below and above a bechmark
#' 
#' This function works in a similar fashion like the \code{svyPVlevel}
#' function. It discretizes the plausible values to a dichotomous variable and
#' estimates the proportion of population totals above and below the benchmark
#' within the comitted groups (by statement).
#' 
#' 
#' @param by A formula statement is expected which splits the data into several
#' subsets.
#' @param svydat A survey design (\code{svydesign} as well as
#' \code{svrepdesign}) which was generated by the \code{survey} package. To
#' figure out how to create a survey design object, please read the help files
#' for the \code{survey} package.
#' @param pvs A character vector which includes the colnames of the plausible
#' values. These variables must be part of the survey design comitted as
#' \code{svydat}.
#' @param BENCH Submit a benchmark (numeric vector of length = 1). A plausible
#' value will be assigned to "< benchmark" if it is below the benchmark and
#' assigned to ">= benchmark" if it is on or above the benchmark.
#' @param colN If TRUE the colnames will equal the grouping variable names from
#' the by statement. If FALSE, which is the default, the names will be Group1
#' up to Group k.
#' @return The function returns a data.frame with the following columns
#' 
#' \item{Group1..k}{The first k-1 columns show the different levels of the k-1
#' subsetting groups, provided with \code{by}. The kth group column contains
#' the benchmark variable.} \item{Number.of.cases}{Shows the unweighted number
#' of cases (NA's excluded) within each group.} \item{Sum.of.weights}{Shows the
#' sum of weights (NA's excluded) within each group.}
#' \item{Proportion}{Contains the estimate of the conditional proportion of
#' persons below and on/above the benchmark given the categories of the first
#' k-1 groups.} \item{Proportion.SE}{Contains the SE of the proportion
#' estimate.}
#' @author Manuel Reif
#' @seealso \code{\link{svyPVlevel}}
#' @references Lumley, T. (2010). \emph{Complex Surveys}. Hoboken, NJ: Wiley.
#' 
#' Saerndal, C.-E. & Swensson, B. & Wretman, J. (1992). \emph{Model Assisted
#' Survey Sampling}. New York: Springer.
#' 
#' Chaudhuri, A. & Stenger, H. (2005). \emph{Survey Sampling. Theory and
#' Methods}. Boka Raton, FL: Chapman & Hall/CRC.
#' @keywords benchmark
#' @examples
#' 
#' 
#' data(svy_example1)
#' 
#' erg_ben <- svyPVbenchmark(by = ~ sex, svydat=svy.exrep, 
#' pvs=c("plaus1","plaus2","plaus3"), BENCH=320)
#' 
#' erg_ben
#' 
#' 
#' 
#' 
#' @export svyPVbenchmark
svyPVbenchmark <-
function(by, svydat, pvs, BENCH=NA, colN=FALSE)
# put in a Benachmark - a this benchmark (e.g.: 300) the ratio of people below and at/above this benchmark is identified and reported - as well as the SE of this ratio
  # additionally the sum of weights and the number of observations for each group is reported.

# pvs = a vertor which contains the column-names of the plausible value variables  
{
  

if(is.na(BENCH)) stop("Please submit a benchmark!")



BENCH1 <- paste0("x >=",BENCH)

PVPRS <- svydat$variables[, pvs]


catPVs <- data.frame(lapply(PVPRS,function(x) factor(ifelse(eval(parse(text=BENCH1)),paste0(">=",BENCH),paste0("<",BENCH)))))
colnames(catPVs) <- paste0(colnames(PVPRS),"_cat")

svydat2 <- 0
addALL <- paste0("svydat2 <- update(svydat, ",paste0(colnames(catPVs),"=catPVs$",colnames(catPVs),collapse=" , "),")")


# create svydat2 which includes the categorized plausible values
eval(parse(text=addALL))  
  
  

ergperc <- pv_perc(by=by, svydat=svydat2, pvcat=colnames(catPVs))
pm      <- mergeALL(ergperc)



### um die ordnung der factors gleich zu lassen (vor allem wichtig bezogen auf grafiken) wird hier nochmal umgeordnet so wie es im datensatz ?blich ist

mybys <- all.vars(by)
# facordall <- mapply(function(x,number) factor(pm[,number], levels=levels(svydat2$variables[[x]])), x=mybys, number=1:length(mybys),SIMPLIFY=FALSE)
# 
# facordallDF <- data.frame(facordall)

browser()

facordallDF <- fALL(mybys,pm, svydat)


browser()

pm[,1:length(mybys)] <- facordallDF 

# Beschriftung
if(colN)
{
  colnames(pm)[1:(length(mybys)+1)] <- c(mybys,"benchmark") 
  
}




# if(addcountry)
# {  
#   pm  <- data.frame("Country"=unique(svydat$variables$CNTRYID), pm) 
# }
  
return(pm)  
  
}