R/modules-svy.R

Defines functions svyModuleApproxExactTests svyTestChisq svyTestNonNormal svyTestNormal svyGlmTermTest svyCatSummary svyCatSummaryForOneVar svyPropTable svyLevel svyTable svyContSummary svyQuant svySd svyMean svyPMiss svyMiss svyN StopIfNotSurveyDesign FormulaString miss one

################################################################################
### Modules for survey data support
##
## Created on: 2015-07-23
## Author: Kazuki Yoshida
################################################################################


###
### Generic helpers
################################################################################

## Function to return 1 as many as x elements
## Used for counting weighted sample size
one <- function(x) {
    rep(1, length(x))
}


## 0,1 indicator variable for missingness
miss <- function(x) {
    as.numeric(is.na(x))
}


## Return formula string having vars on RHS
FormulaString <- function(vars) {
    paste0(" ~ ", paste0(vars, collapse = " + "))
}


###
### Use oldsvyquantile when available
################################################################################

## 2020-07-30 via Thomas Lumley
## survey::svyquantile rewritten for survey 4.1
## To get the old behaviour you can simply call survey::oldsvyquantile() instead.
try_oldsvyquantile <- try(getFromNamespace(x = "oldsvyquantile", ns = "survey"))
if (is.function(try_oldsvyquantile)) {
    svyquantile <- survey::oldsvyquantile
}


###
### Helpers for both types of data
################################################################################

## Check for survey data; fail if not
StopIfNotSurveyDesign <- function(data) {

    if (!any(c("svyrep.design", "survey.design2", "survey.design") %in% class(data))) {
        stop("The data argument needs to be a survey design object.")
    }

}


###
### Helpers for continuous variable summary
################################################################################

svyN <- function(vars, design) {
    ## Same for all variables, just use first
    form <- sprintf(" ~ one(%s)", vars[1])
    ## No missing handling as all values are 1
    res <- svytotal(x = as.formula(form), design = design)
    ## Repeat for all variables
    out <- rep(res[1], length(vars))
    names(out) <- vars
    out
}


svyMiss <- function(vars, design) {

    ## Rewrite variables locally to missing 0,1 indicators
    design$variables[vars] <- lapply(design$variables[vars], miss)

    ## For each var find number of missingness
    res <- sapply(design$variables[vars], svytotal, design = design, na.rm=TRUE)
    out <- as.vector(res)
    names(out) <- vars
    out
}


svyPMiss <- function(vars, design) {
    svyMiss(vars, design) / svyN(vars, design)
}


svyMean <- function(vars, design) {

    ## For each var find weighted mean
    res <- sapply(design$variables[vars], svymean, design = design, na.rm=TRUE)
    out <- as.vector(res)
    names(out) <- vars
    out
}


svySd <- function(vars, design) {

    ## For each var find weighted variance (ie, not a variance, covariance matrix)
    res <- sapply(design$variables[vars],svyvar, design = design, na.rm=TRUE)
    out <- as.vector(sqrt(res))
    names(out) <- vars
    out
}


svyQuant <- function(vars, design, q = 0.5) {

    ## Use only one quantile
    ## Not as elegant as sapply, though sapply with svyquantile failed to run
    res <- vector()
    for ( i in 1:length(vars)) {
      var    <- vars[i]
      res[i] <- svyquantile(design$variables[var], design = design, quantiles = q[1], na.rm = TRUE)
    }
    out <- as.vector(res)
    names(out) <- vars
    out
}


svyContSummary <- function(vars, design) {

    ## Save for reuse
    nVec    <- svyN(vars, design)
    missVec <- svyMiss(vars, design)

    cbind(n      = nVec,
          miss   = missVec,
          p.miss = missVec / nVec * 100,
          mean   = svyMean(vars, design),
          sd     = svySd(vars, design),
          median = svyQuant(vars, design, q = 0.5),
          p25    = svyQuant(vars, design, q = 0.25),
          p75    = svyQuant(vars, design, q = 0.75),
          min    = svyQuant(vars, design, q = 0),
          max    = svyQuant(vars, design, q = 1))
}


###
### Helpers for categorical variable summary
################################################################################
## These work on one variable at a time

svyTable <- function(var, design) {
    form <- FormulaString(var)
    out <- svytable(formula = as.formula(form), design = design)
    out
}


svyLevel <- function(var, design) {
    names(svyTable(var, design))
}


svyPropTable <- function(var, design) {
    prop.table(svyTable(var, design))
}


## This one works at a single variable level within a stratum
svyCatSummaryForOneVar <- function(var, design) {

    ## Tables
    freqTab <- svyTable(var, design)
    propTab <- prop.table(freqTab)
    nLevels <- length(freqTab)
    ## Repeat as many as the levels
    nVec    <- rep(svyN(var, design), nLevels)
    missVec <- rep(svyMiss(var, design), nLevels)

    data.frame(n           = nVec,
               miss        = missVec,
               p.miss      = missVec / nVec * 100,
               ## Order must be as is.
               level       = names(freqTab),
               freq        = as.vector(freqTab),
               percent     = as.vector(propTab) * 100,
               cum.percent = cumsum(propTab) * 100,
               ## To protect against, level having <NA>
               row.names   = NULL,
               stringsAsFactors = FALSE)
}


## This one can take multiple variables and return a list
svyCatSummary <- function(vars, design) {

    sapply(vars, function(var) {

        svyCatSummaryForOneVar(var, design)

    }, simplify = FALSE)
}


###
### Helpers for testing for p values
################################################################################

## Function to do Wald test on a multi-degree variable after linear regression
svyGlmTermTest <- function(formula, design, test.terms, method = "Wald") {

    ## Perform linear regression and perform
    regTermTest(svyglm(formula, design), test.terms = test.terms, method = method)
}


## Given a formula string as its first argument, calls svyGlmTermTest correctly
svyTestNormal <- function(formulaString, design, test.terms, method = "Wald") {

    out <- svyGlmTermTest(formula = as.formula(formulaString), design = design,
                          test.terms = test.terms, method = method)
    ## Give an appropriate name for consistent extraction
    list(p.value = out$p[1,1])
}


## Kruskal-Wallis test
svyTestNonNormal <- function(formulaString, design) {

    ## This returns an htest object that has a scalar $p.value element
    svyranktest(formula = as.formula(formulaString), design = design)
}


svyTestChisq <- function(formulaString, design) {

    ## This returns an htest object that has a scalar $p.value element
    svychisq(formula = as.formula(formulaString), design = design)
}


svyModuleApproxExactTests <- function(data, vars, strataVarName,
                                      testApprox, argsApprox) {

    ## Loop over variables and create a list of xtabs
    ## Empty strata are kept in the corss tables. Different behavior than the cont counterpart!
    listXtabs <- sapply(X = vars,
                        FUN = function(var) {
                            ## Create a formula
                            formula <- as.formula(paste0("~ ", var, " + ", "..strataVar.."))

                            ## Create a 2-dimensional crosstable
                            svytable(formula = formula, design = data)
                        },
                        simplify = FALSE)

    ## Rename the second dimension of the xtabs with the newly create name.
    for (i in seq_along(listXtabs)) {

        names(dimnames(listXtabs[[i]]))[2] <- strataVarName
    }

    ## Loop over variables, and create p-values
    pValues <-
    sapply(X = vars,
           FUN = function(var) {
               formulaString <- paste0(" ~ ", var, " + ..strataVar..")
               ## Perform tests and return the result as 1x2 DF
               data.frame(pApprox = ModuleTestSafe(formulaString, testApprox,
                                                   c(list(design = data), argsApprox)),
                          pExact  = NA) # Not available for survey data. Just fill in with NA
           },
           simplify = FALSE)

    ## Create a single data frame (n x 2 (normal,nonormal))
    pValues <- do.call(rbind, pValues)

    ## Return both xtabs and p value df
    list(pValues = pValues, xtabs = listXtabs)
}

Try the tableone package in your browser

Any scripts or data that you put into this service are public.

tableone documentation built on April 15, 2022, 5:06 p.m.