tableone: Create 'Table 1' to Describe Baseline Characteristics with or without Propensity Score Weights

################################################################################
### Modules for survey data support
##
## Created on: 2015-07-23
## Author: Kazuki Yoshida
################################################################################


###
### Generic helpers
################################################################################

## Function to return 1 as many as x elements
## Used for counting weighted sample size
one <- function(x) {
    rep(1, length(x))
}


## 0,1 indicator variable for missingness
miss <- function(x) {
    as.numeric(is.na(x))
}


## Return formula string having vars on RHS
FormulaString <- function(vars) {
    paste0(" ~ ", paste0(vars, collapse = " + "))
}


###
### Use oldsvyquantile when available
################################################################################

## 2020-07-30 via Thomas Lumley
## survey::svyquantile rewritten for survey 4.1
## To get the old behaviour you can simply call survey::oldsvyquantile() instead.
try_oldsvyquantile <- try(getFromNamespace(x = "oldsvyquantile", ns = "survey"))
if (is.function(try_oldsvyquantile)) {
    svyquantile <- survey::oldsvyquantile
}


###
### Helpers for both types of data
################################################################################

## Check for survey data; fail if not
StopIfNotSurveyDesign <- function(data) {

    if (!any(c("svyrep.design", "survey.design2", "survey.design") %in% class(data))) {
        stop("The data argument needs to be a survey design object.")
    }

}


###
### Helpers for continuous variable summary
################################################################################

svyN <- function(vars, design) {
    ## Same for all variables, just use first
    form <- sprintf(" ~ one(%s)", vars[1])
    ## No missing handling as all values are 1
    res <- svytotal(x = as.formula(form), design = design)
    ## Repeat for all variables
    out <- rep(res[1], length(vars))
    names(out) <- vars
    out
}


svyMiss <- function(vars, design) {

    ## Rewrite variables locally to missing 0,1 indicators
    design$variables[vars] <- lapply(design$variables[vars], miss)

    ## For each var find number of missingness
    res <- sapply(design$variables[vars], svytotal, design = design, na.rm=TRUE)
    out <- as.vector(res)
    names(out) <- vars
    out
}


svyPMiss <- function(vars, design) {
    svyMiss(vars, design) / svyN(vars, design)
}


svyMean <- function(vars, design) {

    ## For each var find weighted mean
    res <- sapply(design$variables[vars], svymean, design = design, na.rm=TRUE)
    out <- as.vector(res)
    names(out) <- vars
    out
}


svySd <- function(vars, design) {

    ## For each var find weighted variance (ie, not a variance, covariance matrix)
    res <- sapply(design$variables[vars],svyvar, design = design, na.rm=TRUE)
    out <- as.vector(sqrt(res))
    names(out) <- vars
    out
}


svyQuant <- function(vars, design, q = 0.5) {

    ## Use only one quantile
    ## Not as elegant as sapply, though sapply with svyquantile failed to run
    res <- vector()
    for ( i in 1:length(vars)) {
      var    <- vars[i]
      res[i] <- svyquantile(design$variables[var], design = design, quantiles = q[1], na.rm = TRUE)
    }
    out <- as.vector(res)
    names(out) <- vars
    out
}


svyContSummary <- function(vars, design) {

    ## Save for reuse
    nVec    <- svyN(vars, design)
    missVec <- svyMiss(vars, design)

    cbind(n      = nVec,
          miss   = missVec,
          p.miss = missVec / nVec * 100,
          mean   = svyMean(vars, design),
          sd     = svySd(vars, design),
          median = svyQuant(vars, design, q = 0.5),
          p25    = svyQuant(vars, design, q = 0.25),
          p75    = svyQuant(vars, design, q = 0.75),
          min    = svyQuant(vars, design, q = 0),
          max    = svyQuant(vars, design, q = 1))
}


###
### Helpers for categorical variable summary
################################################################################
## These work on one variable at a time

svyTable <- function(var, design) {
    form <- FormulaString(var)
    out <- svytable(formula = as.formula(form), design = design)
    out
}


svyLevel <- function(var, design) {
    names(svyTable(var, design))
}


svyPropTable <- function(var, design) {
    prop.table(svyTable(var, design))
}


## This one works at a single variable level within a stratum
svyCatSummaryForOneVar <- function(var, design) {

    ## Tables
    freqTab <- svyTable(var, design)
    propTab <- prop.table(freqTab)
    nLevels <- length(freqTab)
    ## Repeat as many as the levels
    nVec    <- rep(svyN(var, design), nLevels)
    missVec <- rep(svyMiss(var, design), nLevels)

    data.frame(n           = nVec,
               miss        = missVec,
               p.miss      = missVec / nVec * 100,
               ## Order must be as is.
               level       = names(freqTab),
               freq        = as.vector(freqTab),
               percent     = as.vector(propTab) * 100,
               cum.percent = cumsum(propTab) * 100,
               ## To protect against, level having <NA>
               row.names   = NULL,
               stringsAsFactors = FALSE)
}


## This one can take multiple variables and return a list
svyCatSummary <- function(vars, design) {

    sapply(vars, function(var) {

        svyCatSummaryForOneVar(var, design)

    }, simplify = FALSE)
}


###
### Helpers for testing for p values
################################################################################

## Function to do Wald test on a multi-degree variable after linear regression
svyGlmTermTest <- function(formula, design, test.terms, method = "Wald") {

    ## Perform linear regression and perform
    regTermTest(svyglm(formula, design), test.terms = test.terms, method = method)
}


## Given a formula string as its first argument, calls svyGlmTermTest correctly
svyTestNormal <- function(formulaString, design, test.terms, method = "Wald") {

    out <- svyGlmTermTest(formula = as.formula(formulaString), design = design,
                          test.terms = test.terms, method = method)
    ## Give an appropriate name for consistent extraction
    list(p.value = out$p[1,1])
}


## Kruskal-Wallis test
svyTestNonNormal <- function(formulaString, design) {

    ## This returns an htest object that has a scalar $p.value element
    svyranktest(formula = as.formula(formulaString), design = design)
}


svyTestChisq <- function(formulaString, design) {

    ## This returns an htest object that has a scalar $p.value element
    svychisq(formula = as.formula(formulaString), design = design)
}


svyModuleApproxExactTests <- function(data, vars, strataVarName,
                                      testApprox, argsApprox) {

    ## Loop over variables and create a list of xtabs
    ## Empty strata are kept in the corss tables. Different behavior than the cont counterpart!
    listXtabs <- sapply(X = vars,
                        FUN = function(var) {
                            ## Create a formula
                            formula <- as.formula(paste0("~ ", var, " + ", "..strataVar.."))

                            ## Create a 2-dimensional crosstable
                            svytable(formula = formula, design = data)
                        },
                        simplify = FALSE)

    ## Rename the second dimension of the xtabs with the newly create name.
    for (i in seq_along(listXtabs)) {

        names(dimnames(listXtabs[[i]]))[2] <- strataVarName
    }

    ## Loop over variables, and create p-values
    pValues <-
    sapply(X = vars,
           FUN = function(var) {
               formulaString <- paste0(" ~ ", var, " + ..strataVar..")
               ## Perform tests and return the result as 1x2 DF
               data.frame(pApprox = ModuleTestSafe(formulaString, testApprox,
                                                   c(list(design = data), argsApprox)),
                          pExact  = NA) # Not available for survey data. Just fill in with NA
           },
           simplify = FALSE)

    ## Create a single data frame (n x 2 (normal,nonormal))
    pValues <- do.call(rbind, pValues)

    ## Return both xtabs and p value df
    list(pValues = pValues, xtabs = listXtabs)
}

kaz-yos/tableone documentation built on April 20, 2022, 12:16 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

kaz-yos/tableone
Create 'Table 1' to Describe Baseline Characteristics with or without Propensity Score Weights

R/modules-svy.R
In kaz-yos/tableone: Create 'Table 1' to Describe Baseline Characteristics with or without Propensity Score Weights

Defines functions svyModuleApproxExactTests svyTestChisq svyTestNonNormal svyTestNormal svyGlmTermTest svyCatSummary svyCatSummaryForOneVar svyPropTable svyLevel svyTable svyContSummary svyQuant svySd svyMean svyPMiss svyMiss svyN StopIfNotSurveyDesign FormulaString miss one

R Package Documentation

Browse R Packages

We want your feedback!

kaz-yos/tableone Create 'Table 1' to Describe Baseline Characteristics with or without Propensity Score Weights

R/modules-svy.R In kaz-yos/tableone: Create 'Table 1' to Describe Baseline Characteristics with or without Propensity Score Weights

Defines functions svyModuleApproxExactTests svyTestChisq svyTestNonNormal svyTestNormal svyGlmTermTest svyCatSummary svyCatSummaryForOneVar svyPropTable svyLevel svyTable svyContSummary svyQuant svySd svyMean svyPMiss svyMiss svyN StopIfNotSurveyDesign FormulaString miss one

R Package Documentation

Browse R Packages

We want your feedback!

kaz-yos/tableone
Create 'Table 1' to Describe Baseline Characteristics with or without Propensity Score Weights

R/modules-svy.R
In kaz-yos/tableone: Create 'Table 1' to Describe Baseline Characteristics with or without Propensity Score Weights