R/tab_subset.R
In surveytable: Streamlining Complex Survey Estimation and Reliability Assessment in R

Documented in tab_subset

#' Tabulate subsets or interactions
#'
#' Create subsets of the survey using one variable, and tabulate another
#' variable within each of the subsets. Interact two variables and tabulate.
#'
#' `tab_subset()` creates subsets using the levels of `vrby`, and tabulates
#' `vr` in each subset. Optionally, only use the `lvls` levels of `vrby`.
#' `vr` can be categorical (factor or character), logical, or numeric.
#'
#' `tab_cross()` crosses or interacts `vr` and `vrby` and tabulates the new
#' variable. Tables created using `tab_subset()` and `tab_cross()` have the same
#' counts but different percentages. With `tab_subset()`, percentages within each
#' subset add up to 100%. With `tab_cross()`, percentages across the entire
#' population add up to 100%. Also see `var_cross()`.
#'
#' `test = TRUE` performs a test of association between the two variables. Also
#' performs t-tests for all pairs of levels of `vr` and `vrby`.
#'
#' `test = "{LEVEL}"`, where `{LEVEL}` is a level of `vr`, performs a
#' **conditional independence test** to compare the proportion of
#' `vr = "{LEVEL}"` for different values of `vrby`.
#'
#' @param vr      variable to tabulate
#' @param vrby    use this variable to subset the survey
#' @param lvls    (optional) only show these levels of `vrby`
#' @param test    if `TRUE`, performs a test of association and t-tests for all
#' pairs of levels of `vr` and `vrby`. If `test` is the name of a level of `vr`,
#' performs a conditional independence test for that level.
#' @param alpha   significance level for tests
#' @param p_adjust adjust p-values for multiple comparisons?
#' @param drop_na drop missing values (`NA`)? Categorical variables only.
#' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables.
#'
#' @return A list of tables or a single table.
#'
#' @family tables
#'
#' @export
#'
#' @examples
#' set_survey(namcs2019sv)
#'
#' # For each SEX, tabulate AGER
#' tab_subset("AGER", "SEX")
#'
#' # Same counts as tab_subset(), but different percentages.
#' tab_cross("AGER", "SEX")
#'
#' # Numeric variables
#' tab_subset("NUMMED", "AGER")
#'
#' # Hypothesis testing
#' tab_subset("NUMMED", "AGER", test = TRUE)
tab_subset = function(vr, vrby, lvls = c()
                , test = FALSE, alpha = 0.05, p_adjust = FALSE
                # , test_pairs = "depends"
                , drop_na = getOption("surveytable.drop_na")
                , max_levels = getOption("surveytable.max_levels")
              ) {
  assert_that(alpha > 0, alpha < 0.5
              , p_adjust %in% c(TRUE, FALSE)
              )
  design = .load_survey()
  nm = names(design$variables)
  assert_that(vr %in% nm, msg = paste("Variable", vr, "not in the data."))
  assert_that(vrby %in% nm, msg = paste("Variable", vrby, "not in the data."))
  assert_that(is.factor(design$variables[,vr])
              || is.logical(design$variables[,vr])
              || is.numeric(design$variables[,vr])
        , msg = paste0(vr, ": must be factor, logical, or numeric. Is "
                   , class(design$variables[,vr])[1] ))

  # Need to convert to factor for testing
  if (is.logical(design$variables[,vr])) {
    lbl = attr(design$variables[,vr], "label")
    design$variables[,vr] %<>% factor %>% droplevels

    if (drop_na) {
      design = design[which(!is.na(design$variables[,vr])),]
      if(inherits(design, "svyrep.design")) {
        design$prob = 1 / design$pweights
      }
      # drop_na in .tab_factor will set this
      # lbl %<>% paste("(knowns only)")
    } else {
      design$variables[,vr] %<>% .fix_factor
    }
    assert_that(noNA(design$variables[,vr]), noNA(levels(design$variables[,vr])))

    attr(design$variables[,vr], "label") = lbl
  }
  assert_that( (is.logical(test) && test %in% c(TRUE, FALSE))
               || (is.string(test) && test %in% levels(design$variables[,vr])) )

  type_test = if (is.logical(test) && test == TRUE) {
    "assoc"
  } else if (is.logical(test) && test == FALSE) {
    "none"
  } else if (is.string(test) && test %in% levels(design$variables[,vr])) {
    "cit"
  } else {
    stop("?")
  }

  lbl = attr(design$variables[,vrby], "label")
  if (is.logical(design$variables[,vrby])) {
    design$variables[,vrby] %<>% factor
  }
  assert_that(is.factor(design$variables[,vrby])
        , msg = glue("{vrby}: must be either categorical (factor or character) or logical.",
                     " Is {o2s(design$variables[,vrby])}"))
  design$variables[,vrby] %<>% droplevels %>% .fix_factor
  attr(design$variables[,vrby], "label") = lbl

  lvl0 = levels(design$variables[,vrby])
  if (!is.null(lvls)) {
    assert_that(all(lvls %in% lvl0))
    lvl0 = lvls
  }
  if (drop_na) {
    idx = which(lvl0 == "<N/A>")
    if (length(idx) > 0) {
      lvl0 = lvl0[-idx]
    }
  }

  ret = list()
  if (is.factor(design$variables[,vr]) && type_test == "cit") {
    r1 = NULL
    l.num = list()
    c.footer = c()
    for (ii in lvl0) {
      d1 = design[which(design$variables[,vrby] == ii),]
      if(inherits(d1, "svyrep.design")) {
        d1$prob = 1 / d1$pweights
      }
      fo = .tab_factor(design = d1
                  , vr = vr
                  , drop_na = drop_na
                  , max_levels = max_levels)
      l.num[[ length(l.num) + 1 ]] = attr(fo, "num")
      c.footer %<>% c(attr(fo, "footer"))

      idx = which(fo$Level == test)
      assert_that(length(idx) == 1)
      fo1 = fo[idx,]
      fo1$Level = ii
      r1 %<>% .rbind_dc(fo1)
    }
    attr(r1, "title") = glue("{.getvarname(design, vr)} = '{test}' "
                             , "(for different levels of {.getvarname(design, vrby)})")
    for (kk in 1:length(l.num)) assert_that(all(l.num[[kk]] == l.num[[1]]))
    attr(r1, "num") = l.num[[1]]
    ## What to do with footer?
    attr(r1, "footer") = ""
    ret[[ "table - conditional independence" ]] = r1

    # Test
    design$variables$.tmp = as.numeric(design$variables[,vr] == test)
    ret[[ "test - conditional independence" ]] = .test_numeric(
      design = design, vr = ".tmp", vrby = vrby, lvl0 = lvl0, alpha = alpha
      , p_adjust = p_adjust
      , test_title = glue("Conditional independence test of {.getvarname(design, vr)} = '{test}' "
                          , "across all pairs of {.getvarname(design, vrby)}")
    )
  } else if (is.factor(design$variables[,vr]) && type_test != "cit") {
    for (ii in lvl0) {
      d1 = design[which(design$variables[,vrby] == ii),]
      if(inherits(d1, "svyrep.design")) {
        d1$prob = 1 / d1$pweights
      }

      attr(d1$variables[,vr], "label") = paste0(
        .getvarname(design, vr), " ("
        , .getvarname(design, vrby), " = ", ii
        , ")")
      ret[[ii]] = .tab_factor(design = d1
                        , vr = vr
                        , drop_na = drop_na
                        , max_levels = max_levels)
    }

    if (type_test == "assoc") {
      frm = as.formula(paste0("~ `", vr, "` + `", vrby, "`"))
      fo = svychisq(frm, design
        , statistic = getOption("surveytable.svychisq_statistic", default = "F"))
      rT = data.frame(`Test statistic` = fo$statistic
        , `Degrees of freedom 1` = fo$parameter[1]
        , `Degrees of freedom 2` = fo$parameter[2]
        , `p-value` = fo$p.value
        , check.names = FALSE)
      test_name = fo$method
      test_title = paste0("Association between "
                          , .getvarname(design, vr)
                          , " and ", .getvarname(design, vrby))
      # do_pairs = if(test_pairs == "no") {
      #   FALSE
      # } else if(test_pairs == "yes") {
      #   TRUE
      # } else {
      #   rT$`p-value` <= alpha
      # }

      ret[[ test_name ]] = .test_table(rT = rT
             , test_name = test_name, test_title = test_title, alpha = alpha)

      ###
      for (ii in lvl0 ) {
        d1 = design[which(design$variables[,vrby] == ii),]
        if(inherits(d1, "svyrep.design")) {
          d1$prob = 1 / d1$pweights
        }

        attr(d1$variables[,vr], "label") = paste0(
          .getvarname(design, vr), " ("
          , .getvarname(design, vrby), " = ", ii
          , ")")
        ret[[paste0(ii, " - test")]] = .test_factor(design = d1
                                                    , vr = vr
                                                    , drop_na = drop_na
                                                    , alpha = alpha
                                                    , p_adjust = p_adjust)
      }

      for (jj in levels(design$variables[,vr]) ) {
        d1 = design[which(design$variables[,vr] == jj),]
        if(inherits(d1, "svyrep.design")) {
          d1$prob = 1 / d1$pweights
        }

        attr(d1$variables[,vrby], "label") = paste0(
          .getvarname(design, vrby), " ("
          , .getvarname(design, vr), " = ", jj
          , ")")
        ret[[paste0(jj, " - test")]] = .test_factor(design = d1
                                                    , vr = vrby
                                                    , drop_na = drop_na
                                                    , alpha = alpha
                                                    , p_adjust = p_adjust)
      }
    }
  } else if (is.numeric(design$variables[,vr])) {
    rA = NULL
    for (ii in lvl0) {
      d1 = design[which(design$variables[,vrby] == ii),]
      r1 = .tab_numeric_1(design = d1, vr = vr)
      rA %<>% rbind(r1)
    }
    df1 = data.frame(Level = lvl0)
    assert_that(nrow(df1) == nrow(rA))
    rA = cbind(df1, rA)
    attr(rA, "title") = paste0(.getvarname(design, vr)
         , " (for different levels of "
         , .getvarname(design, vrby), ")")
    ret[["Means"]] = .finalize_tab(rA)

    if (type_test == "assoc") {
      frm = as.formula(paste0("`", vr, "` ~ `", vrby, "`"))
      model1 = svyglm(frm, design)
      fo = regTermTest(model1, vrby, method = "Wald")
      rT = data.frame(`Test statistic` = fo$Ftest
        , `Degrees of freedom 1` = fo$df
        , `Degrees of freedom 2` = fo$ddf
        , `p-value` = fo$p, check.names = FALSE)
      # survey:::print.regTermTest
      test_name = "Wald test"
      test_title = paste0("Association between "
                          , .getvarname(design, vr)
                          , " and ", .getvarname(design, vrby))

      # do_pairs = if(test_pairs == "no") {
      #   FALSE
      # } else if(test_pairs == "yes") {
      #   TRUE
      # } else {
      #   rT$`p-value` <= alpha
      # }

      ret[[ test_name ]] = .test_table(rT = rT
             , test_name = test_name, test_title = test_title, alpha = alpha)
    # }
    # if (test && do_pairs) {
      ret[[ "t-test" ]] = .test_numeric(
        design = design, vr = vr, vrby = vrby, lvl0 = lvl0, alpha = alpha
        , p_adjust = p_adjust
        , test_title = glue("Comparison of {.getvarname(design, vr)} across all possible "
                            , "pairs of {.getvarname(design, vrby)}")
      )
    }
  } else {
    stop("How did we get here?")
  }

  class(ret) = "surveytable_list"
  if (length(ret) == 1L) ret[[1]] else ret
}

Any scripts or data that you put into this service are public.

surveytable documentation built on Aug. 26, 2025, 1:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

surveytable
Streamlining Complex Survey Estimation and Reliability Assessment in R

R/tab_subset.R
In surveytable: Streamlining Complex Survey Estimation and Reliability Assessment in R

Defines functions tab_subset

Documented in tab_subset

Try the surveytable package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveytable Streamlining Complex Survey Estimation and Reliability Assessment in R

R/tab_subset.R In surveytable: Streamlining Complex Survey Estimation and Reliability Assessment in R

Defines functions tab_subset

Documented in tab_subset

Try the surveytable package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveytable
Streamlining Complex Survey Estimation and Reliability Assessment in R

R/tab_subset.R
In surveytable: Streamlining Complex Survey Estimation and Reliability Assessment in R