R/tableOne.R

Defines functions keep_missing

#' removes missing values from dataset if keep_missing_flag is true. Otherwise
#' returns unalterd dataset
#' @param dset_in a dataframe containing data for table one
#' @param keep_missing_flag keep_missing If TRUE, only complete cases will be
#' included in the table
#' @return a filtered dataset if keep_missing_flag is true, otherwise the
#' original data.frame
#' @importFrom dplyr %>%
keep_missing <- function(dset_in, keep_missing_flag){
  if (identical(keep_missing_flag, TRUE)){
    print("Missing values will NOT be removed")
    dset_in
  } else {
    print("Missing values WILL be removed")
    dset_in %>% dplyr::filter(complete.cases(.))
  }
}


#' Removes variables if remove_flag is not NULL. Otherwise returns unaltered
#' dataset.
#' @param dset_in a dataframe containing data for table one
#' @param remove_flag character vector of variables to exclude from the table.
#' @return if remove_flag is not null, then a data.frame without the indicated
#' variables. Otherwise, return the original data.frame.
#' @importFrom dplyr %>%
remove_vars <- function(dset_in, remove_flag){
  if (is.null(remove_flag)){
    print("NO columns will be removed")
    dset_in
  } else {
    paste("Removing the following vars:", remove_flag) %>% print()
    vars_to_remove <- paste0("-", remove_flag)
    dplyr::select_(dset_in, .dots = as.list(vars_to_remove))
  }
}


#' Create formatted Table 1
#'
#' @param dataset a dataframe containing data for table one
#' @param strata variable for columns as string
#' @param remove charachter vector of variables to remove
#' @param keep_missing If TRUE, only complete cases will be included in the
#' table
#'
#' @return a matrix forming a formatted table 1.
#' @importFrom dplyr %>%
#' @export
#'
tableOne <- function(dataset, strata, remove = NULL, keep_missing = FALSE){

  # Remove variables that will not be part of the table (Should this be a
  # command line option?).
  dset <- dataset %>%
    remove_vars(remove) %>%
    keep_missing(keep_missing)

  # Get the categorical variables
  cats <- lapply(dset, is.factor) %>%
    Filter(function(x) x, .) %>%
    names()

  # Get the continuous variables
  conts <- lapply(dset, Negate(is.factor)) %>%
    Filter(function(x) x, .) %>%
    names()

  # Create table 1 with tableone library.
  # Remove the strata var from the list of variables to summarize in the table.
  # tableone package doesn't let you automatically add an "Overall" column.
  # Need to create two and then combine
  vars <- names(dset) %>% Filter(function(x) x != strata, .)
  # Create table with strata
  table_strata <-
    tableone::CreateTableOne(data = dset, strata = strata, vars=vars,
                             testExact = chisq.test) %>%
    print()
  # Create "Overall" table (leave strata arg unspecified.
  table_overall <-
    tableone::CreateTableOne(data = dset, vars=vars, testExact = chisq.test) %>%
    print()
  # Combine
  table_raw <- cbind(table_overall, table_strata)

  # Now replace variable names with labels.
  dset_names <- lapply(dset, function(x) attr(x, "label")) %>%
    Filter(Negate(is.null), .)
  rownames_raw <- row.names(table_raw)

  var_to_label <- function(rownames_vec, rowlabels_lst){
    if (length(rowlabels_lst) %>% identical(0L))
      rownames_vec
    else {
      from_var <- paste0("\\b", names(rowlabels_lst)[[1]], "\\b")
      to_label <- rowlabels_lst[[1]]
      rownames_out <- sub(from_var, to_label, rownames_vec)
      var_to_label(rownames_out, rowlabels_lst[-1])
    }
  }

  row.names(table_raw) <- var_to_label(rownames_raw, dset_names)

  # Return
  table_raw

}
FredHutch/sasHelpers documentation built on May 3, 2019, 3:32 p.m.