R/table-one-long.R

Defines functions fill_in_group_by_columns extract_data_from_cat_table extract_data_from_cont_table add_to_long_table table_one_long CreateTableOne.default CreateTableOne.BLLFlow CreateTableOne

#' Create Table One
#'
#' Creates Table One using the tableone package. If a bllflow object is passed a custom function
#' then extracts the necessary data for tableone call from the object avoiding unnecessary arguments.
#'
#' @param x Object to retrieve variables from
#' @param ... Additional arguments to pass to the tableone::CreateTableOne function call
#' @export
CreateTableOne <- function(x = NULL, ...) {
  UseMethod("CreateTableOne", x)
}

#' Create Table One using bllflow Object
#'
#' Creates table one using the information present in the passed bllflow object
#' specifically uses working data as the data,
#' additional arguments can be passed to create a specific table one.
#' However if no optional args are passed the variable info stored in variables MSW is used.
#'
#' @param x The bllflow object
#' @param vars The optional vars to use in creation of tableone if no vars are passed then vars in MSW variables is used
#' @param strata The optional strata to use in creation of tableone if no strata is passed no strata is used
#' @param factor_vars The optional factor_vars (categorical variables) used in creation of tableone if nothing is passed
#' the MSW variables sheet is used to determine variable types
#' @param select_role The optional argument that specifies the role of the variables to pass to tableone
#' @param ... Additional arguments to pass to the tableone::CreateTableOne function call
#'
#' @return returns a table one \href{https://cran.r-project.org/web/packages/tableone/index.html}{tableone} object
#'
#' @examples
#' library(bllflow)
#' bllflow_object <- build_bllflow(variables = cchsflow::variables,
#' variable_details = cchsflow::variable_details)
#' cchs2001 <- cchsflow::rec_with_table(cchsflow::cchs2001_p, database_name = "cchs2001_p")
#' cchs2003 <- cchsflow::rec_with_table(cchsflow::cchs2003_p, database_name = "cchs2003_p")
#' cchs2005 <- cchsflow::rec_with_table(cchsflow::cchs2005_p, database_name = "cchs2005_p")
#' cchs2007_2008_p <- cchsflow::rec_with_table(cchsflow::cchs2007_2008_p, database_name = "cchs2007_2008_p")
#' cchs2009_2010_p <- cchsflow::rec_with_table(cchsflow::cchs2009_2010_p, database_name = "cchs2009_2010_p")
#' cchs2011_2012_p <- cchsflow::rec_with_table(cchsflow::cchs2011_2012_p, database_name = "cchs2011_2012_p")
#' cchs2013_2014_p <- cchsflow::rec_with_table(cchsflow::cchs2013_2014_p, database_name = "cchs2013_2014_p")
#'
#' combined_data <- dplyr::bind_rows(cchs2001, cchs2003, cchs2005, cchs2007_2008_p, cchs2009_2010_p, cchs2011_2012_p, cchs2013_2014_p)
#'
#' bllflow_object$working_data <- combined_data
#'
#' test_table <- CreateTableOne(bllflow_object)
#'
#' @importFrom tableone CreateTableOne
#' @importFrom sjlabelled get_labels
#' @export
CreateTableOne.BLLFlow <- function(x,
                                   vars = NULL,
                                   strata = NULL,
                                   factor_vars = NULL,
                                   select_role = NULL,
                                   ...) {
  bll_flow_model <- x
  # ----Step 1: pull from variables in bll_flow_model ----
  variables_sheet <-
    bll_flow_model[[pkg.globals$bllFlowContent.Variables]]
  if (is.null(vars)) {
    if (is.null(select_role)) {
      vars <-
        as.character(variables_sheet[[pkg.globals$MSW.Variables.Columns.Variable]])
    } else {
      vars <-
        as.character(variables_sheet[grepl(select_role, variables_sheet[[pkg.globals$argument.Role]]), pkg.globals$MSW.Variables.Columns.Variable])
    }
    vars <- trimws(vars)
  }
  if (is.null(factor_vars)) {
    if (is.null(select_role)) {
      factor_vars <-
        as.character(variables_sheet[is_equal(variables_sheet[[pkg.globals$MSW.Variables.Columns.VariableType]],
                                              pkg.globals$ddiValueName.Categorical), pkg.globals$MSW.Variables.Columns.Variable])
    } else {
      factor_vars <-
        as.character(variables_sheet[is_equal(variables_sheet[[pkg.globals$MSW.Variables.Columns.VariableType]],
                                              pkg.globals$ddiValueName.Categorical) &
                                       grepl(select_role, variables_sheet[[pkg.globals$argument.Role]]), pkg.globals$MSW.Variables.Columns.Variable])
    }
    factor_vars <- trimws(factor_vars)
  }
  dropped_vars <-
    vars[!(vars %in% colnames(bll_flow_model[[pkg.globals$bllFlowContent.WorkingData]]))]
  if (length(dropped_vars) > 0) {
    message(paste(dropped_vars, "were not present in working data"))
  }
  dropped_factor <-
    factor_vars[!(factor_vars %in% colnames(bll_flow_model[[pkg.globals$bllFlowContent.WorkingData]]))]
  if (length(dropped_factor) > 0) {
    message(paste(dropped_factor, "were not present in working data"))
  }
  vars <-
    vars[vars %in% colnames(bll_flow_model[[pkg.globals$bllFlowContent.WorkingData]])]
  factor_vars <-
    factor_vars[factor_vars %in% colnames(bll_flow_model[[pkg.globals$bllFlowContent.WorkingData]])]
  # ----Step 2: Create the tableone ----
  if (is.null(strata)) {
    final_table <-
      tableone::CreateTableOne(data = bll_flow_model[[pkg.globals$bllFlowContent.WorkingData]],
                               vars = vars,
                               factorVars = factor_vars,
                               ...)
  } else {
    final_table <-
      tableone::CreateTableOne(
        data = bll_flow_model[[pkg.globals$bllFlowContent.Data]],
        vars = vars,
        factorVars = factor_vars,
        strata = strata,
        ...
      )
  }
  # Appends valLabels to metadata of tableone for val printing
  valLabels <-
    sjlabelled::get_labels(bll_flow_model[[pkg.globals$bllFlowContent.WorkingData]][vars], values = "n")
  final_table$MetaData[["valLabels"]] <- valLabels
  return(final_table)
}

#' @export
CreateTableOne.default <- function(...) {
  tableone::CreateTableOne(...)
}

#' Table one long
#'
#' Creates a Long table to summarize data from Table one tables.
#'
#' @param table_one the table one object to be converted into a long table
#' @param bll_flow_model The optional bllFlow object containing labels and extra information on the variables
#'
#' @return Returns the long table
#'
#' @examples
#' library(survival)
#' data(pbc)
#' pbc$exp_percentile <- runif(nrow(pbc), 0, 1)
#' pbc$ageGroup <- ifelse(pbc$age < 20, 1,
#' ifelse(pbc$age >= 20 & pbc$age < 40, 2,
#' ifelse(pbc$age >= 40 & pbc$age < 80, 3,
#' ifelse(pbc$age >= 80, 4, NA))))
#'
#'
#' pbc_table_one <- CreateTableOne(data = pbc, strata = "edema")
#' pbc_summary_table<- table_one_long(pbc_table_one)
#' @export
table_one_long <-
  function(table_one, bll_flow_model = NULL) {
    long_table <- data.frame()
    if (is.null(table_one)) {
      warning("No table one was passed to table_one_long",
              call. = FALSE)
    }
    long_table <- data.frame(stringsAsFactors = FALSE)
    long_table[[pkg.globals$LongTable.VariableCategory]] <-
      character()
    long_table[[pkg.globals$LongTable.Variable]] <- character()
    long_table[[pkg.globals$LongTable.Prevalence]] <-  numeric()
    long_table[[pkg.globals$LongTable.Frequency]] <-  numeric()
    long_table[[pkg.globals$LongTable.NMissing]] <-  numeric()
    long_table[[pkg.globals$LongTable.Mean]] <-  numeric()
    long_table[[pkg.globals$LongTable.SD]] <-  numeric()
    long_table[[pkg.globals$LongTable.Percentile25]] <-  numeric()
    long_table[[pkg.globals$LongTable.Percentile75]] <-  numeric()
    
    return_table <-
      add_to_long_table(table_one, long_table, bll_flow_model[[pkg.globals$bllFlowContent.PopulatedVariableDetails]])
    if (!pkg.globals$LongTable.ClassName %in% class(return_table)) {
      class(return_table) <-
        append(class(return_table), pkg.globals$LongTable.ClassName)
    }
    return_table <- unique(return_table)
    return_summary_data <- list(summary_data = return_table)
    class(return_summary_data) <- "SummaryData"
    
    return(return_summary_data)
    
  }

# Function to create a long table one for one tableOne
add_to_long_table <-
  function(passed_table,
           long_table,
           variable_details) {
    # ----Step 1: Populate long table from cont and cat tableone tables ----
    # Call Cont table extraction if tableOne contains ContTable
    returned_long_tables <- list()
    # table_count is used to populate list and avoid list append issues
    table_count <- 0
    if (!is.null(passed_table$ContTable)) {
      dim_names <- attr(passed_table$ContTable, "dimnames")
      strata_values <- clean_strata_values(dim_names)
      table_count <- table_count + 1
      cont_table_long_table <-
        extract_data_from_cont_table(
          passed_table$ContTable,
          attr(
            passed_table$ContTable,
            pkg.globals$tableOne.StrataVarName
          ),
          strata_values,
          long_table,
          variable_details
        )
      returned_long_tables[[table_count]] <- cont_table_long_table
    }
    
    # Call Cat table extraction if tableOne contains CatTable
    if (!is.null(passed_table$CatTable)) {
      dim_names <- attr(passed_table$CatTable, "dimnames")
      strata_values <- clean_strata_values(dim_names)
      table_count <- table_count + 1
      cat_table_long_table <-
        extract_data_from_cat_table(
          passed_table$CatTable,
          attr(
            passed_table$CatTable,
            pkg.globals$tableOne.StrataVarName
          ),
          strata_values,
          long_table,
          variable_details
        )
      returned_long_tables[[table_count]] <- cat_table_long_table
    }
    
    # ----Step 2: Add any missing columns to the newly created tables----
    for (table_to_append in returned_long_tables) {
      for (column_missing in colnames(long_table)) {
        if (!column_missing %in% colnames(table_to_append)) {
          table_to_append[[column_missing]] <- NA
        }
      }
      # synchronizing columns to avoid binding issues
      for (column_missing in colnames(table_to_append)) {
        if (!column_missing %in% colnames(long_table)) {
          # in case of zero row table columns need to be declared in columns <- dataType()
          # Set data type of missing column to type of append table
          if (nrow(long_table) == 0) {
            class(long_table[[column_missing]]) <-
              class(table_to_append[[column_missing]])
          } else {
            long_table[[column_missing]] <- NA
          }
        }
      }
      
      long_table <-
        rbind(long_table, table_to_append,  stringsAsFactors = FALSE)
    }
    
    return(long_table)
  }

# Create long table from contTable
extract_data_from_cont_table <-
  function(cont_table,
           strata_name,
           strata_values,
           long_table,
           variable_details) {
    strata_split_name <- character()
    
    # ----Step 1: Split the strata name into the two variables ----
    if (!is.null(strata_name)) {
      strata_split_name <-
        unlist(strsplit(as.character(strata_name), split = ":"))
    } else{
      strata_split_name <- strata_name
    }
    
    # ----Step 2: Add columns to long table
    long_table_rows <- data.frame()
    
    # loop through each strata columns
    # ----Step 3: Extract information for each new row of the longtable ----
    for (strata_index in 1:length(cont_table)) {
      variables <- (row.names(cont_table[[strata_index]]))
      for (row in 1:nrow(cont_table[[strata_index]])) {
        strata_split_values <-
          unlist(strsplit(as.character(strata_values[[strata_index]]), split = ":"))
        # extract all the information for that row
        num <-
          cont_table[[strata_index]][row, pkg.globals$tableOne.N]
        n_miss <-
          cont_table[[strata_index]][row, pkg.globals$tableOne.Miss]
        row_mean <-
          cont_table[[strata_index]][row, pkg.globals$tableOne.Mean]
        row_SD <-
          cont_table[[strata_index]][row, pkg.globals$tableOne.SD]
        row_percentile25 <-
          cont_table[[strata_index]][row, pkg.globals$tableOne.p25]
        row_percentile75 <-
          cont_table[[strata_index]][row, pkg.globals$tableOne.p75]
        
        # create the row to add to tableOne Long
        group_by_list <- list()
        if (length(strata_split_name) > 0) {
          group_by_list <-
            fill_in_group_by_columns(strata_split_name,
                                     strata_split_values,
                                     group_by_list,
                                     variable_details)
        }
        
        # ----Step 4: Create long table row ----
        long_table_row <- list()
        long_table_row[[pkg.globals$LongTable.VariableCategory]] <-
          NA
        long_table_row[[pkg.globals$LongTable.Variable]] <-
          variables[[row]]
        long_table_row[[pkg.globals$LongTable.Prevalence]] <-  NA
        long_table_row[[pkg.globals$LongTable.Frequency]] <-  num
        long_table_row[[pkg.globals$LongTable.NMissing]] <-  n_miss
        long_table_row[[pkg.globals$LongTable.Mean]] <-  row_mean
        long_table_row[[pkg.globals$LongTable.SD]] <-  row_SD
        long_table_row[[pkg.globals$LongTable.Percentile25]] <-
          row_percentile25
        long_table_row[[pkg.globals$LongTable.Percentile75]] <-
          row_percentile75
        long_table_row <- append(long_table_row, group_by_list)
        
        # ----Step 5: Clean the row
        for (each_element_index in 1:length(long_table_row)) {
          # remove empty classes to avoid bind conflicts
          # example character(0)
          if (length(long_table_row[[each_element_index]]) == 0) {
            long_table_row[[each_element_index]] <- NA
          }
        }
        
        # ----Step 6: Add row to the rest of the rows----
        long_table_rows <-
          rbind(long_table_rows, long_table_row,  stringsAsFactors = FALSE)
      }
    }
    
    return(long_table_rows)
  }

# Create long table from CatTable
extract_data_from_cat_table <-
  function(cat_table,
           strata_name,
           strata_values,
           long_table,
           variable_details) {
    # ----Step 1: Split the strata name into the two variables ----
    variables_checked <- 0
    var_names <- attr(cat_table[[1]], "names")
    strata_split_name <-
      unlist(strsplit(as.character(strata_name), split = ":"))
    # Adds group by columns not found in the long table
    
    # ----Step 2: Add columns to long table
    long_table_rows <- data.frame()
    
    # ----Step 3: Extract information for each new row of the longtable ----
    for (strata_counter in 1:length(cat_table)) {
      strata_split_values <-
        unlist(strsplit(as.character(strata_values[[strata_counter]]), split = ":"))
      # Loop through the tables of each variable
      for (selected_variable_table in cat_table[[strata_counter]]) {
        # Used to specify the variable being writen
        variables_checked <- variables_checked + 1
        
        # Loop through the levels of each variable
        for (row in 1:nrow(selected_variable_table)) {
          n_miss <- selected_variable_table[row, pkg.globals$tableOne.Miss]
          frequency <-
            selected_variable_table[row, pkg.globals$tableOne.Freq]
          lev_name <-
            selected_variable_table[row, pkg.globals$tableOne.Level]
          prevalence <-
            selected_variable_table[row, pkg.globals$tableOne.Percent]
          group_by_list <- list()
          if (length(strata_split_name) > 0) {
            group_by_list <-
              fill_in_group_by_columns(strata_split_name,
                                       strata_split_values,
                                       group_by_list,
                                       variable_details)
            if (!is.null(variable_details)) {
              group_by_list[[pkg.globals$LongTable.VariableCategoryLabel]] <-
                variable_details[is_equal(variable_details[[pkg.globals$argument.VariableStart]], var_names[[variables_checked]]) &
                                   is_equal(variable_details[[pkg.globals$argument.CatStartValue]], as.character(lev_name)), pkg.globals$argument.CatStartLabel]
              # If empty add NA
              if (length(group_by_list[[pkg.globals$LongTable.VariableCategoryLabel]]) == 0) {
                group_by_list[[pkg.globals$LongTable.VariableCategoryLabel]] <- NA
              }
            }
          }

          # ----Step 4: Create long table row ----
          long_table_row <- list()
          long_table_row[[pkg.globals$LongTable.VariableCategory]] <-
            lev_name
          long_table_row[[pkg.globals$LongTable.Variable]] <-
            var_names[variables_checked]
          long_table_row[[pkg.globals$LongTable.Prevalence]] <-
            prevalence
          long_table_row[[pkg.globals$LongTable.Frequency]] <-
            frequency
          long_table_row[[pkg.globals$LongTable.NMissing]] <-
            n_miss
          long_table_row[[pkg.globals$LongTable.Mean]] <-  NA
          long_table_row[[pkg.globals$LongTable.SD]] <-  NA
          long_table_row[[pkg.globals$LongTable.Percentile25]] <-
            NA
          long_table_row[[pkg.globals$LongTable.Percentile75]] <-
            NA
          long_table_row <- append(long_table_row, group_by_list)

          # ----Step 5: Clean the row
          for (each_element_index in 1:length(long_table_row)) {
            if (length(long_table_row[[each_element_index]]) == 0) {
              long_table_row[[each_element_index]] <- NA
            }
          }

          # ----Step 6: Add row to the rest of the rows----
          long_table_rows <-
            rbind(long_table_rows, long_table_row,  stringsAsFactors = FALSE)
        }
      }
      variables_checked <- 0
    }

    return(long_table_rows)
  }

# Fills group by columns with information from variable details
fill_in_group_by_columns <-
  function(strata_split_name,
           strata_split_values,
           long_table_row,
           variable_details = NULL) {
    for (group_by_index in 1:length(strata_split_name)) {
      long_table_row[[paste(pkg.globals$LongTable.GroupBy, group_by_index, sep = "")]] <-
        strata_split_name[[group_by_index]]
      long_table_row[[paste(pkg.globals$LongTable.GroupByValue, group_by_index, sep = "")]] <-
        strata_split_values[[group_by_index]]
      
      if (!is.null(variable_details)) {
        long_table_row[[paste(pkg.globals$LongTable.GroupByLabel, group_by_index, sep = "")]] <-
          variable_details[is_equal(variable_details[[pkg.globals$argument.VariableStart]], strata_split_name[[group_by_index]]) &
                             is_equal(variable_details[[pkg.globals$argument.CatStartValue]], strata_split_values[[group_by_index]]), pkg.globals$argument.VariableStartLabel]
        long_table_row[[paste(pkg.globals$LongTable.GroupByValueLabel,
                              group_by_index,
                              sep = "")]] <-
          variable_details[is_equal(variable_details[[pkg.globals$argument.VariableStart]], strata_split_name[[group_by_index]]) &
                             is_equal(variable_details[[pkg.globals$argument.CatStartValue]], strata_split_values[[group_by_index]]), pkg.globals$argument.CatStartLabel]
        
      }
    }
    
    return(long_table_row)
  }
Big-Life-Lab/bllflow documentation built on Feb. 1, 2023, 12:29 p.m.