esci2: Effect Sizes and Confidence Intervals

# Takes an overview table and fills in mean CI, df, se, and MoE
# If assume_equal_variance, uses a pooled sd and total df for estimating means
# Otherwise, just uses group sd and df for estimating each group mean
# Uses CI_mdiff_contrast_bs as the base function for calculating CIs
overview.base <- function(
  overview_table,
  conf_level = 0.95,
  assume_equal_variance = FALSE
) {

  over_valid <- overview_table
  # over_valid <- overview_table[overview_table$n > 1, ]
  # 
  
  n_means <- nrow(over_valid)
  contrasts <- matrix(data = 0, nrow = n_means, ncol = n_means)
  diag(contrasts) <- 1
  
  for (x in 1:n_means) {
    res <- CI_mdiff_contrast_bs(
      means = over_valid$m,
      sds = over_valid$s,
      ns = over_valid$n,
      contrast = contrasts[x, ],
      conf_level = conf_level,
      assume_equal_variance = assume_equal_variance
    )
    res$effect_size <- NULL
    over_valid[x, names(res)] <- res
  }
  
  # over_valid <- rbind(
  #   over_valid,
  #   overview_table[overview_table$n < 2, ]
  # )
  
  return(over_valid)
}

# Produces an overview table from summary data
overview.summary <- function(
  means, 
  sds, 
  ns, 
  group_labels = NULL,
  conf_level = 0.95,
  assume_equal_variance = FALSE
) {
  
  # Check group_labels
  if(!is.null(group_labels)) {
    esci_assert_type(group_labels, "is.character")
    row_report <- esci_assert_vector_valid_length(
      group_labels,
      lower = length(means),
      upper = length(means),
      lower_inclusive = TRUE,
      upper_inclusive = TRUE,
      na.rm = FALSE
    )
    names(means) <- group_labels
  } else {
    names(means) <- rep("All", length(means))
  }
  
  overview <- overview.table_definition(rows = length(means))
  overview$group = names(means)
  overview$m = means
  overview$s = sds
  overview$n = ns

  return(
    overview.base(
      overview_table = overview,
      conf_level = conf_level,
      assume_equal_variance = assume_equal_variance
    )
  )
}


# Produces an overview table from vector data
overview.vector <- function(
  grouping_variable = NULL,
  outcome_variable,
  conf_level = 0.95,
  assume_equal_variance = TRUE
) {
  
  if (is.null(grouping_variable)) {
    grouping_variable <- rep("All", length(outcome_variable))
  }
  
  # Deal with NA values in grouping variable------------------------------------
  grouping_variable_report <- esci_assert_vector_valid_length(
    grouping_variable, 
    lower = 2, 
    lower_inclusive = FALSE)
  grouping_variable_all <-grouping_variable_report$total
  grouping_variable_valid <- grouping_variable_report$valid
  grouping_variable_missing <- grouping_variable_report$missing
  
  
  # Calculate overview ---------------------------------------------------------
  groups <- levels(addNA(grouping_variable))
  
  overview <- overview.table_definition(rows = length(groups))
  overview$group = groups
  overview$m <- aggregate(
    outcome_variable,
    by = list(addNA(grouping_variable)),
    FUN = mean,
    drop = FALSE,
    na.rm = TRUE)[, 2]
  overview[ , c("median", "q1", "q3")] <-aggregate(
    outcome_variable,
    by = list(addNA(grouping_variable)),
    FUN = quantile,
    drop = FALSE,
    probs = c(0.50, 0.25, 0.75),
    na.rm = TRUE)[, 2]
  overview$s <- aggregate(
    outcome_variable,
    by = list(addNA(grouping_variable)),
    FUN = sd,
    drop = FALSE,
    na.rm = TRUE)[, 2]
  overview$min <- aggregate(
    outcome_variable,
    by = list(addNA(grouping_variable)),
    FUN = min,
    drop = FALSE,
    na.rm = TRUE)[, 2]
  overview$max <- aggregate(
    outcome_variable,
    by = list(addNA(grouping_variable)),
    FUN = max,
    drop = FALSE,
    na.rm = TRUE)[, 2]
  overview$n <- aggregate(
    outcome_variable,
    by = list(addNA(grouping_variable)),
    drop = FALSE,
    FUN = length)[, 2]
  overview$missing <-aggregate(
    outcome_variable,
    by = list(addNA(grouping_variable)),
    drop = FALSE,
    function(x) { sum(is.na(x)) })[, 2]
  overview$n <- overview$n - overview$missing
  
  if (nrow(overview[is.na(overview$n), ]) > 0) {
    overview[is.na(overview$n), ]$n <- 0
  }
  
  na_level <- NULL
  if(overview[nrow(overview), "n"] == 0) {
    overview <- head(overview, -1)
  } else {
    na_level <- "Missing"
    while (na_level %in% overview$group) {
      na_level <- paste(na_level, "*", sep ="")
    }
    overview[nrow(overview), "group"] <- na_level
    row.names(overview)[nrow(overview)] <- "missing"
  }
  
  overview_no_miss <- overview[row.names(overview) != 'missing', ]
  overview_valid <- overview_no_miss[overview_no_miss$n > 1, ]

  overview_valid <- overview.base(
    overview_table = overview_valid,
    conf_level = conf_level,
    assume_equal_variance = assume_equal_variance
  )
  
  overview_all <- rbind(
    overview_valid,
    overview[row.names(overview) != "missing" & overview$n < 2, ],
    overview[row.names(overview) == "missing", ]
  )
  
  return(overview_all)
  
}

overview.list <- function(
  data,
  grouping_variable = NULL,
  outcome_variable,
  conf_level = 0.95,
  assume_equal_variance = FALSE
  ) {
  
  res <- NULL
  
  # Cycle through the list of columns; 
  #  for each call estimate_mean_one.character, which handles 1 column
  for (outcome in outcome_variable) {
    
    outcome_name <- rlang::as_name(outcome)
    
    # Now pass along to the .vector version of this function
    myres <- overview.vector(
      grouping_variable = data[[grouping_variable]],
      outcome_variable = data[[outcome_name]],
      conf_level = conf_level,
      assume_equal_variance = assume_equal_variance
    )
    
  res <- rbind(res, cbind(outcome_variable_name = outcome_name, myres))
  
  } # Finish cycle through variables

  return(res)
}


# Base definition for the overview table
overview.table_definition <- function(rows = 1) {
  overview <- data.frame(
    group = rep(NA, times = rows),
    m = rep(NA, times = rows),
    lower = rep(NA, times = rows),
    upper = rep(NA, times = rows),
    median = rep(NA, times = rows),
    s = rep(NA, times = rows),
    min = rep(NA, times = rows),
    max = rep(NA, times = rows),
    q1 = rep(NA, times = rows),
    q3 = rep(NA, times = rows),
    n = rep(NA, times = rows),
    missing = rep(NA, times = rows),
    df = rep(NA, times = rows),
    se = rep(NA, times = rows),
    moe = rep(NA, times = rows),
    variability_component = rep(NA, times = rows)
  )
  return(overview)
}