R/function_boot_two_groups.R

Defines functions boot_two_groups

Documented in boot_two_groups

##########################################################################
##########################################################################
##########################################################################
# Function - boot_two_groups
#          - Write function to perform bootstrap resampling for two
#            groups comparisons and calculate differences between
#            groups and CI's across a range of sample sizes
##########################################################################
##########################################################################
##########################################################################

# Define bootstrap sampling function ---------------------------------

#' Bootstrap sampling for difference in means between two groups
#'
#' Calculate difference in mean CT limits between two groups.
#' @param data Data frame contain raw data.
#' @param groups_col Factor. Column containing names of two populations to compare
#' @param n_max Numeric. Maximum sample size to extrapolate simulations.
#' @param n_min Numeric. Minimum sample size to extrapolate simulations. Defaults to 3.
#' @param iter Numeric. Number of bootstrap samples to draw. Defaults to 29.
#' @param response Numeric. Column containing thermal limit data for individual samples.
#' @param group1 String. Name of first population to compare.
#' @param group2 String. Name of second population to compare.
#' @importFrom magrittr %>%
#' @importFrom purrr map
#' @importFrom purrr map2
#' @importFrom tidyr nest
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr left_join
#' @importFrom dplyr select
#' @importFrom dplyr mutate
#' @importFrom dplyr summarise
#' @importFrom dplyr sample_n
#' @importFrom stats qt
#' @importFrom stats sd
#' @importFrom tidyr unnest
#' @importFrom tidyr crossing
#' @import rlang
#' @return A data frame of bootstrap resamples
#' @examples
#' head(coreid_data)
#' sims <- boot_two_groups(data = coreid_data,
#'                         groups_col = col,
#'                         response = response,
#'                         group1 = "Catorhintha schaffneri_APM",
#'                         group2 = "Catorhintha schaffneri_NPM",
#'                         n_max = 49,
#'                         iter = 99)
#' @export

# Define function
boot_two_groups <- function(data,
                            groups_col,
                            n_max,
                            n_min = 3,
                            iter = 29,
                            response,
                            group1,
                            group2){

  # Filter which species to plot
  df <- dplyr::filter({{ data }},
                      {{ groups_col }} %in% c( {{group1}}, {{ group2 }} ))

  # Perform boostrap sampling
  boot_data <- df %>%
    dplyr::group_by( {{ groups_col }} ) %>%
    tidyr::nest() %>%
    tidyr::crossing(sample_size = c({{ n_min }} : {{ n_max }}),
                    iter = seq(1: {{ iter }} )) %>%
    # Added sampling with replacement to code below
    dplyr::mutate(sample_data = purrr::map2(data,
                                            sample_size,
                                            ~ dplyr::sample_n(.x, .y,
                                                             # Sample with replacement
                                                             replace = TRUE))) %>%
    dplyr::mutate(calc = purrr::map(sample_data,
                                    ~ dplyr::summarise(.,
                                               mean_val = mean( {{ response }} ),
                                               sd_val = stats::sd(( {{ response }} ))))) %>%
    dplyr::select({{ groups_col }}, sample_size, iter, calc) %>%
    tidyr::unnest(cols = calc)

  # Add CI's to raw bootstrap samples
  boot_data <- boot_data %>%
    # Add standard errors
    dplyr::mutate(std_error = sd_val/sqrt(sample_size)) %>%
    # Now find error
    dplyr::mutate(error = stats::qt(0.975, df = sample_size - 1) * std_error) %>%
    # Calculate lower and upper 95% CI limits
    dplyr::mutate(lower_ci  = mean_val - error,
                  upper_ci  = mean_val + error) %>%
    dplyr::ungroup()

  # Split data from two groups
  sp1_data <- boot_data %>%
    dplyr::filter({{ groups_col }} == {{ group1 }})
  sp2_data <- boot_data %>%
    dplyr::filter({{ groups_col }} == {{ group2 }})

  # Convert data into wide format
  comb_data <- dplyr::left_join(sp1_data,
                                sp2_data,
                                by = c("sample_size", "iter"))

  # Let's keep only the columns we need, as things are about to get real
  comb_data <- comb_data %>%
    dplyr::select(sample_size,
                  iter,
                  col.x,
                  mean_val.x,
                  sd_val.x,
                  col.y,
                  mean_val.y,
                  sd_val.y)

  # Add student t CI's
  comb_data <- comb_data %>%
    dplyr::mutate(mean_diff    = mean_val.x - mean_val.y,
           first_term   = (sample_size - 1) * sd_val.x^2,
           second_term  = (sample_size - 1) * sd_val.y^2,
           pooled_df    = (sample_size * 2) - 2,
           s_pooled     = sqrt((first_term + second_term) / pooled_df),
           se_diff      = s_pooled * sqrt((1 / sample_size) + (1 / sample_size)),
           error        = stats::qt(0.975, df = pooled_df) * se_diff,
           lower_ci     = mean_diff - error,
           upper_ci     = mean_diff + error)

  # Calculate summary statistics
  comb_data_sum <- comb_data %>%
    dplyr::group_by(sample_size) %>%
    dplyr::summarise(mean_low_ci = mean(lower_ci),
              mean_upp_ci    = mean(upper_ci),
              mean_diff      = mean(mean_diff),
              width_ci       = mean_upp_ci - mean_low_ci,
              sd_width       = stats::sd(upper_ci - lower_ci),
              sd_width_lower = width_ci - sd_width,
              sd_width_upper = width_ci + sd_width)

  return(comb_data_sum)

}

##########################################################################
##########################################################################
##########################################################################
guysutton/ThermalSampleR documentation built on Sept. 8, 2020, 12:07 a.m.