R/test_stats.R
In permutest: Run Permutation Tests and Construct Associated Confidence Intervals

Documented in diff_in_means diff_in_medians one_sample_mean one_way_anova_stat ttest_stat

#' Calculate difference in means
#'
#' This function takes a data frame, and group and outcome column names as input
#' and returns the mean outcome of the treatment group minus the mean outcome
#' of the control group. The group column must contain exactly two distinct
#' values.
#'
#' @param df A data frame
#' @param group_col The name of the column in df that corresponds to the group label
#' @param outcome_col The name of the column in df that corresponds to the outcome variable
#' @param treatment_value The value of group_col to be considered 'treatment'.
#'   Must be a single value. If \code{NULL} (the default), the first group
#'   label encountered in df is used and a warning is issued.
#' @return The difference in mean outcome between the two groups
#'   (treatment minus control)
#' @examples
#' data <- data.frame(group = c(rep(1, 4), rep(2, 4)),
#'                    outcome = c(rep(3, 4), rep(5, 4)))
#'
#' diff_in_means(df = data,
#'               group_col = "group",
#'               outcome_col = "outcome",
#'               treatment_value = 1)
#'
#' @export
diff_in_means <- function(df, group_col, outcome_col, treatment_value = NULL) {
  # pull the label column once; it is used for the group check and both masks
  labels <- df[[group_col]]
  groups <- unique(labels)
  # if more or less than 2 groups, throw error
  if (length(groups) != 2) {
    stop("Error: dataset must contain exactly 2 unique groups to use this test statistic")
  }
  # decide which group to consider 'treatment'
  if (is.null(treatment_value)) {
    warning(paste0("No value for parameter treatment_value, using ", groups[1], " as treatment group"))
    treatment_group <- groups[1]
    control_group <- groups[2]
  } else {
    # a vector-valued treatment_value cannot be used as an `if` condition
    if (length(treatment_value) != 1) {
      stop("treatment_value must be a single value")
    }
    # isTRUE() turns an NA comparison into FALSE so that a missing
    # treatment_value reaches the informative error below instead of
    # aborting inside `if`
    if (isTRUE(treatment_value == groups[1])) {
      treatment_group <- groups[1]
      control_group <- groups[2]
    } else if (isTRUE(treatment_value == groups[2])) {
      treatment_group <- groups[2]
      control_group <- groups[1]
    } else {
      stop("treatment_value is not a valid value contained in the group column")
    }
  }
  # difference in means test statistic: treatment mean minus control mean
  outcome <- df[[outcome_col]]
  mean(outcome[labels == treatment_group]) - mean(outcome[labels == control_group])
}


#' Calculate difference in medians
#'
#' This function takes a data frame, and group and outcome column names as input
#' and returns the median outcome of the treatment group minus the median
#' outcome of the control group. The group column must contain exactly two
#' distinct values.
#'
#' @importFrom stats median
#' @param df A data frame
#' @param group_col The name of the column in df that corresponds to the group label
#' @param outcome_col The name of the column in df that corresponds to the outcome variable
#' @param treatment_value The value of group_col to be considered 'treatment'.
#'   Must be a single value. If \code{NULL} (the default), the first group
#'   label encountered in df is used and a warning is issued.
#' @return The difference in median outcome between the two groups
#'   (treatment minus control)
#' @examples
#' data <- data.frame(group = c(rep(1, 4), rep(2, 4)),
#'                    outcome = c(rep(3, 4), rep(5, 4)))
#'
#' diff_in_medians(df = data,
#'               group_col = "group",
#'               outcome_col = "outcome",
#'               treatment_value = 1)
#' @export
diff_in_medians <- function(df, group_col, outcome_col, treatment_value = NULL) {
  # pull the label column once; it is used for the group check and both masks
  labels <- df[[group_col]]
  groups <- unique(labels)
  # if more or less than 2 groups, throw error
  if (length(groups) != 2) {
    stop("Error: dataset must contain exactly 2 unique groups to use this test statistic")
  }
  # decide which group to consider 'treatment'
  if (is.null(treatment_value)) {
    warning(paste0("No value for parameter treatment_value, using ", groups[1], " as treatment group"))
    treatment_group <- groups[1]
    control_group <- groups[2]
  } else {
    # a vector-valued treatment_value cannot be used as an `if` condition
    if (length(treatment_value) != 1) {
      stop("treatment_value must be a single value")
    }
    # isTRUE() turns an NA comparison into FALSE so that a missing
    # treatment_value reaches the informative error below instead of
    # aborting inside `if`
    if (isTRUE(treatment_value == groups[1])) {
      treatment_group <- groups[1]
      control_group <- groups[2]
    } else if (isTRUE(treatment_value == groups[2])) {
      treatment_group <- groups[2]
      control_group <- groups[1]
    } else {
      stop("treatment_value is not a valid value contained in the group column")
    }
  }
  # difference in medians test statistic: treatment median minus control median
  outcome <- df[[outcome_col]]
  median(outcome[labels == treatment_group]) - median(outcome[labels == control_group])
}


#' Calculate t-test statistic
#'
#' Splits the outcome column into two samples according to the two distinct
#' values of the group column and returns the statistic reported by
#' \code{t.test} (called with its default settings) for those samples.
#' The first sample is the group whose label appears first in df.
#'
#' @importFrom stats t.test
#' @param df A data frame
#' @param group_col The name of the column in df that corresponds to the group label
#' @param outcome_col The name of the column in df that corresponds to the outcome variable
#' @return The t test statistic, as an unnamed numeric value
#' @export
ttest_stat <- function(df, group_col, outcome_col) {
  group_labels <- unique(df[[group_col]])
  # this statistic is only defined for a two-group comparison
  if (length(group_labels) != 2) {
    stop("Error: dataset must contain exactly 2 unique groups to use this test statistic")
  }
  # split the outcome into one sample per group
  first_sample <- df[[outcome_col]][df[group_col] == group_labels[1]]
  second_sample <- df[[outcome_col]][df[group_col] == group_labels[2]]
  test_result <- t.test(first_sample, second_sample)
  # as.numeric() drops the "t" name attached to the statistic
  as.numeric(test_result[["statistic"]])
}

#' Calculate one-way anova test statistic
#'
#' For every distinct value of the group column, takes the squared distance
#' between that group's mean outcome and the overall mean outcome, weights it
#' by the group size, and adds the weighted terms up.
#'
#' @param df A data frame
#' @param group_col The name of the column in df that corresponds to the group label
#' @param outcome_col The name of the column in df that corresponds to the outcome variable
#' @return The one-way anova test statistic:
#' \eqn{\sum_{g=1}^G n_g(\overline{X_g} - \overline{X})^2} where \eqn{g} indexes the groups
#' @export
one_way_anova_stat <- function(df, group_col, outcome_col) {
  # mean of the outcome over all rows, regardless of group
  grand_mean <- mean(df[[outcome_col]])
  # running total of the size-weighted squared deviations
  stat <- 0
  for (label in unique(df[[group_col]])) {
    in_group <- df[[outcome_col]][df[group_col] == label]
    deviation <- mean(in_group) - grand_mean
    stat <- stat + length(in_group) * deviation^2
  }
  stat
}

#' Calculate the one-sample problem test statistic
#'
#' Multiplies the outcome column element-wise by the group column and returns
#' the mean of those products. This test statistic is used for the one-sample
#' problem.
#'
#' @param df A data frame
#' @param group_col The name of the column in df that corresponds to the group label
#' @param outcome_col The name of the column in df that corresponds to the outcome variable
#' @return The one-sample problem test statistic: the mean of the product of the outcome and group
#' @examples
#' data <- data.frame(group = c(rep(1, 4), rep(2, 4)),
#'                    outcome = c(rep(3, 4), rep(5, 4)))
#'
#' one_sample_mean(df = data,
#'               group_col = "group",
#'               outcome_col = "outcome")
#'
#' @export
one_sample_mean <- function(df, group_col, outcome_col) {
  outcome_values <- df[[outcome_col]]
  group_values <- df[[group_col]]
  # element-wise product of outcome and group, then averaged over all rows
  mean(outcome_values * group_values)
}

Any scripts or data that you put into this service are public.

permutest documentation built on Sept. 26, 2024, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

permutest
Run Permutation Tests and Construct Associated Confidence Intervals

R/test_stats.R
In permutest: Run Permutation Tests and Construct Associated Confidence Intervals

Defines functions one_sample_mean one_way_anova_stat ttest_stat diff_in_medians diff_in_means

Documented in diff_in_means diff_in_medians one_sample_mean one_way_anova_stat ttest_stat

Try the permutest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

permutest Run Permutation Tests and Construct Associated Confidence Intervals

R/test_stats.R In permutest: Run Permutation Tests and Construct Associated Confidence Intervals

Defines functions one_sample_mean one_way_anova_stat ttest_stat diff_in_medians diff_in_means

Documented in diff_in_means diff_in_medians one_sample_mean one_way_anova_stat ttest_stat

Try the permutest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

permutest
Run Permutation Tests and Construct Associated Confidence Intervals

R/test_stats.R
In permutest: Run Permutation Tests and Construct Associated Confidence Intervals