R/permutation.R

Defines functions permutation output summary.permutation plot.permutation

Documented in output permutation plot.permutation summary.permutation

#' Permutation
#'
#' Tool to do a two-sided permutation test on a dataset.
#'
#' Given a dataset where one column specifies the groups and another column specifies the observed values for each group, the function makes a two-sided permuation test. The method to calculate the test statistic used for the test can either be mean, median or a user defined method. Summary, plot or print can be called on the output of class "permutation" and "htest".
#'
#' @param groups The column of the datasat containing the two groups. Has to be factors.
#'
#' @param observations The column of the dataset containing the observations for the two groups. Has to be numeric.
#'
#' @param method One of 3 options: "mean" (Default), "median" or "my_method". If "my_method", the user has to define a function called "my_method(observations)" that calculates the test-statistic, with the only input being the observations from the data grouped by group.
#'
#' @param nPerm Number of permutations. Default is 10^5
#'
#' @return a list of class object "htest" and "permutation".
#'
#' @examples
#'
#' The test data provided in the package, BloodPressure, can be used as follows:
#'
#' permutation(groups = BloodPressure$Group, observations = BloodPressure$Blood_pressure, method = "median", nPerm = 10^4)
#'
#' summary(permutation)
#'
#' plot(permutation)
#'
#' @export
#'

permutation <- function(groups, observations, method = "mean", nPerm = 10^5) {

  data = data.frame("Group" = groups, "Observations" = observations)

  if (method == "mean" ) {
    null.value = c("Difference in mean" = 0)
    fun = mean
    estimate = c("mean_of_group1" = 0, "mean_of_group2" = 0)
  }
  else if (method == "median") {
    null.value = c("Difference in median" = 0)
    fun = median
    estimate = c("median_of_group1" = 0, "median_of_group2" = 0)
  }
  else if (method == "my_method") {
    null.value = c("my_method" = 0)
    fun = my_method
    estimate = c("my_method_of_group1" = 0, "my_method_of_group2" = 0)
  }
  else {
    return(print("Method to calculate test-statistic not known. See description for further help."))
  }

  test_data <- data.frame(Group = unique(data$Group),
                      Val = c(fun(data$Observations[which(data$Group == unique(data$Group)[1])]),
                              fun(data$Observations[which(data$Group == unique(data$Group)[2])])))

  estimate[1] = test_data$Val[1]
  estimate[2] = test_data$Val[2]

  obs_test_stat <- diff(test_data$Val)

  perm_test_values = rep(NA,nPerm)

  for (p in seq(1:nPerm)) {
    perm_data =
      cbind.data.frame(Group = sample(c(data[[1]]), replace = FALSE),
                       Value= c(data[[2]]))

    perm_test_data <- data.frame(Group = unique(perm_data$Group),
                             Val = c(fun(perm_data$Value[which(perm_data$Group == unique(perm_data$Group)[1])]),
                                     fun(perm_data$Value[which(perm_data$Group == unique(perm_data$Group)[2])])))

    perm_test_val <- diff(perm_test_data$Val)

    perm_test_values[p] <- perm_test_val
  }
  
  if (obs_test_stat > 0) {
    p.value = ((sum(perm_test_values >= obs_test_stat))/nPerm) *2 }
  
  if (obs_test_stat < 0) {
    p.value = ((sum(perm_test_values <= obs_test_stat))/nPerm) *2 }

  if (p.value == 0) {
    warn = "Warning" = "nPerm was to low to get any permuted test statistics equal to or more extreme than your observed"
    return(output(data = data,
                  null.value = null.value,
                  estimate = estimate,
                  data.name = "Groups and observations",
                  statistic = c("statistic" = obs_test_stat),
                  perm_statistics = perm_test_values,
                  p.value = p.value,
                  fun = fun,
                  Warnings = warn))
  }
  return(output(data = data,
                null.value = null.value,
                estimate = estimate,
                data.name = "Groups and observations",
                statistic = c("Test statistic" = obs_test_stat),
                perm_statistics = perm_test_values,
                p.value = p.value,
                fun = fun))
}



#' output
#'
#' @return a list of class object "permutation".
#'
#'
#' @export
#'

# Defining class Permutation
output <- function(data = data.frame(),
                   null.value = c(),
                   alternative = "two-sided",
                   method = "Two-sided permutation test",
                   estimate = c(),
                   data.name = "",
                   statistic = c(),
                   parameters = c(),
                   perm_statistics = 0,
                   p.value = 0,
                   fun = "No function specified",
                   Warnings = "None") {

  value <- list(data = data,
                null.value = null.value,
                alternative = alternative,
                method = method,
                estimate = estimate,
                data.name = data.name,
                statistic = statistic,
                parameters = parameters,
                perm_statistics = perm_statistics,
                p.value = p.value,
                Function = fun,
                Warnings = Warnings)

  attr(value,"class") <- c("htest","permutation")
  value
}




#' summary
#'
#' @return a summary of the class object "permutation".
#'
#' @export
#'

# Calling the summary on permutation
summary.permutation <- function(permutation) {
  print(permutation$method)
  print(permutation$estimate)
  print(permutation$statistic)
  print(paste("Permuted p-value:", permutation$p.value))
  if (permutation$Warnings != "None") {
    print(paste("Warning!!",permutation$Warnings))
  }
}




#' plot
#'
#' @return two plots of the class object "permutation".
#'
#' @export
#'

# Calling plot on permutation
plot.permutation <- function(permutation) {
  if (nzchar(system.file(package = "ggplot2")) == TRUE) {

    library(ggplot2)

    plot(ggplot(permutation$data) +
           geom_histogram(aes(x = Observations, fill=Group),
                          alpha = 0.6, bins = 50, color = "white") +
           geom_vline(aes(xintercept=unname(permutation$estimate[1])),
                      size = 0.8, linetype="dashed", color = "red") +
           geom_vline(aes(xintercept=unname(permutation$estimate[2])),
                      size = 0.8, linetype="dashed", color = "blue") +
           annotate(geom="text", x = Inf,y=Inf,
                    label=paste(names(permutation$null.value),"test:",round(permutation$statistic,2)),
                    size = 4,
                    vjust=2, hjust=1) +
           scale_fill_manual(values = c("red", "blue")) +
           theme_bw(base_size = 15) +
           theme(plot.title = element_text(hjust = 0.5)) +
           labs(title = "The observed values",
                y = "Counts"))

    plot(ggplot() +
           geom_histogram(aes(x = permutation$perm_statistics),
                          bins = 50,
                          color = "white",
                          fill = "blue",
                          alpha = 0.6) +
           geom_vline(aes(xintercept = unname(permutation$statistic), color = "Observed\ntest-stat"),
                      size = 0.8, linetype = "dashed") +
           scale_color_manual(values = c("Observed\ntest-stat" = "red")) +
           theme_bw(base_size = 15) +
           theme(plot.title = element_text(hjust = 0.5)) +
           labs(title = "Null distribution of test-statistic",
                x = "Permuted test-statistic",
                y = "Counts"))
  }
  else {
    par(mfrow=c(1,2))
    hist(permutation$data$Observations[permutation$data$Group==unique(permutation$data$Group)[1]],
         main = "Group 1",
         xlab = "Observations",
         col = alpha("red", 0.4))
    abline(v = unname(permutation$estimate[1]),
           col = "red",
           lty = "dashed",
           lwd = 2)
    hist(permutation$data$Observations[permutation$data$Group==unique(permutation$data$Group)[2]],
         main = "Group 2",
         xlab = "Observations",
         col = alpha("blue", 0.4))
    abline(v = unname(permutation$estimate[2]),
           col = "blue",
           lty = "dashed",
           lwd = 2)
    mtext(paste(names(permutation$null.value),"test:",round(permutation$statistic,2)), side = 3, line = -2, outer = TRUE)

    par(mfrow=c(1,1))
    hist(permutation$perm_statistics,
         main = "Null distribution of test-statistic",
         xlab = "Permuted test-statistic",
         col = alpha("blue", 0.4))
    abline(v = unname(permutation$statistic))

  }
}
aumath-advancedr2019/Sampling documentation built on Nov. 26, 2019, 2:08 a.m.