# R/MeanTest.R
#
# Defines functions MeanTest

#' @title Mean Hypothesis Test
#'
#' @description Runs a hypothesis test for the mean of one sample, or for the
#'   difference between the means of two samples (paired or independent).
#'
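#' @param x A numeric vector of data (required).
#' @param y An optional second numeric vector. If omitted, a one-sample test
#'   is run on \code{x}.
#' @param paired If \code{TRUE}, \code{x} and \code{y} are treated as paired
#'   and the one-sample procedure is applied to \code{x - y}. Default
#'   \code{FALSE}.
#' @param null The value of the mean under the null hypothesis. Default 0.
#' @param alternative Type of test: \code{"ne"} for not equal, \code{"l"} for
#'   less than, \code{"g"} for greater than. Default \code{"ne"}.
#' @param bootstrap Number of bootstrap samples passed to the symmetry test.
#'   Default 5000.
#' @param number.of.permutations Number of samples drawn from the permutation
#'   distribution by the permutation tests. Default 5000.
#' @param seed Seed passed to the one-sample permutation test so that its
#'   result can be reproduced. Default \code{NULL}.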
#'
#' @return Results of hypothesis test for the mean
#'
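#' @examples
#' x <- c(4.9, 5.1, 5.6, 4.7, 5.3, 5.0, 5.4, 4.8)
#' y <- c(5.2, 5.8, 5.5, 6.1, 5.4, 5.9, 5.6, 6.0)
#' MeanTest(x, y)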
#'
#' @export

MeanTest <- function(x, y, paired = FALSE, null = 0, alternative = "ne", bootstrap = 5000,
                     number.of.permutations = 5000, seed = NULL) {
  # Computes a hypothesis test for the mean of one or two samples of data.
  #
  # The procedure is chosen from the data: a t-test when shapiro.test does not
  # reject normality at the 5% level, otherwise a Wilcoxon test when the
  # symmetry test does not reject, otherwise a permutation test.
  #
  # Args:
  #   x: A compulsory numeric vector on which the test is performed.
  #   y: An optional numeric vector. When paired is TRUE, x and y must have
  #      the same length.
  #   paired: If FALSE, assumes data is independent; if not, dependent.
  #      Default is FALSE.
  #   null: The null hypothesis you are testing the data against. Used by the
  #      one-sample / paired paths and by the two-sample Wilcoxon test; it is
  #      NOT applied by the two-sample t-test or perm.t.test paths.
  #   alternative: Type of test. "ne" for not equal. "l" for less than.
  #      "g" for greater than. Any other value is treated as "ne".
  #   bootstrap: Number of bootstraps performed / Monte Carlo samples
  #      generated by symmetry.test.
  #   number.of.permutations: Number of samples from the permutation distribution.
  #   seed: Allows reproduction of result. Only used by oneSamplePermutationTest.
  #
  # Returns:
  #   On the t-test paths the results are printed and the degrees of freedom
  #   (the value of the last print call) are returned invisibly. On the other
  #   paths the value of wilcox.test, oneSamplePermutationTest or perm.t.test
  #   is returned.
  #
  # NOTE(review): symmetry.test, oneSamplePermutationTest and perm.t.test are
  # not defined in this file; they must be supplied by attached packages.

  has.y <- !missing(y)
  is.paired <- isTRUE(as.logical(paired))

  # Setting 'l', 'g', 'ne' to a more universal form.
  if (alternative == "l") {
    sided.test <- "less"
  } else if (alternative == "g") {
    sided.test <- "greater"
  } else {
    sided.test <- "two.sided"
  }

  # Preventing human error.
  if (!has.y && is.paired) {
    stop("Single input data cannot be paired")
  }
  if (has.y && is.paired && length(x) != length(y)) {
    # Without this check x - y would silently recycle the shorter vector.
    stop("Paired samples must have the same length")
  }

  # P-value of a t statistic for the requested side of the test.
  TailProbability <- function(statistic, df, side) {
    if (side == "less") {
      pt(statistic, df = df)
    } else if (side == "greater") {
      pt(statistic, df = df, lower.tail = FALSE)
    } else {
      2 * pt(-abs(statistic), df = df)
    }
  }

  # Prints one t-test report. The null is printed only when supplied.
  PrintResults <- function(test.type, alternative.text, test.statistic, p,
                           degrees.of.freedom, null = NULL) {
    print(test.type)
    print(alternative.text)
    if (!is.null(null)) {
      print(null)
    }
    print("Test statistic")
    print(test.statistic)
    print("P-value")
    print(p)
    print("Degrees of freedom:")
    print(degrees.of.freedom)
  }

  # My version of t.test, does one and two samples. 'alternative' takes the
  # universal form ("less", "greater", anything else is two sided).
  HomemadeTTest <- function(x, y, null = 0, var.equal = FALSE, alternative = "two.sided") {
    if (alternative == "less") {
      relation <- "less than"
    } else if (alternative == "greater") {
      relation <- "greater than"
    } else {
      relation <- "not equal to"
    }

    if (missing(y)) {
      # One sample: t = (mean - null) / (sd / sqrt(n)) on n - 1 df.
      df <- length(x) - 1
      test.statistic <- (mean(x) - null) / (sd(x) / sqrt(length(x)))
      PrintResults(
        test.type = "One Sample homemade t-test",
        alternative.text = paste0("alternative hypothesis: true mean is ",
                                  relation, " the null:"),
        test.statistic = test.statistic,
        p = TailProbability(test.statistic, df, alternative),
        degrees.of.freedom = df,
        null = null
      )
    } else {
      length.x <- length(x)
      length.y <- length(y)
      mean.difference <- mean(x) - mean(y)

      if (var.equal) {
        # Pooled-variance t-test. Every alternative uses n1 + n2 - 2 degrees
        # of freedom, matching the value that is reported.
        df <- length.x + length.y - 2
        pooled.variance <- ((length.x - 1) * var(x) + (length.y - 1) * var(y)) / df
        test.statistic <- mean.difference /
          sqrt(pooled.variance * (1 / length.x + 1 / length.y))
        test.type <- "Two sample homemade t-test with equal variance."
      } else {
        # Unequal variances: Welch-Satterthwaite approximation for the df.
        squared.se.x <- var(x) / length.x
        squared.se.y <- var(y) / length.y
        df <- (squared.se.x + squared.se.y)^2 /
          (squared.se.x^2 / (length.x - 1) + squared.se.y^2 / (length.y - 1))
        test.statistic <- mean.difference / sqrt(squared.se.x + squared.se.y)
        test.type <- "Two sample homemade t-test with unequal variance."
      }

      PrintResults(
        test.type = test.type,
        alternative.text = paste0("alternative hypothesis: true mean of sample x is ",
                                  relation, " the true mean of sample y."),
        test.statistic = test.statistic,
        p = TailProbability(test.statistic, df, alternative),
        degrees.of.freedom = df
      )
    }
  }

  # One-sample procedure; also used on the differences of paired data.
  MeanTestOne <- function(x) {
    if (shapiro.test(x)$p.value > 0.05) {
      HomemadeTTest(x, null = null, alternative = sided.test)
    } else if (symmetry.test(x, B = bootstrap)[[4]] > 0.05) {
      # NOTE(review): element 4 of the symmetry.test result is presumed to be
      # its p-value -- confirm against the package that provides it.
      wilcox.test(x, mu = null, alternative = sided.test)
    } else {
      oneSamplePermutationTest(x, mu = null, alternative = sided.test,
                               n.permutations = number.of.permutations, seed = seed)
    }
  }

  if (!has.y) {
    MeanTestOne(x) # If one sample case is inputted.
  } else if (is.paired) { # Dependent
    d <- x - y
    print("Differences")
    MeanTestOne(d)
  } else if (shapiro.test(x)$p.value > 0.05 && shapiro.test(y)$p.value > 0.05) {
    # Independent, normal data: pick the t-test variant from the F test.
    equal.variances <- var.test(x, y)$p.value > 0.05
    HomemadeTTest(x, y, var.equal = equal.variances, alternative = sided.test)
  } else {
    # Both symmetry tests are evaluated up front (no short-circuit) so the
    # number of bootstrap draws taken from the RNG does not depend on x.
    x.symmetric <- symmetry.test(x, B = bootstrap)[[4]] > 0.05
    y.symmetric <- symmetry.test(y, B = bootstrap)[[4]] > 0.05
    if (x.symmetric && y.symmetric) {
      wilcox.test(x, y, mu = null, alternative = sided.test)
    } else {
      perm.t.test(x, y, alternative = sided.test, B = number.of.permutations)
    }
  }
}
# hectorhaffenden/meantest documentation built on May 12, 2018, 1:21 p.m.