R/MeanTest.R In hectorhaffenden/meantest: Mean Hypothesis Test

Defines functions MeanTest

```r
#' @title Mean Hypothesis Test
#'
#' @description This package runs a hypothesis test for the mean of your data.
#'
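#' @param x A compulsory numeric vector on which the test is performed.
#' @param y An optional numeric vector. x and y must have the same length,
#'   greater than one, with no missing values.
#' @param paired If FALSE (the default), the data are assumed to be
#'   independent; otherwise they are treated as dependent.
#' @param null The null hypothesis value the data are tested against.
#' @param alternative Type of test: "ne" for not equal, "l" for less than,
#'   "g" for greater than.
#' @param bootstrap Number of bootstraps performed / Monte Carlo samples
#'   generated.
#' @param number.of.permutations Number of samples drawn from the permutation
#'   distribution.
#' @param seed Allows reproduction of the result. Only used by
#'   oneSamplePermutationTest.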
#'
#' @return Results of hypothesis test for the mean
#'
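#' @examples
#' x <- c(5.1, 4.9, 6.2, 5.8, 6.0, 5.5, 5.3, 6.1)
#' y <- c(6.3, 6.9, 7.1, 6.4, 7.4, 6.8, 7.0, 6.6)
#' MeanTest(x, y)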
#'
#' @export

MeanTest <- function(x, y, paired=FALSE, null=0, alternative="ne", bootstrap = 5000,
number.of.permutations = 5000, seed = NULL) {
# Computes a hypothesis test for the mean of one or two samples of data.
#
# Args:
#   x: A compulsory numeric vector on which the test is performed.
#   y: An optional numeric vector. x and y must have the same length,
#      greater than one, with no missing values.
#   paired: If FALSE, assumes data is independent; if not, dependent.
#      Default is FALSE.
#   null: The null hypothesis you are testing the data against.
#   alternative: Type of test. "ne" for not equal. "l" for less than.
#      "g" for greater than.
#   bootstrap: Number of bootstraps performed / Monte Carlo samples generated.
#   number.of.permutations: Number of samples from the permutation distribution.
#   seed: Allows reproduction of result. Only used by oneSamplePermutationTest.
#
# Returns:
#   The hypothesis test for the mean of your data.

# Translate the short codes 'l', 'g', 'ne' into the more universal names
# "less", "greater" and "two.sided". Any code other than "l" or "g" is
# treated as two-sided.
if (alternative == "l") {
  sided.test <- "less"
} else if (alternative == "g") {
  sided.test <- "greater"
} else {
  sided.test <- "two.sided"
}

# Preventing human error: a paired test needs a second sample.
# `&&` is the scalar, short-circuiting operator meant for `if` conditions.
if (missing(y) && paired == TRUE) {
  stop("Single input data cannot be paired")
}

# My version of t.test, does one and two samples.
HomemadeTTest <- function(x, y, null = 0, var.equal = FALSE, alternative = "ne") {
  # Runs a t-test on one or two samples and prints the result.
  #
  # Args:
  #   x: Numeric vector holding the first (or only) sample.
  #   y: Optional numeric vector holding the second sample. When it is
  #      missing, a one-sample test of mean(x) against `null` is run.
  #   null: Hypothesised mean for the one-sample test. The two-sample tests
  #      do not use it; their statistic is the plain difference in means.
  #   var.equal: If TRUE the two-sample test pools the two variances;
  #      otherwise the unequal-variance form is used.
  #   alternative: "less" or "greater" for a one-sided test; any other value
  #      (including the default "ne") gives the two-sided test.
  #
  # Returns:
  #   Invisibly, the degrees of freedom (the value of the final print call).
  #   The test name, alternative hypothesis, null value (one-sample only),
  #   test statistic, p-value and degrees of freedom are printed.

  # Prints one block of test output. `null` is only printed when supplied,
  # which avoids comparing a numeric null against a string sentinel.
  results <- function(test.type, alternative, null = NULL, test.statistic, p,
                      Degrees.of.freedom) {
    print(test.type)
    print(alternative)
    if (!is.null(null)) {
      print(null)
    }
    print("Test statistic")
    print(test.statistic)
    print("P-value")
    print(p)
    print("Degrees of freedom:")
    print(Degrees.of.freedom)
  }

  # Tail probability of the t distribution for the requested alternative.
  # The same degrees of freedom are used for every alternative, so the
  # p-value always agrees with the degrees of freedom that get printed.
  tail.probability <- function(statistic, df) {
    if (alternative == "less") {
      pt(statistic, df = df)
    } else if (alternative == "greater") {
      pt(statistic, df = df, lower.tail = FALSE)
    } else {
      2 * pt(-abs(statistic), df = df)
    }
  }

  # Wording inserted into the printed alternative hypothesis.
  direction <- if (alternative == "less") {
    "less than"
  } else if (alternative == "greater") {
    "greater than"
  } else {
    "not equal to"
  }

  if (missing(y)) {
    # One-sample test of mean(x) against `null`.
    n.x <- length(x)
    deg.free <- n.x - 1
    test.statistic <- (mean(x) - null) / (sd(x) / sqrt(n.x))

    results(test.type = "One Sample homemade t-test",
            alternative = paste0("alternative hypothesis: true mean is ",
                                 direction, " the null:"),
            null = null,
            test.statistic = test.statistic,
            p = tail.probability(test.statistic, deg.free),
            Degrees.of.freedom = deg.free)
  } else {
    n.x <- length(x)
    n.y <- length(y)
    var.x <- var(x)
    var.y <- var(y)
    mean.difference <- mean(x) - mean(y)

    if (var.equal == TRUE) {
      # Pooled-variance test on n.x + n.y - 2 degrees of freedom.
      test.type <- "Two sample homemade t-test with equal variance."
      deg.free <- n.x + n.y - 2
      pooled.variance <- ((n.x - 1) * var.x + (n.y - 1) * var.y) / deg.free
      test.statistic <- mean.difference /
        sqrt(pooled.variance * (1 / n.x + 1 / n.y))
    } else {
      # Unequal-variance test; the degrees of freedom are approximated from
      # the two squared standard errors.
      test.type <- "Two sample homemade t-test with unequal variance."
      squared.se.x <- var.x / n.x
      squared.se.y <- var.y / n.y
      deg.free <- (squared.se.x + squared.se.y)^2 /
        (squared.se.x^2 / (n.x - 1) + squared.se.y^2 / (n.y - 1))
      test.statistic <- mean.difference / sqrt(squared.se.x + squared.se.y)
    }

    results(test.type = test.type,
            alternative = paste0(
              "alternative hypothesis: true mean of sample x is ",
              direction, " the true mean of sample y."),
            test.statistic = test.statistic,
            p = tail.probability(test.statistic, deg.free),
            Degrees.of.freedom = deg.free)
  }
}
MeanTestOne <- function(x) {
  # Chooses and runs a one-sample test of the mean of x.
  #
  # Args:
  #   x: The sample to test (the caller passes either the raw data or the
  #      paired differences).
  #
  # Returns:
  #   Whatever the selected test returns.
  #
  # `null`, `sided.test`, `bootstrap`, `number.of.permutations` and `seed`
  # are read from the enclosing function rather than passed in.
  # NOTE(review): symmetry.test and oneSamplePermutationTest are not defined
  # in this file; presumably they come from attached packages - confirm.

  if (shapiro.test(x)$p.value > 0.05) {
    # Shapiro-Wilk does not reject normality: use the t-test.
    HomemadeTTest(x, null = null, alternative = sided.test)
  } else if (symmetry.test(x, B = bootstrap)[4] > 0.05) {
    # Element 4 of the symmetry.test result is compared with 0.05;
    # presumably it is the p-value - TODO confirm against that function.
    wilcox.test(x, mu = null, alternative = sided.test)
  } else {
    oneSamplePermutationTest(x, mu = null, alternative = sided.test,
                             n.permutations = number.of.permutations,
                             seed = seed)
  }
}
if (missing(y) == TRUE) {
MeanTestOne(x) # If one sample case is inputted.
} else # Dependent
if (paired == TRUE){
d <- (x - y)
print("Differences")
MeanTestOne(d)
} else # Independent
if ((shapiro.test(x)[2] > 0.05) & (shapiro.test(y)[2] > 0.05)){ # Test for normal data.
if (var.test(x ,y)[3] > 0.05){
HomemadeTTest(x, y, var.equal=TRUE, alternative = sided.test)
} else {