#-------------------------------------- HEADER --------------------------------------------#
#' @title Distributed Student's t-Test
#' @description Computes one and two sample t-tests on vectors of data
#' @details If paired is TRUE then both x and y must be specified and they must be the same length. Missing values are silently
#' removed (in pairs if paired is TRUE). If var.equal is TRUE then the pooled estimate of the variance is used. By default, if
#' var.equal is FALSE then the variance is estimated separately for both groups and the Welch modification to the degrees of freedom is used.
#'
#' @param x a character, the name of a numeric vector of data values.
#' @param y x a character, the name of a optional numeric vector of data values.
#' @param mu a number indicating the true value of the mean (or difference in means if you are performing a two sample test).
#' @param paired a logical indicating whether you want a paired t-test.
#' @param conf.level confidence level of the interval.
#' @param var.equal a logical variable indicating whether to treat the two variances as being equal.
#' If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used.
#' @param datasources a list of opal object(s) obtained after login in to opal servers;
#' these objects hold also the data assign to R, as \code{data frame}, from opal datasources.
#'
#' @return A list containing the following components:
#' \item{statistic}{the value of the t-statistic.}
#' \item{parameter}{the degrees of freedom for the t-statistic.}
#' \item{p.value}{the p-value for the test.}
#' \item{conf.int}{a confidence interval for the mean appropriate to the specified alternative hypothesis.}
#' \item{estimate}{the estimated mean or difference in means depending on whether it was a one-sample test or a two-sample test.}
#' \item{null.value}{the specified hypothesized value of the mean or mean difference depending on whether it was a one-sample test or a two-sample test.}
#' \item{method}{a character string indicating what type of t-test was performed.}
#'
#' @author Paula Raissa Costa e Silva
#' @section Dependencies:
#' \code{\link{getLength}}, \code{\link{getVariance}}
#' @export
#' @examples {
#' ds.tTest(x)
#' ds.tTest(x, y)
#' }
#'
ds.tTest <- function(x, y=NULL, mu=0, paired=FALSE, conf.level=0.95, var.equal=FALSE, datasources=NULL) {
#-------------------------------------- BASIC CHECKS ----------------------------------------------#
if(is.null(x)){
stop("Please provide the name of the input vector", call. = FALSE)
}
if (!missing(mu) && (length(mu)!=1 || is.na(mu)))
stop("Please provide a single value as mu")
if (!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) || conf.level < 0 || conf.level > 1))
stop("Please provide a single number between 0 and 1")
# if no opal login details are provided look for 'opal' objects in the environment
if(is.null(datasources)){
datasources <- findLoginObjects()
}
xnames <- extract(x)
varname <- xnames$elements
obj2lookfor <- xnames$holders
if(is.na(obj2lookfor)){
defined <- isDefined(datasources, varname)
} else {
defined <- isDefined(datasources, obj2lookfor)
}
xLenght <- ds.length(x, datasources=datasources)
xMean <- ds.arMean(x, datasources=datasources)
xVar <- ds.var(x, datasources=datasources)
if (!is.null(y)) {
yLenght <- ds.length(y, datasources=datasources)
if(xLenght < 1 || (!var.equal && xLenght < 2)) stop("not enough 'x' observations")
if(yLenght < 1 || (!var.equal && yLenght < 2)) stop("not enough 'y' observations")
if(var.equal && xLenght+yLenght < 3) stop("not enough observations")
yMean <- ds.arMean(y, datasources=datasources)
yVar <- ds.var(y, datasources=datasources)
method <- paste(if(!var.equal) "Welch", "Two Sample t-test")
estimate <- c(xMean, yMean)
names(estimate) <- c("mean of x", "mean of y")
if(var.equal) {
df <- xLenght + yLenght - 2
variance <- 0
if(xLenght > 1) variance <- variance + (xLenght-1) * xVar
if(yLenght > 1) variance <- variance + (yLenght-1) * yVar
variance <- variance / df
stderr <- sqrt(variance * (1/xLenght + 1/yLenght))
} else {
stderrx <- sqrt(xVar / xLenght)
stderry <- sqrt(yVar / yLenght)
stderr <- sqrt(stderrx^2 + stderry^2)
df <- stderr^4 / (stderrx^4/(xLenght-1) + stderry^4 / (yLenght-1))
}
if(stderr < 10 * .Machine$double.eps * max(abs(xMean), abs(yMean))) stop("data are essentially constant")
tstat <- (xMean - yMean - mu)/stderr
} else {
if(xLenght < 2) stop("not enough 'x' observations")
df <- xLenght - 1
stderr <- sqrt(xVar / xLenght)
if(stderr < 10 * .Machine$double.eps * abs(xMean)) stop("data are essentially constant")
tstat <- (xMean - mu) / stderr
method <- if (paired) "Paired t-test" else "One Sample t-test"
estimate <- setNames(xMean, if(paired) "mean of the differences" else "mean of x")
}
# mean <- ds.arMean(x, type, datasources)
# sem <- ds.sem(x, type, datasources)
#
# t <- (mean - mu) / sem
pval <- 2 * pt(-abs(tstat), df)
alpha <- 1 - conf.level
# Confidence interval
confidence <- qt(1 - alpha/2, df)
confidence <- tstat + c(-confidence, confidence)
confidence <- mu + confidence * stderr
names(tstat) <- "t"
names(df) <- "df"
names(mu) <- if(paired || !is.null(y)) "difference in means" else "mean"
attr(confidence, "conf.level") <- conf.level
t <- list(statistic = tstat, parameter = df, p.value = pval,
conf.int = confidence, estimate = estimate, null.value = mu,
method = method)
return(t)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.