#' Robustified Student's t-Test
#'
#' Performs one and two sample robustified t-tests on vectors
#' of data using ranks or Windsorized data.
#'
#' @param x a (non-empty) numeric vector of data values.
#' @param y an optional (non-empty) numeric vector of data values.
#' @param alternative a character string specifying the alternative
#' hypothesis, must be one of "two.sided" (default),
#' "greater" or "less". You can specify just the
#' initial letter.
#' @param mu a number indicating the true value of the mean
#' (or difference in means if you are performing
#' a two sample test).
#' @param paired (Not availadle, included only for consistency with
#' \code{\link{t.test}})
#' @param var.equal a logical variable indicating whether to treat the
#' two variances as being equal. If TRUE then the pooled
#' variance is used to estimate the variance otherwise
#' the Welch (or Satterthwaite) approximation to the
#' degrees of freedom is used.
#' @param conf.level confidence level of the interval.
#' @param method Possible values are "none" for standard t-test,
#' "windsorized" for t-test on Windsorized data,
#' "ranks" for t-test on ranked data.
#' @param trim used only when \code{method = "windsorized"};
#' the fraction (0 to 0.5) of observations to be
#' trimmed from each end of x. Values of trim
#' outside that range are replaced with the
#' nearest endpoint.
#'
#' @seealso \code{\link{t.test}}, \code{\link{windsorize}}
#'
#' @references
#' Keselman, H. J., Algina, J., Lix, L. M., Wilcox, R. R.,
#' & Deering, K. (2008). A generally robust approach for
#' testing hypotheses and setting confidence intervals for
#' effect sizes. Psychological Methods, 13, 110–129.
#'
#' @references
#' Winter, J.C.F. (2013). Using the Student’s t-test
#' with extremely small sample sizes. Practical Assessment,
#' Research and Evaluation, 18:10.
#'
#' @importFrom stats quantile var pt qt setNames
#' @export
robust.t.test <- function(x, y = NULL,
alternative = c("two.sided", "less", "greater"),
mu = 0, paired = FALSE, var.equal = FALSE,
conf.level = 0.95,
method = c("windsorized", "ranks", "none"),
trim = 0.2, ...) {
trim <- trim/2
stopifnot(trim >= 0 && trim < 0.5)
if (is.null(y) && method == "ranks") {
warning("One-sample t-test on the ranks is not available, using standard one-sample t-test.")
method <- "none"
}
if (paired) {
warning("Robistified paired t-test is not available, using standard paired t-test.")
method <- "none"
}
alternative <- match.arg(alternative)
method <- match.arg(method)
xnam <- deparse(substitute(x))
if (paired) {
x <- x - y
y <- NULL
}
if (!is.null(y)) {
ynam <- deparse(substitute(y))
if (method == "ranks") {
nx <- length(x)
ny <- length(y)
ranks <- rank(c(x,y), ties.method = "average")
x <- ranks[1:nx]
y <- ranks[(nx+1):(nx+ny)]
}
if (method == "windsorized") {
Ny <- length(y)
limy <- quantile(y, probs=c(trim, 1-trim))
ny <- length(y[y >= limy[1] & y <= limy[2]])
y[y < limy[1]] <- limy[1]
y[y > limy[2]] <- limy[2]
my <- mean(y)
vy <- (var(y)*(Ny-1))/(ny-1)
} else {
ny <- length(y)
my <- mean(y)
vy <- var(y)
}
}
if (method == "windsorized") {
Nx <- length(x)
limx <- quantile(x, probs=c(trim, 1-trim))
nx <- length(x[x >= limx[1] & x <= limx[2]])
x[x < limx[1]] <- limx[1]
x[x > limx[2]] <- limx[2]
mx <- mean(x)
vx <- (var(x)*(Nx-1))/(nx-1)
} else {
nx <- length(x)
mx <- mean(x)
vx <- var(x)
}
if (is.null(y)) {
df <- nx-1
stderr <- sqrt(vx/nx)
tstat <- (mx - mu) / stderr
method <- if (paired)
"Paired t-test"
else "Robustified One Sample t-test"
data.name <- xnam
estimate <- setNames(mx, "mean of x")
} else {
estimate <- c(mx, my)
names(estimate) <- c("mean of x", "mean of y")
if (var.equal) {
df <- nx + ny - 2
v <- 0
if (nx > 1)
v <- v + (nx - 1) * vx
if (ny > 1)
v <- v + (ny - 1) * vy
v <- v/df
stderr <- sqrt(v * (1/nx + 1/ny))
} else {
stderrx <- sqrt(vx/nx)
stderry <- sqrt(vy/ny)
stderr <- sqrt(stderrx^2 + stderry^2)
df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny - 1))
}
tstat <- (mx - my - mu)/stderr
method <- if (var.equal)
"Robustified Two Sample t-test"
else "Robustified Welch Two Sample t-test"
names(mu) <- "difference in means"
data.name <- paste0(xnam, " and ", ynam)
}
if (alternative == "less") {
pval <- pt(tstat, df)
cint <- c(-Inf, tstat + qt(conf.level, df))
}
else if (alternative == "greater") {
pval <- pt(tstat, df, lower.tail = FALSE)
cint <- c(tstat - qt(conf.level, df), Inf)
}
else {
pval <- 2 * pt(-abs(tstat), df)
alpha <- 1 - conf.level
cint <- qt(1 - alpha/2, df)
cint <- tstat + c(-cint, cint)
}
cint <- mu + cint * stderr
names(tstat) <- "t"
names(df) <- "df"
names(mu) <- if (paired || !is.null(y))
"difference in means"
else "mean"
attr(cint, "conf.level") <- conf.level
structure(list(statistic = tstat,
parameter = df,
p.value = pval,
conf.int = cint,
estimate = estimate,
null.value = mu,
alternative = alternative,
method = method,
trim = trim,
data.name = data.name),
class = "htest")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.