R/robust-t-test.R

Defines functions robust.t.test

Documented in robust.t.test

#' Robustified Student's t-Test
#' 
#' Performs one and two sample robustified t-tests on vectors
#' of data using ranks or Windsorized data.
#' 
#' @param x           a (non-empty) numeric vector of data values.
#' @param y           an optional (non-empty) numeric vector of data values.
#' @param alternative a character string specifying the alternative
#'                    hypothesis, must be one of "two.sided" (default),
#'                    "greater" or "less". You can specify just the
#'                    initial letter.
#' @param mu          a number indicating the true value of the mean
#'                    (or difference in means if you are performing
#'                    a two sample test).
#' @param paired      (Not availadle, included only for consistency with
#'                    \code{\link{t.test}})
#' @param var.equal   a logical variable indicating whether to treat the
#'                    two variances as being equal. If TRUE then the pooled
#'                    variance is used to estimate the variance otherwise
#'                    the Welch (or Satterthwaite) approximation to the
#'                    degrees of freedom is used.
#' @param conf.level  confidence level of the interval.
#' @param method      Possible values are "none" for standard t-test,
#'                    "windsorized" for t-test on Windsorized data,
#'                    "ranks" for t-test on ranked data.
#' @param trim        used only when \code{method = "windsorized"};
#'                    the fraction (0 to 0.5) of observations to be
#'                    trimmed from each end of x. Values of trim
#'                    outside that range are replaced with the
#'                    nearest endpoint.
#' 
#' @seealso \code{\link{t.test}}, \code{\link{windsorize}}
#' 
#' @references 
#' Keselman, H. J., Algina, J., Lix, L. M., Wilcox, R. R.,
#' & Deering, K. (2008). A generally robust approach for
#' testing hypotheses and setting confidence intervals for
#' effect sizes. Psychological Methods, 13, 110–129.
#' 
#' @references 
#' Winter, J.C.F. (2013). Using the Student’s t-test
#' with extremely small sample sizes. Practical Assessment,
#' Research and Evaluation, 18:10.
#' 
#' @importFrom stats quantile var pt qt setNames 
#' @export

robust.t.test <- function(x, y = NULL,
                          alternative = c("two.sided", "less", "greater"),
                          mu = 0, paired = FALSE, var.equal = FALSE,
                          conf.level = 0.95, 
                          method = c("windsorized", "ranks", "none"),
                          trim = 0.2, ...) {
  
  trim <- trim/2
  stopifnot(trim >= 0 && trim < 0.5)
  
  if (is.null(y) && method == "ranks") {
    warning("One-sample t-test on the ranks is not available, using standard one-sample t-test.")
    method <- "none"
  }
  if (paired) {
    warning("Robistified paired t-test is not available, using standard paired t-test.")
    method <- "none"
  }
  
  alternative <- match.arg(alternative)
  method <- match.arg(method)
  
  xnam <- deparse(substitute(x))
  
  if (paired) {
    x <- x - y
    y <- NULL
  }
  
  if (!is.null(y)) {
    ynam <- deparse(substitute(y))
    
    if (method == "ranks") {
      nx <- length(x)
      ny <- length(y)
      ranks <- rank(c(x,y), ties.method = "average")
      x <- ranks[1:nx]
      y <- ranks[(nx+1):(nx+ny)]
    }
    
    if (method == "windsorized") {
      Ny <- length(y)
      limy <- quantile(y, probs=c(trim, 1-trim))
      ny <- length(y[y >= limy[1] & y <= limy[2]])
      y[y < limy[1]] <- limy[1]
      y[y > limy[2]] <- limy[2]
      
      my <- mean(y)
      vy <- (var(y)*(Ny-1))/(ny-1)
    } else {
      ny <- length(y)
      my <- mean(y)
      vy <- var(y)
    }
    
  } 
  
  if (method == "windsorized") {
    Nx <- length(x)
    limx <- quantile(x, probs=c(trim, 1-trim))
    nx <- length(x[x >= limx[1] & x <= limx[2]])
    x[x < limx[1]] <- limx[1]
    x[x > limx[2]] <- limx[2]
    
    mx <- mean(x)
    vx <- (var(x)*(Nx-1))/(nx-1)
  } else {
    nx <- length(x)
    mx <- mean(x)
    vx <- var(x)
  }
  
  if (is.null(y)) {
    
    df <- nx-1
    stderr <- sqrt(vx/nx)
    tstat <- (mx - mu) / stderr
    method <- if (paired) 
      "Paired t-test"
    else "Robustified One Sample t-test"
    data.name <- xnam
    estimate <- setNames(mx, "mean of x")
    
  } else {
    
    estimate <- c(mx, my)
    names(estimate) <- c("mean of x", "mean of y")
    
    if (var.equal) {
      
      df <- nx + ny - 2
      v <- 0
      if (nx > 1) 
        v <- v + (nx - 1) * vx
      if (ny > 1) 
        v <- v + (ny - 1) * vy
      v <- v/df
      stderr <- sqrt(v * (1/nx + 1/ny))
      
    } else {
      
      stderrx <- sqrt(vx/nx)
      stderry <- sqrt(vy/ny)
      stderr <- sqrt(stderrx^2 + stderry^2)
      df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny - 1))
      
    }
    
    tstat <- (mx - my - mu)/stderr
    method <- if (var.equal)
      "Robustified Two Sample t-test"
    else "Robustified Welch Two Sample t-test"
    names(mu) <- "difference in means"
    
    data.name <- paste0(xnam, " and ", ynam)
  }
  
  if (alternative == "less") {
    pval <- pt(tstat, df)
    cint <- c(-Inf, tstat + qt(conf.level, df))
  }
  else if (alternative == "greater") {
    pval <- pt(tstat, df, lower.tail = FALSE)
    cint <- c(tstat - qt(conf.level, df), Inf)
  }
  else {
    pval <- 2 * pt(-abs(tstat), df)
    alpha <- 1 - conf.level
    cint <- qt(1 - alpha/2, df)
    cint <- tstat + c(-cint, cint)
  }
  cint <- mu + cint * stderr
  
  names(tstat) <- "t"
  names(df) <- "df"
  names(mu) <- if (paired || !is.null(y)) 
    "difference in means"
  else "mean"
  attr(cint, "conf.level") <- conf.level
  
  structure(list(statistic = tstat,
                 parameter = df,
                 p.value = pval,
                 conf.int = cint,
                 estimate = estimate,
                 null.value = mu,
                 alternative = alternative,
                 method = method,
                 trim = trim,
                 data.name = data.name),
            class = "htest")
}
twolodzko/twextras documentation built on May 3, 2019, 1:52 p.m.