R/wilcoxon.mann.whitney.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Documented in wilcoxon.mann.whitney

#' Perform Wilcoxon-Mann-Whitney test
#'
#' @description
#' `wilcoxon.mann.whitney()` performs the Wilcoxon-Mann-Whitney test and is used in chapters 6, 8, 9 and 12 of "Applied Nonparametric Statistical Methods" (5th edition)
#'
#' @details
#' Missing values are removed from `x` and `y` separately. Factors are
#' replaced by their integer codes; numeric data are rounded to 8 decimal
#' places to guard against floating point noise creating or hiding ties.
#'
#' Up to three p-values are produced:
#' * exact (`do.exact`): `stats::wilcox.test(exact = TRUE)` when the pooled
#'   data contain no ties; otherwise a full enumeration of the permutation
#'   distribution of the mid-rank sum of the smaller sample. No exact
#'   confidence interval is produced when ties exist.
#' * asymptotic (`do.asymp`): `stats::wilcox.test(exact = FALSE)`; the
#'   confidence interval reported is always the two-sided one.
#' * Monte Carlo (`do.mc`): random relabelling of the pooled mid-ranks. This
#'   is switched on automatically when an exact test was requested but the
#'   number of cases exceeds `max.exact.cases` or the enumeration of
#'   combinations fails.
#'
#' When a confidence interval is requested and either the Monte Carlo test is
#' run or the exact test did not produce an interval, `bs()` is called to
#' obtain a bootstrap interval.
#'
#' @param x Numeric vector, or factor with same levels as y
#' @param y Numeric vector, or factor with same levels as x
#' @param H0 Null hypothesis value (defaults to `NULL`); must be `NULL` when `x` and `y` are factors
#' @param alternative Type of alternative hypothesis (defaults to `two.sided`)
#' @param cont.corr Boolean indicating whether or not to use continuity correction (defaults to `TRUE`)
#' @param CI.width Confidence interval width (defaults to `0.95`)
#' @param max.exact.cases Maximum number of cases allowed for exact calculations (defaults to `1000`)
#' @param nsims.mc Number of Monte Carlo simulations to be performed (defaults to `100000`)
#' @param seed Random number seed to be used for Monte Carlo simulations (defaults to `NULL`)
#' @param do.asymp Boolean indicating whether or not to perform asymptotic calculations (defaults to `FALSE`)
#' @param do.exact Boolean indicating whether or not to perform exact calculations (defaults to `TRUE`)
#' @param do.mc Boolean indicating whether or not to perform Monte Carlo calculations (defaults to `FALSE`)
#' @param do.CI Boolean indicating whether or not to perform confidence interval calculations (defaults to `TRUE`)
#' @returns An ANSMtest object with the results from applying the function
#' @examples
#' # Examples 6.1 and 6.2 from "Applied Nonparametric Statistical Methods" (5th edition)
#' wilcoxon.mann.whitney(ch6$groupA, ch6$groupB)
#'
#' # Exercise 12.4 from "Applied Nonparametric Statistical Methods" (5th edition)
#' wilcoxon.mann.whitney(ch12$feedback.satisfaction[ch12$PPI.person.2 == "Representative"],
#'   ch12$feedback.satisfaction[ch12$PPI.person.2 == "Researcher"],
#'   do.exact = FALSE, do.asymp = TRUE)
#'
#' @importFrom stats complete.cases wilcox.test
#' @importFrom utils combn tail
#' @export
wilcoxon.mann.whitney <-
  function(x, y, H0 = NULL, alternative=c("two.sided", "less", "greater"),
           cont.corr = TRUE, CI.width = 0.95, max.exact.cases = 1000,
           nsims.mc = 100000, seed = NULL, do.asymp = FALSE, do.exact = TRUE,
           do.mc = FALSE, do.CI = TRUE) {
    #a flag is a single, non-missing logical value
    is.flag <- function(v) is.logical(v) && length(v) == 1 && !is.na(v)
    #append a note to the notes collected so far (one note per line)
    add.note <- function(current, extra) {
      if (is.null(current)) extra else paste0(current, "\n", extra)
    }

    stopifnot((is.vector(x) && is.numeric(x) &&
                 is.vector(y) && is.numeric(y)) ||
                (is.factor(x) && is.factor(y) &&
                   identical(levels(x), levels(y)) && is.null(H0)),
              (is.numeric(H0) && length(H0) == 1) || is.null(H0),
              is.numeric(max.exact.cases), length(max.exact.cases) == 1,
              is.numeric(nsims.mc), length(nsims.mc) == 1,
              is.numeric(seed) || is.null(seed),
              is.numeric(CI.width), length(CI.width) == 1,
              CI.width > 0, CI.width < 1,
              is.flag(cont.corr), is.flag(do.asymp), is.flag(do.exact),
              is.flag(do.mc), is.flag(do.CI))
    alternative <- match.arg(alternative)

    #labels (deparse() may split a long expression over several strings, so
    #collapse them to keep every label a single string)
    varname1 <- paste(trimws(deparse(substitute(x))), collapse = " ")
    varname2 <- paste(trimws(deparse(substitute(y))), collapse = " ")

    #default outputs
    pval <- NULL
    pval.stat <- NULL
    pval.note <- NULL
    pval.asymp <- NULL
    pval.asymp.stat <- NULL
    pval.asymp.note <- NULL
    pval.exact <- NULL
    pval.exact.stat <- NULL
    pval.exact.note <- NULL
    pval.mc <- NULL
    pval.mc.stat <- NULL
    pval.mc.note <- NULL
    actualCIwidth.exact <- NULL
    CI.exact.lower <- NULL
    CI.exact.upper <- NULL
    CI.exact.note <- NULL
    CI.asymp.lower <- NULL
    CI.asymp.upper <- NULL
    CI.asymp.note <- NULL
    CI.mc.lower <- NULL
    CI.mc.upper <- NULL
    CI.mc.note <- NULL
    test.note <- NULL

    #prepare
    x <- x[complete.cases(x)] #remove missing cases
    y <- y[complete.cases(y)] #remove missing cases
    n.x <- length(x)
    n.y <- length(y)
    n <- n.x + n.y
    if (n.x < 1 || n.y < 1){
      stop("x and y must each contain at least one non-missing value")
    }
    #number of decimal places kept when handling floating point issues
    digits <- -floor(log10(sqrt(.Machine$double.eps)))
    if (is.factor(x) && is.factor(y)){
      x <- as.numeric(x)
      y <- as.numeric(y)
    }else{
      x <- round(x, digits)
      y <- round(y, digits)
    }
    if (!is.null(H0)) {
      #round after shifting too: x - H0 can reintroduce floating point noise
      xy <- round(c(x - H0, y), digits)
      varname1 <- paste0(varname1, " - ", H0)
    }else{
      H0 <- 0
      xy <- round(c(x, y), digits)
    }
    s <- rank(xy, ties.method = "average")
    ranksumx <- sum(s[seq_len(n.x)])
    ranksumy <- sum(s) - ranksumx
    mannwhitneyux <- ranksumx - n.x * (n.x + 1) / 2
    mannwhitneyuy <- ranksumy - n.y * (n.y + 1) / 2
    stat <- paste0("\n", ranksumx, " (rank sum from ", varname1, "), ",
                   ranksumy, " (rank sum from ", varname2, ")\n", mannwhitneyux,
                   " (Mann-Whitney U from ", varname1, "), ", mannwhitneyuy,
                   " (Mann-Whitney U from ", varname2, ")")

    #check for ties on the data themselves: a tie group of odd size has an
    #integer mid-rank, so inspecting the ranks for fractions would miss it
    tiesexist <- anyDuplicated(xy) > 0

    #exact calculations are only attempted within the max.exact.cases limit;
    #give MC output if exact not possible
    exact.possible <- do.exact && n <= max.exact.cases
    if (do.exact && !exact.possible){
      do.mc <- TRUE
    }

    #with ties the permutation distribution is enumerated, which needs every
    #combination of positions for the smaller sample; fall back to MC if the
    #enumeration cannot be built (too many combinations / not enough memory)
    OverflowState <- FALSE
    nfrom <- min(n.x, n.y)
    combins <- NULL
    if (exact.possible && tiesexist){
      combins <- tryCatch(suppressWarnings(combn(n, nfrom)),
                          error = function(e) NULL)
      if (is.null(combins)){
        OverflowState <- TRUE
        exact.possible <- FALSE
        do.mc <- TRUE
      }
    }

    #exact p-value & CI
    if (exact.possible){
      pval.exact.stat <- stat
      if (!tiesexist){
        wilcox.test.output <- wilcox.test(x, y, alternative = alternative,
                                          mu = H0, exact = TRUE,
                                          conf.int = do.CI,
                                          conf.level = CI.width)
        pval.exact <- wilcox.test.output$p.value
        if (do.CI){
          CI.exact.lower <- wilcox.test.output$conf.int[1]
          CI.exact.upper <- wilcox.test.output$conf.int[2]
        }
      }else{
        #work with doubled mid-ranks so that every sum is a whole number
        doubled <- s * 2
        #rank sum of every possible smaller sample, built one row of the
        #combination matrix at a time to avoid copying the whole matrix
        sums <- numeric(ncol(combins))
        for (r in seq_len(nfrom)){
          sums <- sums + doubled[combins[r, ]]
        }
        #the reference sample is x when it is strictly smaller, otherwise y
        from.x <- n.x < n.y
        observed <- if (from.x) ranksumx * 2 else ranksumy * 2
        p.low <- mean(sums <= observed)
        p.high <- mean(sums >= observed)
        #a small rank sum in x (equivalently a large one in y) supports "less"
        pval.exact.less <- if (from.x) p.low else p.high
        pval.exact.greater <- if (from.x) p.high else p.low
        if (alternative == "two.sided"){
          pval.exact <- min(1, min(pval.exact.less, pval.exact.greater) * 2)
        }else if (alternative == "less"){
          pval.exact <- pval.exact.less
        }else if (alternative == "greater"){
          pval.exact <- pval.exact.greater
        }
        #no exact CI with ties: CI.exact.lower/upper stay NULL
      }
    }

    #asymptotic p-value and CI (with/without continuity correction)
    if (do.asymp){
      wilcox.test.output <- wilcox.test(x, y, alternative = alternative,
                                        mu = H0, exact = FALSE,
                                        correct = cont.corr, conf.int = do.CI,
                                        conf.level = CI.width)
      pval.asymp.stat <- stat
      pval.asymp <- wilcox.test.output$p.value
      if (do.CI){
        #the reported interval is always two-sided
        if (alternative != "two.sided"){
          wilcox.test.output <- wilcox.test(x, y, alternative = "two.sided",
                                            mu = H0, exact = FALSE,
                                            correct = cont.corr, conf.int = TRUE,
                                            conf.level = CI.width)
        }
        CI.asymp.lower <- wilcox.test.output$conf.int[1]
        CI.asymp.upper <- wilcox.test.output$conf.int[2]
      }
    }

    #Monte Carlo p-value
    if (do.mc){
      pval.mc.stat <- stat
      if (!is.null(seed)){set.seed(seed)}
      #relabelling the pooled data permutes the mid-ranks, so the rank sum of
      #the first n.x shuffled ranks is compared with the observed rank sum of
      #x (which already includes the H0 shift); this orders the simulations
      #exactly as the Mann-Whitney U statistic would
      x.idx <- seq_len(n.x)
      n.le <- 0
      n.ge <- 0
      for (i in seq_len(nsims.mc)){
        shuffled <- sample(n, n, replace = FALSE)
        ranksum.tmp <- sum(s[shuffled[x.idx]])
        if (ranksum.tmp <= ranksumx){
          n.le <- n.le + 1
        }
        if (ranksum.tmp >= ranksumx){
          n.ge <- n.ge + 1
        }
      }
      pval.lt <- n.le / nsims.mc
      pval.gt <- n.ge / nsims.mc
      if (alternative == "two.sided"){
        pval.mc <- min(1, min(pval.lt, pval.gt) * 2)
      }else if (alternative == "less"){
        pval.mc <- pval.lt
      }else if (alternative == "greater"){
        pval.mc <- pval.gt
      }
    }

    #Bootstrap CI
    if (do.CI && (do.mc || (do.exact && is.null(CI.exact.lower)))){
      bs.ci.res <- bs(x = x, y = y, CI.width = CI.width, nsims.bs = nsims.mc,
                      seed = seed)$CI
      CI.mc.lower <- bs.ci.res[1]
      CI.mc.upper <- bs.ci.res[2]
      CI.mc.note <- paste0("Confidence interval for difference (", varname1,
                           " minus ", varname2, ")\nis basic bootstrap interval for the median")
    }

    #check if message needed
    if (!do.asymp && !do.exact) {
      test.note <- paste0("Neither exact nor asymptotic test/confidence ",
                          "interval requested")
    }else if (n > max.exact.cases) {
      affected <- NULL
      if (do.exact && do.CI){
        affected <- "exact test and confidence interval"
      }else if (do.exact) {
        affected <- "exact test"
      }
      if (!is.null(affected)){
        test.note <- paste0("NOTE: Number of useful cases greater than current ",
                            "maximum allowed for exact\ncalculations required ",
                            "for ", affected, " (max.exact.cases = ",
                            sprintf("%1.0f", max.exact.cases), ")")
      }
    }
    if (do.exact && tiesexist && (n > max.exact.cases || OverflowState)){
      test.note <- add.note(test.note,
                            paste0("NOTE: Ties exist in data and sample ",
                                   "too large for exact\ncalculations required ",
                                   "for exact test"))
    }
    if (tiesexist && exact.possible && do.CI){
      test.note <- add.note(test.note,
                            paste0("NOTE: Ties exist in data so exact ",
                                   "confidence interval\nnot available"))
    }
    if (tiesexist && do.asymp){
      if (do.CI){
        test.note <- add.note(test.note,
                              paste0("NOTE: Ties exist in data so mid-ranks ",
                                     "used for asymptotic\ntest and confidence ",
                                     "interval"))
      }else{
        test.note <- add.note(test.note,
                              paste0("NOTE: Ties exist in data so mid-ranks ",
                                     "used for asymptotic test"))
      }
    }

    #define hypotheses (returned in the H0 element of the result)
    if (alternative == "two.sided"){
      hypotheses <- paste0("H0: samples are from the same population\n",
                           "H1: samples differ in location\n")
    }else if (alternative == "less"){
      hypotheses <- paste0("H0: samples are from the same population\n",
                           "H1: location of ", varname1,
                           " is less than location of ", varname2, "\n")
    }else if (alternative == "greater"){
      hypotheses <- paste0("H0: samples are from the same population\n",
                           "H1: location of ", varname1,
                           " is greater than location of ", varname2, "\n")
    }

    #return
    result <- list(title = "Wilcoxon-Mann-Whitney test", varname1 = varname1,
                   varname2 = varname2, H0 = hypotheses,
                   alternative = alternative, cont.corr = cont.corr, pval = pval,
                   pval.stat = pval.stat, pval.note = pval.note,
                   pval.exact = pval.exact, pval.exact.stat = pval.exact.stat,
                   pval.exact.note = pval.exact.note, targetCIwidth = CI.width,
                   actualCIwidth.exact = actualCIwidth.exact,
                   CI.exact.lower = CI.exact.lower,
                   CI.exact.upper = CI.exact.upper, CI.exact.note = CI.exact.note,
                   pval.asymp = pval.asymp, pval.asymp.stat = pval.asymp.stat,
                   pval.asymp.note = pval.asymp.note,
                   CI.asymp.lower = CI.asymp.lower,
                   CI.asymp.upper = CI.asymp.upper, CI.asymp.note = CI.asymp.note,
                   pval.mc = pval.mc, pval.mc.stat = pval.mc.stat,
                   nsims.mc = nsims.mc, pval.mc.note = pval.mc.note,
                   CI.mc.lower = CI.mc.lower, CI.mc.upper = CI.mc.upper,
                   CI.mc.note = CI.mc.note,
                   test.note = test.note)
    class(result) <- "ANSMtest"
    return(result)
  }

Any scripts or data that you put into this service are public.

ANSM5 documentation built on Sept. 11, 2024, 6:45 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/wilcoxon.mann.whitney.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions wilcoxon.mann.whitney

Documented in wilcoxon.mann.whitney

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5 Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/wilcoxon.mann.whitney.R In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions wilcoxon.mann.whitney

Documented in wilcoxon.mann.whitney

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/wilcoxon.mann.whitney.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition