R/wilcoxon.signedrank.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Documented in wilcoxon.signedrank

#' Perform Wilcoxon signed-rank test
#'
#' @description
#' `wilcoxon.signedrank()` performs the Wilcoxon signed-rank test and is used in chapters 3, 4 and 5 of "Applied Nonparametric Statistical Methods" (5th edition)
#'
#' @param x Numeric vector
#' @param H0 Null hypothesis value (defaults to `NULL`)
#' @param alternative Type of alternative hypothesis (defaults to `two.sided`)
#' @param cont.corr Boolean indicating whether or not to use continuity correction (defaults to `TRUE`)
#' @param CI.width Confidence interval width (defaults to `0.95`)
#' @param max.exact.cases Maximum number of cases allowed for exact calculations (defaults to `1000`)
#' @param do.asymp Boolean indicating whether or not to perform asymptotic calculations (defaults to `FALSE`)
#' @param do.exact Boolean indicating whether or not to perform exact calculations (defaults to `TRUE`)
#' @param do.CI Boolean indicating whether or not to perform confidence interval calculations (defaults to `TRUE`)
#' @returns An ANSMtest object with the results from applying the function
#' @examples
#' # Example 3.4 from "Applied Nonparametric Statistical Methods" (5th edition)
#' wilcoxon.signedrank(ch3$heartrates1, 70, "greater")
#'
#' # Exercise 5.12 from "Applied Nonparametric Statistical Methods" (5th edition)
#' wilcoxon.signedrank(ch5$kHz0.125 - ch5$kHz0.25, 0)
#'
#' @importFrom stats complete.cases pnorm qnorm
#' @export
wilcoxon.signedrank <-
  function(x, H0 = NULL, alternative=c("two.sided", "less", "greater"),
           cont.corr = TRUE, CI.width = 0.95, max.exact.cases = 1000,
           do.asymp = FALSE, do.exact = TRUE, do.CI = TRUE) {
    stopifnot(is.vector(x), is.numeric(x),
              ((is.numeric(H0) && length(H0) == 1) | is.null(H0)),
              is.numeric(max.exact.cases), length(max.exact.cases) == 1,
              is.logical(cont.corr) == TRUE, is.numeric(CI.width),
              length(CI.width) == 1, CI.width > 0, CI.width < 1,
              is.logical(do.asymp) == TRUE, is.logical(do.exact) == TRUE,
              is.logical(do.CI) == TRUE)
    alternative <- match.arg(alternative)

    #labels
    varname1 <- deparse(substitute(x))

    #default outputs
    varname2 <- NULL
    pval <- NULL
    pval.stat <- NULL
    pval.note <- NULL
    pval.asymp <- NULL
    pval.asymp.stat <- NULL
    pval.asymp.note <- NULL
    pval.exact <- NULL
    pval.exact.stat <- NULL
    pval.exact.note <- NULL
    pval.mc <- NULL
    pval.mc.stat <- NULL
    nsims.mc <- NULL
    pval.mc.note <- NULL
    actualCIwidth.exact <- NULL
    CI.exact.lower <- NULL
    CI.exact.upper <- NULL
    CI.exact.note <- NULL
    CI.asymp.lower <- NULL
    CI.asymp.upper <- NULL
    CI.asymp.note <- NULL
    CI.mc.lower <- NULL
    CI.mc.upper <- NULL
    CI.mc.note <- NULL
    test.note <- NULL

    #prepare
    x <- x[complete.cases(x)] #remove missing cases
    x <- round(x, -floor(log10(sqrt(.Machine$double.eps)))) #handle floating point issues
    n <- length(x)
    multiplier.test <- NULL
    multiplier.ci <- NULL

    #prepare for tests
    if (!is.null(H0)){
      s.test <- rank(abs(x - H0), ties.method = "average")
      s.test[x == H0] <- 0 #make ranks equal to zero for values equal to H0
      multiplier.test = 2 - all(s.test == round(s.test,0)) # multiplier of 2 if average ranks, otherwise multiplier of 1
      if (!is.null(H0)){
        ranksumplus <- sum(s.test[sign(x - H0) == 1]) * multiplier.test
        ranksumminus <- sum(s.test[sign(x - H0) == -1]) * multiplier.test
      }
    }

    #prepare for CIs
    if (do.CI){
      s.ci <- rank(abs(x), ties.method = "average")
      multiplier.ci = 2 - all(s.ci == round(s.ci,0)) # multiplier of 2 if average ranks, otherwise multiplier of 1
      Walsh.averages <- NULL
      for (i in 1:length(x)){
        for (j in i:length(x)){
          Walsh.averages <- c(Walsh.averages, (x[i] + x[j]) / 2)
        }
      }
    }

    #give asymptotic output if exact not possible
    if (do.exact && n > max.exact.cases){
      do.asymp <- TRUE
    }

    #exact p-value
    if (!is.null(H0) && do.exact && n <= max.exact.cases){
      permfrom.test <- s.test * multiplier.test
      permfrom.test <- sort(permfrom.test)
      permsums.test <- rep(0, sum(permfrom.test) + 1)
      permsums.test[1] <- 1
      current.max <- 0
      for (i in 1:length(permfrom.test)){
        permsumsnow.test <- permsums.test
        to.add <- permfrom.test[i]
        j <- 1:(current.max + 1)
        k <- (j - 1) + to.add + 1
        gtzero <- permsumsnow.test[j] > 0
        permsums.test[k][gtzero] <- permsums.test[k][gtzero] + permsumsnow.test[j][gtzero]
        current.max <- current.max + to.add
      }
      permsums.test <- data.frame(0:(length(permsums.test) - 1), permsums.test)

      pval.exact.stat <- paste0(ranksumminus / multiplier.test,
                                " (sum of negative ranks), ",
                                ranksumplus / multiplier.test,
                                " (sum of positive ranks)")
      pval.exact.less <-
        sum(permsums.test[permsums.test[, 1] >= ranksumminus, 2]) / sum(permsums.test[, 2])
      pval.exact.greater <-
        sum(permsums.test[permsums.test[, 1] >= ranksumplus, 2]) / sum(permsums.test[, 2])
      if (alternative=="two.sided"){
        pval.exact <- min(1, min(pval.exact.less, pval.exact.greater) * 2)
      }else if (alternative == "less"){
        pval.exact <- pval.exact.less
      }else if (alternative == "greater"){
        pval.exact <- pval.exact.greater
      }
    }

    #exact CI
    if (do.exact && do.CI && n <= max.exact.cases){
      permfrom.ci <- s.ci * multiplier.ci
      permfrom.ci <- sort(permfrom.ci)
      permsums.ci <- rep(0, sum(permfrom.ci) + 1)
      permsums.ci[1] <- 1
      current.max <- 0
      for (i in 1:length(permfrom.ci)){
        permsumsnow.ci <- permsums.ci
        to.add <- permfrom.ci[i]
        j <- 1:(current.max + 1)
        k <- (j - 1) + to.add + 1
        gtzero <- permsumsnow.ci[j] > 0
        permsums.ci[k][gtzero] <- permsums.ci[k][gtzero] + permsumsnow.ci[j][gtzero]
        current.max <- current.max + to.add
      }
      permsums.ci <- data.frame(0:(length(permsums.ci) - 1), permsums.ci)

      i <- 0
      denomsum <- sum(permsums.ci[, 2])
      repeat{
        pval.tmp <-
          sum(permsums.ci[permsums.ci[, 1] <= i, 2]) / denomsum * 2
        if (pval.tmp > (1 - CI.width)) {break}
        i <- i + multiplier.ci
      }
      CI.exact.lower <- sort(Walsh.averages, decreasing = FALSE)[i / multiplier.ci]
      CI.exact.upper <- sort(Walsh.averages, decreasing = TRUE)[i / multiplier.ci]
      actualCIwidth.exact <- 1 - sum(permsums.ci[permsums.ci[, 1] <= i - multiplier.ci, 2]) /
        denomsum * 2
      if (is.null(CI.exact.lower) | is.null(CI.exact.upper) | is.null(actualCIwidth.exact)){
        actualCIwidth.exact <- NULL
        CI.exact.lower <- NULL
        CI.exact.upper <- NULL
      }
    }

    #asymptotic p-value (with/without continuity correction)
    if (!is.null(H0) && do.asymp){
      s.test.2 <- min(ranksumminus, ranksumplus) / multiplier.test
      #with ties
      if (multiplier.test == 2){
        pval.asymp.stat <- (s.test.2 - 0.5 * sum(abs(s.test))) / (0.5 * sqrt(sum(s.test ** 2)))
        pval.asymp.less <-
          pnorm((ranksumminus / 2 - 0.5 * sum(abs(s.test))) / (0.5 * sqrt(sum(s.test ** 2))),
                lower.tail = FALSE)
        pval.asymp.greater <-
          pnorm((ranksumplus / 2 - 0.5 * sum(abs(s.test))) / (0.5 * sqrt(sum(s.test ** 2))),
                lower.tail = FALSE)
      }else{
        #without ties
        pval.asymp.stat <- ((s.test.2 + 0.5 * (cont.corr == TRUE)) - n * (n + 1) / 4) /
          sqrt(n * (n + 1) * (2 * n + 1) / 24)
        pval.asymp.less <-
          pnorm(((ranksumminus + (0.5 - (ranksumminus > ranksumplus)) *
                    (cont.corr == TRUE)) - n * (n + 1) / 4) /
                  sqrt(n * (n + 1) * (2 * n + 1) / 24), lower.tail = FALSE)
        pval.asymp.greater <-
          pnorm(((ranksumplus + (0.5 - (ranksumminus < ranksumplus)) *
                    (cont.corr == TRUE)) - n * (n + 1) / 4) /
                  sqrt(n * (n + 1) * (2 * n + 1) / 24), lower.tail = FALSE)
      }
      if (alternative=="two.sided"){
        pval.asymp <- min(1, min(pval.asymp.less, pval.asymp.greater) * 2)
      }else if (alternative == "less"){
        pval.asymp <- pval.asymp.less
      }else if (alternative == "greater"){
        pval.asymp <- pval.asymp.greater
      }
      if (n < 20){
        pval.asymp.note <- paste("(WARNING: n is less than 20 so asymptotic",
                                 "test is not recommended)")
      }
    }

    #asymptotic CI
    if (do.asymp && do.CI){
      if (n <= max.exact.cases){
        if (multiplier.ci == 2){ #recalculate if multiplier of 2 used for exact test
          permfrom.ci <- 1:n
          permsums.ci <- rep(0,sum(permfrom.ci) + 1)
          permsums.ci[1] <- 1
          current.max <- 0
          for (i in 1:length(permfrom.ci)){
            permsumsnow.ci <- permsums.ci
            to.add <- permfrom.ci[i]
            j <- 1:(current.max + 1)
            k <- (j - 1) + to.add + 1
            gtzero <- permsumsnow.ci[j] > 0
            permsums.ci[k][gtzero] <- permsums.ci[k][gtzero] + permsumsnow.ci[j][gtzero]
            current.max <- current.max + to.add
          }
          permsums.ci <- data.frame(0:(length(permsums.ci) - 1), permsums.ci)
        }
        meanstat <- sum(permsums.ci[, 1] * permsums.ci[, 2]) / sum(permsums.ci[, 2])
        sdstat <- sqrt(sum(permsums.ci[, 1] ^ 2 * permsums.ci[, 2]) / sum(permsums.ci[, 2]) - meanstat ^ 2)
        S <- qnorm((1 - CI.width) / 2) * sdstat + meanstat
        CI.asymp.lower <- sort(Walsh.averages, decreasing = FALSE)[round(S, 0) + 1]
        CI.asymp.upper <- sort(Walsh.averages, decreasing = FALSE)[length(Walsh.averages) - round(S, 0)]
        if (n < 20){
          CI.asymp.note <- paste("(WARNING: n is less than 20 so asymptotic",
                                 "CI is not recommended)")
        }
      }else{
        CI.asymp.note <- paste0("NOTE: Number of useful cases greater than current ",
                                "maximum allowed for exact\ncalculations required ",
                                "for statistics for asymptotic confidence interval\n",
                                "(max.exact.cases = ",
                                sprintf("%1.0f", max.exact.cases), ")")
      }
    }

    #check if message needed
    if (!do.asymp && !do.exact) {
      test.note <- paste("Neither exact nor asymptotic test/confidence interval ",
                         "requested")
    }else if (n > max.exact.cases) {
      affected <- NULL
      if (do.exact && do.CI && !is.null(H0)){
        affected <- "exact test and confidence interval"
      }else if (do.exact && do.CI && is.null(H0)) {
        affected <- "exact confidence interval"
      }else if (do.exact) {
        affected <- "exact test"
      }
      if (!is.null(affected)){
        test.note <- paste0("NOTE: Number of useful cases greater than current ",
                            "maximum allowed for exact\ncalculations required ",
                            "for ", affected, " (max.exact.cases = ",
                            sprintf("%1.0f", max.exact.cases), ")")
      }
    }

    if (multiplier.test == 2 && do.asymp && !is.null(H0)){
      if (!is.null(test.note)){
        test.note <- paste0(test.note, "\n")
      }
      if (cont.corr){
        cont.corr <- FALSE #set to FALSE so passed appropriately to print
        test.note <- paste0(test.note, "NOTE: Ties exist in data so mid-ranks ",
                            "used for asymptotic test and the\ncontinuity ",
                            "correction has not been used")
      }else{
        test.note <- paste0(test.note, "NOTE: Ties exist in data so mid-ranks ",
                            "used for asymptotic test")
      }
    }
    if (multiplier.ci == 2 && do.asymp && do.CI){
      if (!is.null(test.note)){
        test.note <- paste0(test.note, "\n")
      }
      if (cont.corr){
        cont.corr <- FALSE #set to FALSE so passed appropriately to print
        test.note <- paste0(test.note, "NOTE: Ties exist in data so mid-ranks ",
                            "used for asymptotic confidence interval\nand the ",
                            "continuity correction has not been used")
      }else{
        test.note <- paste0(test.note, "NOTE: Ties exist in data so mid-ranks ",
                            "used for asymptotic confidence interval")
      }
    }
    if (is.null(H0) && (do.exact | do.asymp)){
      if (!is.null(test.note)){
        test.note <- paste0(test.note, "\n")
      }
      test.note <- paste0(test.note, "NOTE: No value for H0 given so no p-value ",
                          "can be calculated")
    }
    if (!is.null(H0) && (do.exact | do.asymp)){
      if (sum(x == H0) != 0){
        if (!is.null(test.note)){
          test.note <- paste0(test.note, "\n")
        }
        test.note <- paste0(test.note, "NOTE: At least one value is equal to ",
                            "the null hypothesis - method of calculating\nranks ",
                            "which includes values equal to the null hypothesis ",
                            "in the ranking before\nsetting their ",
                            "ranks to zero has been used")
      }
    }

    #return
    result <- list(title = "Wilcoxon signed-rank test", varname1 = varname1,
                   varname2 = varname2, H0 = H0,
                   alternative = alternative, cont.corr = cont.corr, pval = pval,
                   pval.stat = pval.stat, pval.note = pval.note,
                   pval.exact = pval.exact, pval.exact.stat = pval.exact.stat,
                   pval.exact.note = pval.exact.note, targetCIwidth = CI.width,
                   actualCIwidth.exact = actualCIwidth.exact,
                   CI.exact.lower = CI.exact.lower,
                   CI.exact.upper = CI.exact.upper, CI.exact.note = CI.exact.note,
                   pval.asymp = pval.asymp, pval.asymp.stat = pval.asymp.stat,
                   pval.asymp.note = pval.asymp.note,
                   CI.asymp.lower = CI.asymp.lower,
                   CI.asymp.upper = CI.asymp.upper, CI.asymp.note = CI.asymp.note,
                   pval.mc = pval.mc, pval.mc.stat = pval.mc.stat,
                   nsims.mc = nsims.mc, pval.mc.note = pval.mc.note,
                   CI.mc.lower = CI.mc.lower, CI.mc.upper = CI.mc.upper,
                   CI.mc.note = CI.mc.note,
                   test.note = test.note)
    class(result) <- "ANSMtest"
    return(result)
  }
Any scripts or data that you put into this service are public.
ANSM5 documentation built on Sept. 11, 2024, 6:45 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/wilcoxon.signedrank.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions wilcoxon.signedrank

Documented in wilcoxon.signedrank

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5 Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/wilcoxon.signedrank.R In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions wilcoxon.signedrank

Documented in wilcoxon.signedrank

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/wilcoxon.signedrank.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition