R/pearson.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Documented in pearson

#' Calculate Pearson correlation
#'
#' @description
#' `pearson()` calculates the Pearson correlation and is used in chapters 10 and 11 of "Applied Nonparametric Statistical Methods" (5th edition)
#'
#' @param x Numeric vector of same length as y
#' @param y Numeric vector of same length as x
#' @param alternative Type of alternative hypothesis (defaults to `two.sided`)
#' @param max.exact.cases Maximum number of cases allowed for exact calculations (defaults to `10`)
#' @param nsims.mc Number of Monte Carlo simulations to be performed (defaults to `100000`)
#' @param seed Random number seed to be used for Monte Carlo simulations (defaults to `NULL`)
#' @param do.asymp Boolean indicating whether or not to perform asymptotic calculations (defaults to `FALSE`)
#' @param do.exact Boolean indicating whether or not to perform exact calculations (defaults to `TRUE`)
#' @param do.mc Boolean indicating whether or not to perform Monte Carlo calculations (defaults to `FALSE`)
#' @returns An ANSMstat object with the results from applying the function
#' @examples
#' # Section 10.1.2 from "Applied Nonparametric Statistical Methods" (5th edition)
#' pearson(ch10$q1, ch10$q2, alternative = "greater", do.asymp = TRUE, do.exact = FALSE)
#'
#' # Example 11.2 from "Applied Nonparametric Statistical Methods" (5th edition)
#' pearson(ch11$parentlimit, ch11$reportedtime - 1 * ch11$parentlimit, alternative = "two.sided")
#'
#' @importFrom stats complete.cases cor pt
#' @export
pearson <-
  function(x, y, alternative = c("two.sided", "less", "greater"),
           max.exact.cases = 10, nsims.mc = 100000, seed = NULL,
           do.asymp = FALSE, do.exact = TRUE, do.mc = FALSE) {
    stopifnot(is.vector(x), is.numeric(x), is.vector(y), is.numeric(y),
              length(x) == length(y),
              is.numeric(max.exact.cases), length(max.exact.cases) == 1,
              is.numeric(nsims.mc), length(nsims.mc) == 1,
              is.numeric(seed) | is.null(seed),
              is.logical(do.asymp) == TRUE, is.logical(do.exact) == TRUE,
              is.logical(do.mc) == TRUE)
    alternative <- match.arg(alternative)

    #labels
    varname1 <- deparse(substitute(x))
    varname2 <- deparse(substitute(y))
    varname3 <- NULL

    #unused arguments
    cont.corr <- NULL
    CI.width <- NULL
    do.CI <- FALSE
    #default outputs
    pval <- NULL
    pval.stat <- NULL
    pval.note <- NULL
    pval.asymp <- NULL
    pval.asymp.stat <- NULL
    pval.asymp.note <- NULL
    pval.exact <- NULL
    pval.exact.stat <- NULL
    pval.exact.note <- NULL
    pval.mc <- NULL
    pval.mc.stat <- NULL
    pval.mc.note <- NULL
    actualCIwidth.exact <- NULL
    CI.exact.lower <- NULL
    CI.exact.upper <- NULL
    CI.exact.note <- NULL
    CI.asymp.lower <- NULL
    CI.asymp.upper <- NULL
    CI.asymp.note <- NULL
    CI.mc.lower <- NULL
    CI.mc.upper <- NULL
    CI.mc.note <- NULL
    CI.sample.lower <- NULL
    CI.sample.upper <- NULL
    CI.sample.note <- NULL
    stat.note <- NULL

    #prepare
    x <- x[complete.cases(x)] #remove missing cases
    y <- y[complete.cases(y)] #remove missing cases
    x <- round(x, -floor(log10(sqrt(.Machine$double.eps)))) #handle floating point issues
    y <- round(y, -floor(log10(sqrt(.Machine$double.eps)))) #handle floating point issues
    n <- length(x)
    stat <- cor(x, y, method = "pearson")
    stat <- round(stat, -floor(log10(sqrt(.Machine$double.eps)))) #handle floating point issues
    statlabel <- "Pearson correlation"

    #give mc output if exact not possible
    if (do.exact && n > max.exact.cases){
      do.mc <- TRUE
    }

    #exact p-value
    if(do.exact && n <= max.exact.cases){
      permutations <- perms(n)
      n.perms <- dim(permutations)[1]
      pval.exact <- 0
      for (i in 1:n.perms){
        cor.tmp <- cor(x[permutations[i,]], y, method = "pearson")
        cor.tmp <- round(cor.tmp, -floor(log10(sqrt(.Machine$double.eps)))) #handle floating point issues
        if (alternative == "two.sided"){
          if (abs(cor.tmp) >= abs(stat)){
            pval.exact <- pval.exact + 1 / n.perms
          }
        }else if (alternative == "less"){
          if (cor.tmp <= stat){
            pval.exact <- pval.exact + 1 / n.perms
          }
        }else if (alternative == "greater"){
          if (cor.tmp >= stat){
            pval.exact <- pval.exact + 1 / n.perms
          }
        }
      }
    }

    #Monte Carlo p-value
    if(do.mc){
      if (!is.null(seed)){set.seed(seed)}
      pval.mc <- 0
      for (i in 1:nsims.mc){
        cor.tmp <- cor(x[sample(n, n, replace = FALSE)], y, method = "pearson")
        if (alternative == "two.sided"){
          if (abs(cor.tmp) >= abs(stat)){
            pval.mc <- pval.mc + 1 / nsims.mc
          }
        }else if (alternative == "less"){
          if (cor.tmp <= stat){
            pval.mc <- pval.mc + 1 / nsims.mc
          }
        }else if (alternative == "greater"){
          if (cor.tmp >= stat){
            pval.mc <- pval.mc + 1 / nsims.mc
          }
        }
      }
    }

    #asymptotic p-value
    if(do.asymp){
      t <- stat * sqrt((n - 2) / (1 - stat ^ 2))
      if (alternative == "two.sided"){
        pval.asymp <- pt(t, n - 2, lower.tail = FALSE) * 2
      }else if (alternative == "less"){
        if (stat > 0){
          pval.asymp <- 1
        }else{
          pval.asymp <- pt(abs(t), n - 2, lower.tail = FALSE)
        }
      }else if (alternative == "greater"){
        if (stat < 0){
          pval.asymp <- 1
        }else{
          pval.asymp <- pt(t, n - 2, lower.tail = FALSE)
        }
      }
    }

    #check if message needed
    if (!do.asymp && !do.exact && !do.mc) {
      stat.note <- paste("Neither exact, Monte Carlo nor asymptotic test",
                         "requested")
    }else if (do.exact && n > max.exact.cases) {
      stat.note <- paste0("NOTE: Number of useful cases greater than current ",
                          "maximum allowed for exact calculations\nrequired for ",
                          "exact test (max.exact.cases = ",
                          sprintf("%1.0f", max.exact.cases), ") so Monte ",
                          "Carlo p-value given")
    }

    #create hypotheses
    H0 <- paste0("H0: Pearson correlation between ", varname1, " and ",
                 varname2, " is 0")
    if (alternative == "two.sided"){
      H0 <- paste0(H0, "\nH1: Pearson correlation between ", varname1, " and ",
                   varname2, " is not 0")
    }else if(alternative == "greater"){
      H0 <- paste0(H0, "\nH1: Pearson correlation between ", varname1, " and ",
                   varname2, " is greater than 0")
    }else{
      H0 <- paste0(H0, "\nH1: Pearson correlation between ", varname1, " and ",
                   varname2, " is less than 0")
    }
    H0 <- paste0(H0, "\n")

    #return
    result <- list(title = "Pearson correlation", varname1 = varname1,
                   varname2 = varname2, varname3 = varname3, stat = stat,
                   statlabel = statlabel, H0 = H0,
                   alternative = alternative, cont.corr = cont.corr, pval = pval,
                   pval.stat = pval.stat, pval.note = pval.note,
                   pval.exact = pval.exact, pval.exact.stat = pval.exact.stat,
                   pval.exact.note = pval.exact.note, targetCIwidth = CI.width,
                   actualCIwidth.exact = actualCIwidth.exact,
                   CI.exact.lower = CI.exact.lower,
                   CI.exact.upper = CI.exact.upper, CI.exact.note = CI.exact.note,
                   pval.asymp = pval.asymp, pval.asymp.stat = pval.asymp.stat,
                   pval.asymp.note = pval.asymp.note,
                   CI.asymp.lower = CI.asymp.lower,
                   CI.asymp.upper = CI.asymp.upper, CI.asymp.note = CI.asymp.note,
                   pval.mc = pval.mc, pval.mc.stat = pval.mc.stat,
                   nsims.mc = nsims.mc, pval.mc.note = pval.mc.note,
                   CI.mc.lower = CI.mc.lower, CI.mc.upper = CI.mc.upper,
                   CI.mc.note = CI.mc.note, CI.sample.lower = CI.sample.lower,
                   CI.sample.upper = CI.sample.upper, CI.sample.note = CI.sample.note,
                   stat.note = stat.note)
    class(result) <- "ANSMstat"
    return(result)
  }

Any scripts or data that you put into this service are public.

ANSM5 documentation built on Sept. 11, 2024, 6:45 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/pearson.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions pearson

Documented in pearson

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5 Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/pearson.R In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions pearson

Documented in pearson

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/pearson.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition