arsenal: An Arsenal of 'R' Functions for Large-Scale Statistical Summaries

### Test functions #######

notest <- function(x, x.by, ...)
{
  list(p.value = NA_real_, method = "No test")
}

stddiff <- function(x, x.by, ..., test.always = FALSE) {
  tab <- table(is.na(x), x.by)
  cs <- colSums(tab)
  if(!test.always && (any(tab[1, ] == 0) || any(colSums(tab) == 0))) {
    return(list(p.value=NA_real_, method="Standardized difference"))
  }
  if(sum(cs > 0) != 2) stop("Standardized differences require exactly two by-groups", call. = FALSE)
  tab <- tab[, cs > 0]
  trt <- levels(x.by)[cs > 0][2]
  x.by <- +(x.by == trt)
  check_pkg("stddiff")
  if(is.ordered(x) || inherits(x, "Surv") || is.selectall(x)) {
    stop("Can't run standardized differences unless the object is factor-ish or numeric-ish", call. = FALSE)
  } else if(is.factor(x)) {
    if(length(levels(x)) < 2) stop("At least two levels are required to run standardized differences")
    if(length(levels(x)) == 2) {
      s <- stddiff::stddiff.binary(data.frame(x, x.by), 2, 1)
    } else {
      s <- stddiff::stddiff.category(data.frame(x, x.by), 2, 1)
    }
  } else if(is.numeric(x) || inherits(x, "difftime") || is.Date(x)) {
    s <- stddiff::stddiff.numeric(data.frame(x, x.by), 2, 1)
  } else stop("Can't run standardized differences unless the object is categorical (but not ordered), numeric, or date", call. = FALSE)

  list(p.value=s[1, "stddiff"], method="Standardized difference")
}


## continuous tests:
## 1. anova  (parametric)
## consider allowing glm, for now just lm with gaussian errors
## Would like to just use either "anova" or "aov", anova needs lm(group~x first,
## aov does not return p-value. Could add it after.
## For now, just write our own to avoid over-writing anova R-base function
## also, nice to keep same format to call, eval(call(function, x, x,by)), as other tests
anova <- function(x, x.by, ..., test.always = FALSE) {
  tab <- table(is.na(x), x.by)
  if(!test.always && (any(tab[1, ] == 0) || any(colSums(tab) == 0))) {
    return(list(p.value=NA_real_, statistic.F=NA_real_, method="Linear Model ANOVA"))
  }
  aov.out <- stats::lm(x~x.by)
  test <- stats::anova(aov.out)
  list(p.value = test[1,ncol(test)],
       statistic.F = test[1,ncol(test)-1],
       method = "Linear Model ANOVA")
}
## 2. kruskal-wallis (non-parametric)
kwt <- function(x, x.by, ..., test.always = FALSE) {
  tab <- table(is.na(x), x.by)
  if(!test.always && (any(tab[1, ] == 0) || any(colSums(tab) == 0))) {
    return(list(p.value=NA_real_, statistic.F=NA_real_, method="Kruskal-Wallis rank sum test"))
  }
  stats::kruskal.test(x, as.factor(x.by))
}

## 2. wilcoxon (non-parametric)
wt <- function(x, x.by, ..., wilcox.correct = FALSE, wilcox.exact = NULL, test.always = FALSE) {
  tab <- table(is.na(x), x.by)
  if(ncol(tab) != 2) stop("The Wilcoxon Rank Sum test must have exactly two groups")

  if(!test.always && (any(tab[1, ] == 0) || any(colSums(tab) == 0))) {
    return(list(p.value=NA_real_, statistic.F=NA_real_, method="Wilcoxon rank sum test"))
  }
  stats::wilcox.test(x ~ as.factor(x.by), correct = wilcox.correct, exact = wilcox.exact)
}

## median test
medtest <- function(x, x.by, ..., test.always = FALSE) {
  if(!requireNamespace("coin", quietly = TRUE))
  {
    warning("The \"coin\" package is required to run a median test.", call. = FALSE)
    return(notest(x, x.by, ...))
  }

  tab <- table(is.na(x), x.by)
  if(!test.always && (any(tab[1, ] == 0) || any(colSums(tab) == 0))) {
    return(list(p.value=NA_real_, method = "Median test"))
  }
  ## should be taken care of with coin::
  check_pkg("coin")
  mtest <- coin::median_test(x~as.factor(x.by), teststat="quad")
  list(p.value=coin::pvalue(mtest), method="Median test", statistic=mtest@statistic@teststatistic)
}

## two tests for categorical,
## 1. chisq goodness of fit, equal proportions across table cells
chisq <- function(x, x.by, ..., chisq.correct=FALSE, simulate.p.value=FALSE, B=2000, test.always = FALSE) {
  tab <- table(x, x.by, exclude=NA)
  rs <- rowSums(tab)
  cs <- colSums(tab)

  if(!test.always && (any(rs == 0) || any(cs == 0)) && ncol(tab) > 1 && nrow(tab) > 1) {
    return(list(p.value=NA_real_, method="Pearson's Chi-squared test"))
  }
  if(length(cs) > 1) tab <- tab[rs > 0, , drop = FALSE]
  if(length(rs) > 1) tab <- tab[, cs > 0, drop = FALSE]
  suppressWarnings(stats::chisq.test(tab, correct=chisq.correct, simulate.p.value=simulate.p.value, B=B))
}

## 2. Fisher's exact test for prob of as or more extreme table
fe <- function(x, x.by, ..., simulate.p.value=FALSE, B=2000, test.always = FALSE) {
  tab <- table(x, x.by, exclude=NA)
  rs <- rowSums(tab)
  cs <- colSums(tab)

  if((!test.always && (any(rs == 0) || any(cs == 0))) || ncol(tab) == 1 || nrow(tab) == 1) {
    return(list(p.value=NA_real_, method = "Fisher's Exact Test for Count Data"))
  }
  # this already subsets out rows and cols with all 0's
  stats::fisher.test(tab, simulate.p.value=simulate.p.value, B=B)
}

## trend test for ordinal data
trend <- function(x, x.by, ..., test.always = FALSE) {
  if(!requireNamespace("coin", quietly = TRUE))
  {
    warning("The \"coin\" package is required to run a trend test.", call. = FALSE)
    return(notest(x, x.by, ...))
  }

  tab <- table(x, x.by, exclude=NA)
  rs <- rowSums(tab)
  cs <- colSums(tab)

  if(!test.always && (any(rs == 0) || any(cs == 0))) {
    return(list(p.value=NA_real_, method = "Trend test for ordinal variables"))
  }
  ## should be taken care of with coin::
  check_pkg("coin")
  indtest <- coin::independence_test(x~as.factor(x.by), teststat="quad")
  list(p.value=coin::pvalue(indtest), method="Trend test for ordinal variables", statistic=indtest@statistic@teststatistic)
}

## ' logrank
## '
## ' survdiff logrank test
## ' @param x  surv variable
## ' @param x.by  by, categorical variable
## ' @return   test output with $method and $p.value
logrank <- function(x, x.by, ..., test.always = FALSE) {
  tab <- table(is.na(x), x.by, exclude=NA)
  if(!test.always && (any(tab[1, ] == 0) || any(colSums(tab) == 0))) {
    return(list(p.value=NA_real_, method="survdiff logrank"))
  }
  out <- survival::survdiff(x ~ x.by)
  out$p.value <- 1-stats::pchisq(out$chisq, df=sum(tab[1,] != 0)-1)
  out$method <- "survdiff logrank"
  out
}

mayoverse/arsenal documentation built on Aug. 20, 2024, 4:32 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

mayoverse/arsenal
An Arsenal of 'R' Functions for Large-Scale Statistical Summaries

R/tableby.stat.tests.R
In mayoverse/arsenal: An Arsenal of 'R' Functions for Large-Scale Statistical Summaries

Defines functions logrank trend fe chisq medtest wt kwt anova stddiff notest

R Package Documentation

Browse R Packages

We want your feedback!

mayoverse/arsenal An Arsenal of 'R' Functions for Large-Scale Statistical Summaries

R/tableby.stat.tests.R In mayoverse/arsenal: An Arsenal of 'R' Functions for Large-Scale Statistical Summaries

Defines functions logrank trend fe chisq medtest wt kwt anova stddiff notest

R Package Documentation

Browse R Packages

We want your feedback!

mayoverse/arsenal
An Arsenal of 'R' Functions for Large-Scale Statistical Summaries

R/tableby.stat.tests.R
In mayoverse/arsenal: An Arsenal of 'R' Functions for Large-Scale Statistical Summaries