R/oddsratio.2x2diff.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Documented in oddsratio.2x2diff

#' Perform test for difference in odds ratios
#'
#' @description
#' `oddsratio.2x2diff()` performs the test for difference in odds ratios and is used in chapter 13 of "Applied Nonparametric Statistical Methods" (5th edition)
#'
#' @param x Binary factor of same length as y, z
#' @param y Binary factor of same length as x, z
#' @param z Binary factor of same length as x, y
#' @param alternative Type of alternative hypothesis (defaults to `two.sided`)
#' @param CI.width Confidence interval width (defaults to `0.95`)
#' @param max.exact.perms Maximum number of permutations allowed for exact calculations (defaults to `1000000`)
#' @param nsims.mc Number of Monte Carlo simulations to be performed (defaults to `100000`)
#' @param seed Random number seed to be used for Monte Carlo simulations (defaults to `NULL`)
#' @param do.exact Boolean indicating whether or not to perform exact calculations (defaults to `TRUE`)
#' @param do.asymp Boolean indicating whether or not to perform asymptotic calculations (defaults to `FALSE`)
#' @param do.mc Boolean indicating whether or not to perform Monte Carlo calculations (defaults to `FALSE`)
#' @param do.CI Boolean indicating whether or not to perform confidence interval calculations (defaults to `TRUE`)
#' @returns An ANSMtest object with the results from applying the function
#' @examples
#' # Example 13.2 from "Applied Nonparametric Statistical Methods" (5th edition)
#' oddsratio.2x2diff(ch13$physical.activity, ch13$tv.viewing, ch13$gender,
#'   do.exact = FALSE, do.asymp = TRUE)
#' oddsratio.2x2diff(ch13$physical.activity, ch13$tv.viewing, ch13$gender,
#'   do.exact = FALSE, do.mc = TRUE, seed = 1, nsims = 10000)
#'
#' @importFrom stats complete.cases r2dtable quantile pnorm qnorm
#' @export
oddsratio.2x2diff <-
  function(x, y, z, alternative = c("two.sided", "less", "greater"),
           CI.width = 0.95, max.exact.perms = 1000000, nsims.mc = 100000,
           seed = NULL, do.exact = TRUE, do.asymp = FALSE, do.mc = FALSE,
           do.CI = TRUE) {
    stopifnot(is.factor(x), is.factor(y), is.factor(z),
              nlevels(x) == 2, nlevels(y) == 2, nlevels(z) == 2,
              length(x) == length(y), length(y) == length(z),
              length(CI.width) == 1, CI.width > 0, CI.width < 1,
              is.numeric(max.exact.perms), length(max.exact.perms) == 1,
              is.numeric(nsims.mc), length(nsims.mc) == 1,
              is.numeric(seed) | is.null(seed),
              length(seed) == 1 | is.null(seed),
              is.logical(do.exact) == TRUE, is.logical(do.asymp) == TRUE,
              is.logical(do.mc) == TRUE, is.logical(do.CI) == TRUE)
    alternative <- match.arg(alternative)

    #labels
    varname1 <- deparse(substitute(x))
    varname2 <- deparse(substitute(y))
    varname3 <- deparse(substitute(z))

    #unused arguments
    H0 <- NULL
    cont.corr <- NULL
    #default outputs
    pval <- NULL
    pval.stat <- NULL
    pval.note <- NULL
    pval.asymp <- NULL
    pval.asymp.stat <- NULL
    pval.asymp.note <- NULL
    pval.exact <- NULL
    pval.exact.stat <- NULL
    pval.exact.note <- NULL
    pval.mc <- NULL
    pval.mc.stat <- NULL
    pval.mc.note <- NULL
    actualCIwidth.exact <- NULL
    CI.exact.lower <- NULL
    CI.exact.upper <- NULL
    CI.exact.note <- NULL
    CI.asymp.lower <- NULL
    CI.asymp.upper <- NULL
    CI.asymp.note <- NULL
    CI.mc.lower <- NULL
    CI.mc.upper <- NULL
    CI.mc.note <- NULL
    test.note <- NULL

    #prepare
    complete.cases.id <- complete.cases(x, y, z)
    x <- x[complete.cases.id] #remove missing cases
    y <- y[complete.cases.id] #remove missing cases
    z <- z[complete.cases.id] #remove missing cases
    x <- droplevels(x)
    y <- droplevels(y)
    z <- droplevels(z)
    tab.z1 <- table(x[z == levels(z)[1]], y[z == levels(z)[1]])
    rtot.z1 <- rowSums(tab.z1)
    ctot.z1 <- colSums(tab.z1)
    if (min(tab.z1) == 0){
      phi.star.1 <- log(((tab.z1[1, 1] + 0.5) * (tab.z1[2, 2] + 0.5)) /
                          ((tab.z1[1, 2] + 0.5) * (tab.z1[2, 1] + 0.5)))
    }else{
      phi.star.1 <- log((tab.z1[1, 1] * tab.z1[2, 2]) /
                          (tab.z1[1, 2] * tab.z1[2, 1]))
    }
    var.phi.star.1 <- sum(1 / tab.z1)
    tab.z2 <- table(x[z == levels(z)[2]], y[z == levels(z)[2]])
    rtot.z2 <- rowSums(tab.z2)
    ctot.z2 <- colSums(tab.z2)
    if (min(tab.z1) == 0){
      phi.star.2 <- log(((tab.z2[1, 1] + 0.5) * (tab.z2[2, 2] + 0.5)) /
                          ((tab.z2[1, 2] + 0.5) * (tab.z2[2, 1] + 0.5)))
    }else{
      phi.star.2 <- log((tab.z2[1, 1] * tab.z2[2, 2]) /
                          (tab.z2[1, 2] * tab.z2[2, 1]))
    }
    var.phi.star.2 <- sum(1 / tab.z2)
    stat <- phi.star.1 - phi.star.2

    #give mc output if exact not possible
    if (do.exact){
      max.z1perm.pos <- max(tab.z1) + 1
      for (i in 1:2){
        for (j in 1:2){
          if (tab.z1[i, j] == min(tab.z1)){
            if (min(rtot.z1[i], ctot.z1[j]) < max.z1perm.pos){
              max.z1perm.pos <- min(rtot.z1[i], ctot.z1[j])
              z1perm.row <- i
              z1perm.col <- j
            }
          }
        }
      }
      max.z2perm.pos <- max(tab.z2) + 1
      for (i in 1:2){
        for (j in 1:2){
          if (tab.z2[i, j] == min(tab.z2)){
            if (min(rtot.z2[i], ctot.z2[j]) < max.z2perm.pos){
              max.z2perm.pos <- min(rtot.z2[i], ctot.z2[j])
              z2perm.row <- i
              z2perm.col <- j
            }
          }
        }
      }
      n.perms <- (max.z1perm.pos + 1) * (max.z2perm.pos + 1)
      if (n.perms > max.exact.perms){
        do.mc <- TRUE
      }
    }else{
      n.perms <- 0
    }

    #exact p-value
    if(do.exact && n.perms <= max.exact.perms){
      pval.exact.stat <- stat
      pval.exact <- 0
      stat.tmp <- rep(NA, n.perms)
      prob.tmp <- rep(NA, n.perms)
      counter <- 1
      tab.tmp1 <- matrix(c(0, 0, 0, 0), nrow = 2, ncol = 2)
      tab.tmp2 <- matrix(c(0, 0, 0, 0), nrow = 2, ncol = 2)
      #loop for z1 table
      for (z1perm.pos in 0:max.z1perm.pos){
        #create z1 table
        tab.tmp1[z1perm.row, z1perm.col] <- z1perm.pos
        tab.tmp1[z1perm.row, 3 - z1perm.col] <-
          rtot.z1[z1perm.row] - tab.tmp1[z1perm.row, z1perm.col]
        tab.tmp1[3 - z1perm.row, z1perm.col] <-
          ctot.z1[z1perm.col] - tab.tmp1[z1perm.row, z1perm.col]
        tab.tmp1[3 - z1perm.row, 3 - z1perm.col] <-
          rtot.z1[3- z1perm.col] - tab.tmp1[3 - z1perm.row, z1perm.col]
        #calculate table probability
        dhyper.tmp1 <- dhyper(x = tab.tmp1[1, 1], m = ctot.z1[1],
                              n = ctot.z1[2], k = rtot.z1[1])
        #calculate phi
        if (min(tab.tmp1) == 0){
          tab.tmp1 <- tab.tmp1 + 0.5
        }
        phi.star.tmp1 <- log((tab.tmp1[1, 1] * tab.tmp1[2, 2]) /
                               (tab.tmp1[1, 2] * tab.tmp1[2, 1]))
        #loop for z2 table
        for (z2perm.pos in 0:max.z2perm.pos){
          #create z2 table
          tab.tmp2[z2perm.row, z2perm.col] <- z2perm.pos
          tab.tmp2[z2perm.row, 3 - z2perm.col] <-
            rtot.z2[z2perm.row] - tab.tmp2[z2perm.row, z2perm.col]
          tab.tmp2[3 - z2perm.row, z2perm.col] <-
            ctot.z2[z2perm.col] - tab.tmp2[z2perm.row, z2perm.col]
          tab.tmp2[3 - z2perm.row, 3 - z2perm.col] <-
            rtot.z2[3- z2perm.col] - tab.tmp2[3 - z2perm.row, z2perm.col]
          #calculate table probability
          dhyper.tmp2 <- dhyper(x = tab.tmp2[1, 1], m = ctot.z2[1],
                                n = ctot.z2[2], k = rtot.z2[1])
          #calculate phi
          if (min(tab.tmp2) == 0){
            tab.tmp2 <- tab.tmp2 + 0.5
          }
          phi.star.tmp2 <- log((tab.tmp2[1, 1] * tab.tmp2[2, 2]) /
                                 (tab.tmp2[1, 2] * tab.tmp2[2, 1]))
          #compare with observed statistic
          stat.tmp[counter] <- phi.star.tmp1 - phi.star.tmp2
          prob.tmp[counter] <- dhyper.tmp1 * dhyper.tmp2
          if (alternative == "less" && stat.tmp[counter] <= pval.exact.stat){
            pval.exact <- pval.exact + prob.tmp[counter]
          }else if (alternative == "greater" && stat.tmp[counter] >= pval.exact.stat){
            pval.exact <- pval.exact + prob.tmp[counter]
          }else if (alternative == "two.sided" &&
                    abs(stat.tmp[counter]) >= abs(pval.exact.stat)){
            pval.exact <- pval.exact + prob.tmp[counter]
          }
          counter <- counter + 1
        }
      }
      if (do.CI){
        prob.tmp2 <- cumsum(prob.tmp[order(stat.tmp)])
        stat.tmp2 <- sort(stat.tmp)
        i <- 1
        repeat{
          if (prob.tmp2[i] <= (1 - CI.width) / 2 &&
              prob.tmp2[i + 1] > (1 - CI.width) / 2) {
            CI.exact.lower <- stat.tmp2[i]
            break
          }
          i <- i + 1
        }
        prob.tmp2 <- cumsum(prob.tmp[order(stat.tmp, decreasing = TRUE)])
        stat.tmp2 <- sort(stat.tmp, decreasing = TRUE)
        i <- 1
        repeat{
          if (prob.tmp2[i] <= (1 - CI.width) / 2 &&
              prob.tmp2[i + 1] > (1 - CI.width) / 2) {
            CI.exact.upper <- stat.tmp2[i]
            break
          }
          i <- i + 1
        }
      }
    }

    #Monte Carlo p-value
    if (do.mc){
      pval.mc.stat <- stat
      if (!is.null(seed)){set.seed(seed)}
      pval.mc <- 0
      stat.tmp <- rep(NA, nsims.mc)
      for (i in 1:nsims.mc){
        #simulate tables
        tab.tmp1 <- r2dtable(1, rtot.z1, ctot.z1)[[1]]
        tab.tmp2 <- r2dtable(1, rtot.z2, ctot.z2)[[1]]
        #create z1 stat
        phi.star.tmp1 <- log((tab.tmp1[1, 1] * tab.tmp1[2, 2]) /
                               (tab.tmp1[1, 2] * tab.tmp1[2, 1]))
        #create z2 stat
        phi.star.tmp2 <- log((tab.tmp2[1, 1] * tab.tmp2[2, 2]) /
                               (tab.tmp2[1, 2] * tab.tmp2[2, 1]))
        #compare with observed statistic
        stat.tmp[i] <- phi.star.tmp1 - phi.star.tmp2
        if (alternative == "less" && stat.tmp[i] <= pval.mc.stat){
          pval.mc <- pval.mc + 1 / nsims.mc
        }else if (alternative == "greater" && stat.tmp[i] >= pval.mc.stat){
          pval.mc <- pval.mc + 1 / nsims.mc
        }else if (alternative == "two.sided" &&
                  abs(stat.tmp[i]) >= abs(pval.mc.stat)){
          pval.mc <- pval.mc + 1 / nsims.mc
        }
      }
      if (do.CI){
        CI.mc.lower <- quantile(stat.tmp, (1 - CI.width) / 2, na.rm = TRUE)[[1]]
        CI.mc.upper <- quantile(stat.tmp, 1 - (1 - CI.width) / 2, na.rm = TRUE)[[1]]
      }
    }

    #asymptotic p-value
    if (do.asymp){
      pval.asymp.stat <- (phi.star.1 - phi.star.2) /
        sqrt(var.phi.star.1 + var.phi.star.2)
      if (alternative == "less"){
        pval.asymp <- pnorm(pval.asymp.stat, lower.tail = TRUE)
      }else if (alternative == "greater"){
        pval.asymp <- pnorm(pval.asymp.stat, lower.tail = FALSE)
      }else if (alternative == "two.sided"){
        pval.asymp <- pnorm(abs(pval.asymp.stat), lower.tail = FALSE) * 2
      }
      #confidence intervals
      if (do.CI){
        CI.asymp.lower <- phi.star.1 - phi.star.2 -
          qnorm((1 - CI.width) / 2, lower.tail = FALSE) *
          sqrt(var.phi.star.1 + var.phi.star.2)
        CI.asymp.upper <- phi.star.1 - phi.star.2 +
          qnorm((1 - CI.width) / 2, lower.tail = FALSE) *
          sqrt(var.phi.star.1 + var.phi.star.2)
      }
    }

    #check if message needed
    if (!do.mc && !do.asymp) {
      test.note <- paste("Neither Asymptotic nor Monte Carlo test requested")
    }

    #define hypotheses
    if (alternative == "two.sided"){
      H0 <- paste0("H0: Odds ratio for ", varname1, " by ", varname2,
                   " is the same for level '", levels(z)[1], "' and level '",
                   levels(z)[2], "' of ", varname3, "\n",
                   "H1: Odds ratio for ", varname1, " by ", varname2,
                   " is not the same for level '", levels(z)[1], "' and level '",
                   levels(z)[2], "' of ", varname3, "\n")
    }else if (alternative == "less"){
      H0 <- paste0("H0: Odds ratio for ", varname1, " by ", varname2,
                   " is the same for level '", levels(z)[1], "' and level '",
                   levels(z)[2], "' of ", varname3, "\n",
                   "H1: Odds ratio for ", varname1, " by ", varname2,
                   " is smaller for level '", levels(z)[1], "' than level '",
                   levels(z)[2], "' of ", varname3, "\n")
    }else if (alternative == "greater"){
      H0 <- paste0("H0: Odds ratio for ", varname1, " by ", varname2,
                   " is the same for level '", levels(z)[1], "' and level '",
                   levels(z)[2], "' of ", varname3, "\n",
                   "H1: Odds ratio for ", varname1, " by ", varname2,
                   " is larger for level '", levels(z)[1], "' than level '",
                   levels(z)[2], "' of ", varname3, "\n")
    }

    #return
    result <- list(title = "Test for difference in odds ratios",
                   varname1 = varname1, varname2 = varname2, H0 = H0,
                   alternative = alternative, cont.corr = cont.corr, pval = pval,
                   pval.stat = pval.stat, pval.note = pval.note,
                   pval.exact = pval.exact, pval.exact.stat = pval.exact.stat,
                   pval.exact.note = pval.exact.note, targetCIwidth = CI.width,
                   actualCIwidth.exact = actualCIwidth.exact,
                   CI.exact.lower = CI.exact.lower,
                   CI.exact.upper = CI.exact.upper, CI.exact.note = CI.exact.note,
                   pval.asymp = pval.asymp, pval.asymp.stat = pval.asymp.stat,
                   pval.asymp.note = pval.asymp.note,
                   CI.asymp.lower = CI.asymp.lower,
                   CI.asymp.upper = CI.asymp.upper, CI.asymp.note = CI.asymp.note,
                   pval.mc = pval.mc, pval.mc.stat = pval.mc.stat,
                   nsims.mc = nsims.mc, pval.mc.note = pval.mc.note,
                   CI.mc.lower = CI.mc.lower, CI.mc.upper = CI.mc.upper,
                   CI.mc.note = CI.mc.note,
                   test.note = test.note)
    class(result) <- "ANSMtest"
    return(result)
  }
Any scripts or data that you put into this service are public.
ANSM5 documentation built on Sept. 11, 2024, 6:45 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/oddsratio.2x2diff.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions oddsratio.2x2diff

Documented in oddsratio.2x2diff

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5 Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/oddsratio.2x2diff.R In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

Defines functions oddsratio.2x2diff

Documented in oddsratio.2x2diff

Try the ANSM5 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ANSM5
Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition

R/oddsratio.2x2diff.R
In ANSM5: Functions and Data for the Book "Applied Nonparametric Statistical Methods", 5th Edition