R/chi_perm.r
In GmAMisc: 'Gianmarco Alberti' Miscellaneous

Documented in chiperm

#' R function for permutation-based chi-square test of independence
#'
#' The function performs the chi-square test of independence on the basis of permuted tables, whose
#' number is selected by user.\cr
#'
#' For the rationale of this approach, see for instance the description provided by:\cr Beh E.J.,
#' Lombardo R. 2014, Correspondence Analysis: Theory, Practice and New Strategies, Chichester,
#' Wiley, pages 62-64.\cr
#'
#' @param data Dataframe containing the input contingency table.
#' @param B Desired number of permuted tables (999 by default).
#' @param resid TRUE or FALSE (default) if the user does or doesn't want to plot the table of
#'   Pearson's standardized residuals.
#' @param filter Takes TRUE or FALSE (default) if the user does or does't want to filter the
#'   Pearson's standardized residuals according to the threshold provided by the thresh parameter;
#'   by default, the threshold is set at 1.96, which corresponds to an alpha level of 0.05.
#' @param thresh Value of the standardized residuals below which the residuals will be not
#'   displayed (by default, the threshold is set at 1.96, which corresponds to an alpha level of
#'   0.05).
#' @param cramer Takes TRUE or FALSE (default) if the user does or doesn't want to calculate and
#'   plot the bootstrap confidence interval for Cramer's V.
#'
#' @return The function produces:\cr
#' (1) a chart that displays the permuted distribution of the
#' chi-square statistic based on B permuted tables. The selected number of permuted tables, the
#' observed chi-square, the 95th percentile of the permuted distribution, and the associated p value
#' are reported at the bottom of the chart;\cr
#'
#' (2) a chart that displays the bootstrap distribution of Cramer's V coefficient, based on a number
#' of bootstrap replicates which is equal to the value of the function's parameter B;\cr
#'
#' (3) a chart that the Pearson's Standardized Residuals: a colour scale allows to easily understand
#' which residual is smaller (BLUE) or larger (RED) than expected under the hypothesis of
#' independence. Should the user want to only display residuals larger than a given threshold, it
#' suffices to set the filter parameter to TRUE, and to specify the desired threshold by means of
#' the thresh parameter, which is set at 1.96 by default.
#'
#' @keywords chiperm
#'
#' @export
#'
#' @importFrom stats chisq.test
#' @importFrom lsr cramersV
#' @importFrom InPosition contingency.data.break
#'
#' @examples
#' data(assemblage)
#' chiperm(data=assemblage, B=199, resid=TRUE, cramer=TRUE)
#'
chiperm <- function(data, B=999, resid=FALSE, filter=FALSE, thresh=1.96, cramer=FALSE){
  options(warn=-1)

  rowTotals <- rowSums(data)
  colTotals <- colSums(data)

  obs.chi.value <- chisq.test(data)$statistic

  chistat.perm <- vector(mode = "numeric", length = B)
  chi.statistic <- function(x)  chisq.test(x)$statistic
  chistat.perm <- sapply(r2dtable(B, rowTotals, colTotals), chi.statistic)

  p.lowertail <- (1 + sum (chistat.perm < obs.chi.value)) / (1 + B)
  p.uppertail <- (1 + sum (chistat.perm > obs.chi.value)) / (1 + B)
  two.sided.p <- 2 * min(p.lowertail, p.uppertail)

  p.to.report <- ifelse(two.sided.p < 0.001, "< 0.001",
                        ifelse(two.sided.p < 0.01, "< 0.01",
                               ifelse(two.sided.p < 0.05, "< 0.05",
                                      round(two.sided.p, 3))))

  graphics::hist(chistat.perm, main="Chi-square statistic Permuted Distribution",
       sub=paste0("\nBalck dot: observed chi-sq (", round(obs.chi.value, 3), ")","\nDashed line: 95th percentile of the permuted chi-sq (=alpha 0.05 threshold) (", round(quantile(chistat.perm, c(0.95)),3), ")", "\np value: ", p.to.report, " (n. of permutations: ", B,")"),
       xlab = "",
       cex.main=0.85,
       cex.sub=0.70)
  rug(chistat.perm, col = "#0000FF")
  points(x = obs.chi.value, y=0, pch=20, col="black")
  abline(v = round(quantile(chistat.perm, c(0.95)), 5), lty = 2, col = "blue")

  if (resid==TRUE) {
    res <- chisq.test(data)
    col1 <- colorRampPalette(c("blue", "white", "red"))
    ifelse(filter==FALSE, res$stdres,res$stdres[abs(res$stdres) <= thresh] <- 0)
    corrplot(res$stdres, method="circle", addCoef.col="black", is.corr=FALSE, cl.lim = c(min(res$stdres), max(res$stdres)),tl.col="black", tl.cex=0.8, col = col1(100)) #requires 'corrplot'
  } else {}

  if (cramer==TRUE) {
    cramerv <- vector (mode = "numeric", length = B)
    cramerv[1] <- cramersV(data) #requires 'lsr'
    for(i in 2:B){
      cramerv[i] <- cramersV(contingency.data.break(data, boot=TRUE))
    }

    perc5 <- quantile(cramerv, c(0.05))
    perc95 <- quantile(cramerv, c(0.95))
    graphics::hist(cramerv, xlab="", main="Cramer's V Bootstrap Distribution", sub=paste0("\nCramer's V: ", round(cramerv[1], 3), "\nDashed lines: 5th and 95th percentile", "\n95% Condifence Interval: ", round(perc5, 3), "-", round(perc95, 3), " (n. of bootstrap replicates: ",B,")"), cex.sub=0.8)
    rug(cramerv, col = "#0000FF")
    abline(v = perc5, lty = 2, col = "blue")
    abline(v = perc95, lty = 2, col = "blue")
  } else {}
}