#' @title Proportion Z-test for WT transition probabilities
#' @description A wrapper of prop.test to assess the significance of WT transition probability
#' resemblance between GCM and reanalysis
#' @param obs.grid Cluster grid of observations (reanalysis)
#' @param gcm.grid Cluster grid of GCM simulation
#' @param what Parameter to be returned. Default to \code{"p.value"}, returning the p-value of the test.
#' See \code{\link[stats]{prop.test}} return values, for the meaning of the other possible values, namely
#' \code{"statistic"}, \code{"parameter"} and \code{"estimate"}.
#' @param ... Further arguments passed to \code{\link[stats]{prop.test}}
#' @details The function receives as input cluster grids, as returned by \code{\link[transformeR]{clusterGrid}}.
#'
#' The null hypothesis H0 is that two populations from which the WT transitions were drawn have the same true
#' proportion of such transition (i.e., for p-value > 0.05 at a 95% confidence interval, H0 can't be rejected).
#' The alternative hypothesis A is that this proportion is different in at least one of the populations
#' (i.e., the GCM has a different distribution).
#'
#' @return a numeric square matrix with the proportion Z-test results for each transition probability
#'
#' @references How the test works, it is well explained here: \url{http://www.sthda.com/english/wiki/two-proportions-z-test-in-r}
#' @importFrom transformeR getWT
#' @importFrom stats prop.test
#' @importFrom magrittr %>% extract2
#'
#' @examples
#' load("./Data/WTs_cmip5.RData", verbose = TRUE)
#' load("./Data/clustering_eraInterim.RData", verbose = TRUE)
#' pvalues <- transitionProb.test(obs.grid = clusters.era.interim,
#' gcm.grid = wts.EC.earth)
#'
#' pcolors <- RColorBrewer::brewer.pal(n = 9, "YlOrRd") %>% colorRampPalette()
#' # NOTE that levelplot will depict the transposed transition matrix by default!
#'
#'
#' lattice::levelplot(t(pvalues), col.regions = pcolors(201), at = seq(0,1,0.01),
#' main = "EC-Earth prop.test - p.values",
#' xlab = "", ylab = "",
#' scales = list(alternating = 3))
#'
transitionProb.test <- function(obs.grid, gcm.grid, what = "p.value", ...) {
arg.list <- list(...)
wts.obs <- getWT(obs.grid)
wts.gcm <- getWT(gcm.grid)
if (is.null(wts.obs) | is.null(wts.gcm)) stop("Input is not a clustering grid")
obs.grid <- gcm.grid <- NULL
what <- match.arg(what, choices = c("statistic", "parameter", "p.value", "estimate"))
wto1 <- wts.obs[-length(wts.obs)]
wto2 <- wts.obs[-1]
wtp1 <- wts.gcm[-length(wts.gcm)]
wtp2 <- wts.gcm[-1]
# compute all transition probabilities as a vector
tlabels <- paste(names(wto1), names(wto2), sep = "-->")
cross.freqs.obs <- table(tlabels)
wtnames.obs <- unique(names(wts.obs))
tlabels <- paste(names(wtp1), names(wtp2), sep = "-->")
cross.freqs.gcm <- table(tlabels)
wtnames.gcm <- unique(names(wts.gcm))
# Fixed WT ordering (in decreasing order of annual frequency)
wtnames.ref <- c("A", "C", "SW", "W", "AW", "NW", "S", "ASW", "N", "ANW",
"SE", "CSW", "CS", "E", "NE", "AS", "CSE", "AN", "CW",
"CN", "CNW", "ASE", "CNE", "CE", "AE", "ANE")
ind.order <- match(wtnames.ref, wtnames.obs)
wtnames.obs <- wtnames.obs[ind.order]
ind.order <- match(wtnames.ref, wtnames.gcm)
wtnames.gcm <- wtnames.gcm[ind.order]
# Include missing transitions with zero probability
ref <- expand.grid(wtnames.obs, wtnames.obs)
allcombs <- paste(ref[ ,1], ref[ ,2], sep = "-->")
namesaux <- allcombs[which(!(allcombs %in% names(cross.freqs.obs)))]
aux <- rep(0, length(namesaux))
names(aux) <- namesaux
all.freqs.obs <- append(cross.freqs.obs, aux)
ref <- expand.grid(wtnames.gcm, wtnames.gcm)
allcombs <- paste(ref[ ,1], ref[ ,2], sep = "-->")
namesaux <- allcombs[which(!(allcombs %in% names(cross.freqs.gcm)))]
aux <- rep(0, length(namesaux))
names(aux) <- namesaux
all.freqs.gcm <- append(cross.freqs.gcm, aux)
# Compute matrix of proportion Z-test results
mat <- sapply(1:length(wtnames.obs), function(i) {
wt <- wtnames.obs[i]
all.freqs.from.obs <- all.freqs.obs[grep(paste0("^", wt, "-->"), names(all.freqs.obs))]
all.freqs.from.gcm <- all.freqs.gcm[grep(paste0("^", wt, "-->"), names(all.freqs.gcm))]
nobs <- sum(all.freqs.from.obs)
nsim <- sum(all.freqs.from.gcm)
arg.list[["n"]] <- c(nobs, nsim)
a <- sapply(names(all.freqs.from.obs), USE.NAMES = TRUE, function(j) {
arg.list[["x"]] <- c(all.freqs.from.obs[j], all.freqs.from.gcm[j])
suppressWarnings({
do.call("prop.test", args = arg.list) %>% extract2(what)
})
})
ind <- match(paste(wt, wtnames.obs, sep = "-->"), names(a))
# print(ind)
return(a[ind])
})
mat <- t(mat)
colnames(mat) <- rownames(mat) <- wtnames.obs
return(mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.