In statlab/permuter: permuter: Permutation Tests

Vignette Info

This example comes from the textbook section 5.5 (p 273-276).

We analyze the mult data, previously discussed in Westfall and Wolfinger (2000), to test for equality of the multivariate distribution of three variables $Y_1, Y_2$, and $Y_3$ in the two groups labeled by the binary variable $X$ (two independent samples test). The aim of this example is to show how the closed testing procedure performs when different combining functions are applied.

The procedure is as follows: let $p_{(1)} \leq p_{(2)} \leq p_{(3)}$ be the (increasingly) ordered raw p-values; choose a combining function (whose arguments are raw p-values), and obtain a global test involving all three variables. The adjusted p-value related to $p_{(1)}$ is the p-value of the global test on all the three variables. At second step, obtain a global test involving the variables corresponding to $p_{(2)} \leq p_{(3)}$ only; the adjusted p-value related to $p_{(2)}$ is the p-value of the global test. At third step, obtain a global test involving the variable generating $p_{(3)}$ only; there is no need to perform the procedure on the variable with largest raw p-value. A monotonicity condition must be satisfied, i.e. the adjusted p-value at each step is not smaller than the adjusted p-value at previous step.

First, we perform the testing procedure on the three variables and obtain the raw p-values. We consider the difference of means as the test statistic, and its null distribution can be obtained by calling the two_sample function. We consider a two-sided alternative and B = 1000 random permutations.

library(permuter)
set.seed(60)
data(mult)

B <- 1000
X <- mult[, 1]
group0 <- mult[X == 0, -1]
group1 <- mult[X == 1, -1]
p <- ncol(group0)

observed <- sapply(1:p, function(x) mean(group1[, x]) - mean(group0[, x]))
distr <- sapply(1:p, function(x) two_sample(group1[, x], group0[, x], reps = B))
p_raw <- sapply(1:p, function(x) t2p(observed[x], distr[, x], alternative = "two-sided"))
names(p_raw) <- colnames(group0)
p_raw

Order the raw p-values increasingly

The vector p_raw contains the raw p-values related to each variable. Now we order the raw p-values increasingly. The matrix perm_pvalues contains the null distribution of the raw p-values, and its columns are ordered with respect to the ordering of p_ord (i.e. the observed raw p-values).

p_ord <- sort(p_raw, decreasing = FALSE)

perm_pvalues <- apply(distr, 2, pvalue_distr, alternative = "two-sided")
perm_pvalues_ord <- perm_pvalues[, order(p_raw)]

Perform the step-up procedure with Liptak's combining function

Now we obtain the adjusted p-values as the global p-values obtained by combining the last $3-j$ columns of perm_pvalues_ord, $j = 0,1,2$. We can do this by choosing one of the three combining function proposed (Tippett's, Fisher's, and Liptak's). The following code computes the adjusted p-values by applying each combining function.

t_tippett <- apply(perm_pvalues_ord, 1, tippett)
obs_tippett <- tippett(p_ord)
p_ris <- rep(NA, p)
p_ris[1] <- t2p(obs_tippett, t_tippett, alternative = "greater")
if (p > 2) {
    for (j in 2:(p - 1)) {
        obs_tippett <- tippett(p_ord[j:p])
        t_tippett <- apply(perm_pvalues_ord[, j:p], 1, tippett)
        p_ris[j] <- max(t2p(obs_tippett, t_tippett, alternative = "greater"), p_ris[(j - 
            1)])
    }
}
p_ris[p] <- max(p_ord[p], p_ris[p - 1])
p_ris[order(p_raw)] <- p_ris
names(p_ris) <- colnames(group0)
p_ris_tippett <- p_ris


t_fisher <- apply(perm_pvalues_ord, 1, fisher)
obs_fisher <- fisher(p_ord)
p_ris[1] <- t2p(obs_fisher, t_fisher, alternative = "greater")
if (p > 2) {
    for (j in 2:(p - 1)) {
        obs_fisher <- fisher(p_ord[j:p])
        t_fisher <- apply(perm_pvalues_ord[, j:p], 1, fisher)
        p_ris[j] <- max(t2p(obs_fisher, t_fisher, alternative = "greater"), p_ris[(j - 
            1)])
    }
}
p_ris[p] <- max(p_ord[p], p_ris[p - 1])
p_ris[order(p_raw)] <- p_ris
p_ris_fisher <- p_ris


t_liptak <- apply(perm_pvalues_ord, 1, liptak)
obs_liptak <- liptak(p_ord)
p_ris <- rep(NA, p)
p_ris[1] <- t2p(obs_liptak, t_liptak, alternative = "greater")
if (p > 2) {
    for (j in 2:(p - 1)) {
        obs_liptak <- liptak(p_ord[j:p])
        t_liptak <- apply(perm_pvalues_ord[, j:p], 1, liptak)
        p_ris[j] <- max(t2p(obs_liptak, t_liptak, alternative = "greater"), p_ris[(j - 
            1)])
    }
}
p_ris[p] <- max(p_ord[p], p_ris[p - 1])
p_ris[order(p_raw)] <- p_ris
p_ris_liptak <- p_ris

library(knitr)
kable(cbind(p_ris_fisher, p_ris_liptak, p_ris_tippett), col.names = c("Fisher", 
    "Liptak", "Tippett"))

This procedure has been implemented in the fwe_minp function.

p_ris_liptak2 <- fwe_minp(p_raw, distr, combine = "liptak")
p_ris_fisher2 <- fwe_minp(p_raw, distr, combine = "fisher")
p_ris_tippett2 <- fwe_minp(p_raw, distr, combine = "tippett")
kable(cbind(p_ris_fisher2, p_ris_liptak2, p_ris_tippett2), col.names = c("Fisher", 
    "Liptak", "Tippett"))