R/tree-simulation-functions.R

Defines functions curveVstar_tree gen.p.values gen.mu.leaves gauss_bloc gen.mu.noleaves

Documented in curveVstar_tree gauss_bloc gen.mu.leaves gen.mu.noleaves gen.p.values

#' Generate an unstructured signal
#' 
#' @param m An integer value, the number of hypotheses
#' @param pi0 A numeric value in \eqn{[0,1]}, the proportion of true null signals
#' @param barmu A numeric value, the strength of the signal 
#' @return A numeric vector of length \code{m}, the signal values for each hypothesis
#' @export
#' @examples
#' m <- 100
#' pi0 <- 0.7
#' barmu <- 4
#' mu <- gen.mu.noleaves(m = m, pi0 = pi0, barmu = barmu) 
#' plot(mu)
gen.mu.noleaves <- function(m, pi0, barmu) {
    mu <- numeric(m)
    m0 <- floor(m * pi0)
    m1 <- m - m0
    mu[sample(seq(1, m), m1)] <- barmu
    mu
}

#' Generate a block of signal of Gaussian shape
#' 
#' @param barmu A numeric value, the strength of the signal 
#' @param len An integer value, the length of the block
#' @return A numeric vector of length \code{len}, the signal values in the block
gauss_bloc <- function(barmu, len) {
    # this choice of sigma2 is so that the values at the edge of the vector 'out' are = 1/200
    sigma2 <- len^2/(8 * log(200))
    out <- exp(-(seq_len(len) - floor(len/2))^2/(2 * sigma2))
    return(barmu * out/mean(out))
}

#' Generate the signal in the leaves of the tree
#' 
#' @param m An integer value, the number of hypotheses
#' @param K1 An integer value, the number of non-null leaves
#' @param d A numeric value in \eqn{[0,1]}, the proportion of non-null signals in each non-null leaf
#' @param grouped A boolean value, whether the non-null leaves are contiguous or not
#' @param setting A character value specifying the shape of the signal in each non-null leaf
#' @param barmu A numeric value, the strength of the signal 
#' @param leaf_list A list of leaves as generated by \code{dyadic.from.Nnn} functions
#' @return A numeric vector of length \code{m}, the signal values for each hypothesis
#' @details If \code{setting == "const"} the non-null signal is constant equal to \code{barmu}. If \code{setting == "gauss"} and \eqn{d=1} the signal in an active leaf has the shape of a Gaussian bell with mean \code{barmu}, see the \code{\link{gauss_bloc}} function which generates it. If \eqn{d<1} it has the same shape but randomly pruned. If \code{setting == "poisson"} the non-null signal is randomly drawn according to a Poisson distribution of mean \code{barmu} (slightly modified to not yield 0). If \code{setting == "rgauss"} the non-null signal is randomly drawn according to a Normal distribution of mean \code{barmu} and variance \code{barmu}.
#' @seealso \code{\link{gauss_bloc}}
#' @export
#' @importFrom stats rpois rnorm
#' @examples
#' m <- 160
#' s <- 10
#' K1 <- floor(m/(s * 4))
#' d <- 1
#' barmu <- 4
#' dd <- dyadic.from.window.size(m, s, method = 2)
#' leaf_list <- dd$leaf_list
#' muC <- gen.mu.leaves(m = m, K1 = K1, d = d, grouped = FALSE, 
#'                     setting = "const", barmu = barmu, leaf_list =leaf_list)
#' muC <- gen.mu.leaves(m = m, K1 = K1, d = d, grouped = TRUE, 
#'                     setting = "const", barmu = barmu, leaf_list =leaf_list)
#' muG <- gen.mu.leaves(m = m, K1 = K1, d = d, grouped = FALSE, 
#'                     setting = "gauss", barmu = barmu, leaf_list =leaf_list)
#' muP <- gen.mu.leaves(m = m, K1 = K1, d = d, grouped = FALSE, 
#'                     setting = "poisson", barmu = barmu, leaf_list =leaf_list)
#' mu <- cbind(constant = muC, Gaussian = muG, Poisson = muP)
#' matplot(mu, t = 's')
gen.mu.leaves <- function(m, K1, d, grouped, setting, barmu, leaf_list) {
    mu <- numeric(m)
    K <- length(leaf_list)
    if (K1 > K) 
        stop("K1>K,\nwe don't have so many leaves")
    active_leaves <- numeric(K1)
    if (grouped) {
        active_leaves <- seq(1, K1) # + sample(seq(0, K - K1), 1)
    } else {
        active_leaves <- sample(seq(1, K), K1)
    }
    for (i in active_leaves) {
        length_leaf <- length(leaf_list[[i]])
        m1loc <- floor(length_leaf * d)
        signal <- sample(length_leaf, m1loc)
        mu[leaf_list[[i]][signal]] <- switch(setting, const = {
            barmu
        }, rgauss = {
            rnorm(m1loc, mean = barmu, sd = sqrt(barmu))
        }, gauss = {
            gauss_bloc(barmu, length_leaf)[signal]
          # why did I make this choice and not just gauss_bloc(barmu, m1loc) ? 
          # Strange, because in the current way the signal doesn't have mean barmu but lesser mean as soon as d<1
        }, poisson = {
            rpois(m1loc, 999 * barmu/1000) + barmu/1000
        })
    }
    return(mu)
}

#' Generate one-sided p-values associated to a given signal with equi-correlated noise
#' 
#' @param m An integer value, the number of hypotheses
#' @param mu A vector of \eqn{m} signal values
#' @param rho A numeric value in \eqn{[0,1]}, the level of equi-correlation between test statistics
#' @return A vector of \eqn{m} one-sided \eqn{p}-values
#' @export
#' @importFrom stats pnorm
#' @examples
#' m <- 100
#' s <- 10
#' K1 <- floor(m/(s * 4))
#' d <- 1
#' barmu <- 4
#' dd <- dyadic.from.window.size(m, s, method = 2)
#' leaf_list <- dd$leaf_list
#' mu <- gen.mu.leaves(m = m, K1 = K1, d = d, grouped = FALSE, 
#'                     setting = "const", barmu = barmu, leaf_list =leaf_list)
#' pvals <- gen.p.values(m = m, mu = mu, rho = 0)
#' plot(-log(pvals), t = 'b')
gen.p.values <- function(m, mu = rep(0, length(mu)), rho = 0) {
    stopifnot(m == length(mu)) ## sanity check
    Z <- rnorm(m + 1, 0, 1)
    Y <- sqrt(1 - rho) * Z[seq_len(m)] + sqrt(rho) * Z[m + 1]
    return(pnorm(Y + mu, lower.tail = FALSE))
}


#' Generate one-sided p-values associated to a given signal with equi-correlated
#' noise
#'
#' @param treeFam A tree-based reference family, see example below
#' @param ordering A permutation of \code{1, ..., m}, the ordering of the
#'   \eqn{m} null hypotheses
#' @return A vector of length \eqn{m}, whose \eqn{k}-th element is a lower
#'   bound \eqn{V^*(S_k)} on the number of true positives in the set \eqn{S_k}
#'   of the first \eqn{k} hypotheses according to the specified ordering
#' @references Durand, G., Blanchard, G., Neuvial, P., & Roquain, E. (2020). Post hoc false positive control for structured hypotheses. Scandinavian Journal of Statistics, 47(4), 1114-1148.
#' @export
#' @examples
#' m <- 250
#' s <- 25
#' K1 <- floor(m/(s * 4))
#' d <- 1
#' m1 <- s*K1*d
#' barmu <- 4
#' dd <- dyadic.from.window.size(m, s, method = 2)
#' leaf_list <- dd$leaf_list
#' mu <- gen.mu.leaves(m = m, K1 = K1, d = d,
#'   grouped = TRUE, setting = "const",
#'   barmu = barmu, leaf_list =leaf_list)
#' pvals <- gen.p.values(m = m, mu = mu, rho = 0)
#' alpha <- 0.1
#' ZL <- zetas.tree(dd$C, leaf_list, zeta.DKWM, pvals, alpha = alpha)
#' treeFam <- list(tree = dd$C, leaves = leaf_list, zetas = ZL)
#' # order by p-value (favorable to Simes)
#' op <- order(pvals)
#' Vp <- curveVstar_tree(treeFam, op)
#'
#' # Simes
#' VpS <- sapply(1:m, FUN=function(kk) posthocBySimes(pvals, op[1:kk], alpha))
#'
#' plot(1:m, 1:m-Vp, t = 's',
#'      xlim = c(0, 2*m1), ylim = c(0, m1), 
#'      ylab = "Lower bound on true positives")
#' lines(1:m, 1:m-VpS, t = 's', col = 3)
#'
#' # order by 'mu' (favorable to DKWM)
#' omu <- order(mu, decreasing = TRUE)
#' Vmu <- curveVstar_tree(treeFam, omu)
#' thrSimes <- t_linear(alpha, seq_len(m), m)
#' SmuS <- sapply(1:m, FUN=function(kk) posthocBySimes(pvals, omu[1:kk], alpha))
#'
#' plot(1:m, 1:m-Vmu, t = 's',
#'      xlim = c(0, 2*m1), ylim = c(0, m1),
#'      ylab = "Lower bound on true positives")
#' lines(1:m, SmuS, t = 's', col = 3)
curveVstar_tree <- function(treeFam, ordering) {
    C <- treeFam$tree
    leaf_list <- treeFam$leaves
    ZL <- treeFam$zetas
    
    m <- length(ordering)
    stopifnot(length(unlist(leaf_list)) == m) ## sanity check
    
    vecVstar <- numeric(m)
    for (ii in 1:m) {
        vecVstar[ii] <- V.star(ordering[1:ii], C, ZL, leaf_list)
    }
    vecVstar
}
pneuvial/sanssouci documentation built on Feb. 12, 2024, 4:18 a.m.