R/getEfronp.R

Defines functions getEfronp

Documented in getEfronp

#' Estimation of empirical null distribution.
#' 
#' Estimation of empirical null distribution using Efron's central matching.
#'
#' @param z A numeric vector of z values following the theoretical normal null 
#'   distribution.
#' @param bins The number of intervals for density estimation of the marginal 
#'   density of z.
#' @param maxQ The maximum degree of the polynomial to be considered for density
#'   estimation of the marginal density of z.
#' @param pct The lower and upper (pct*100)% tails of the z values are 
#'   excluded when estimating f(z).
#' @param pct0 The lower and upper (pct0*100)% tails of the z values are 
#'   excluded when estimating f0(z).
#' @param cc The central parts 
#'   \deqn{(\mu - \sigma cc, \mu + \sigma cc)}{(mu - sigma*cc, mu + sigma*cc)}
#'    of the empirical distribution z are used for an estimate of the null 
#'    proportion (eta). 
#' @param plotIt TRUE if density plot is to be produced.
#' 
#' @returns A list containing
#'     \item{correctz }{The corrected z values to follow empirically 
#'                    standard normal distribution.}
#'    \item{correctp }{The corrected p values using the correct z values.}
#'    \item{q }{The chosen degree of polynomial for the estimated 
#'                    marginal density.}
#'    \item{mu0hat }{The location parameter for the normal null 
#'                    distribution.}
#'    \item{sigma0hat }{The scale parameter for the normal null 
#'                    distribution.}
#'    \item{etahat }{The estimated null proportion.}
#' @references
#'   Efron, B. 
#'   (2004). 
#'   Large-scale simultaneous hypothesis testing. 
#'   Journal of the American Statistical Association, 99, 96--104.
#'
#'   Ha, M. J. and Sun, W. 
#'   (2014).
#'   Partial correlation matrix estimation using ridge penalty 
#'   followed by thresholding and re-estimation.
#'   Biometrics, 70, 762--770.
#'
#' @author Min Jin Ha
#' @examples
#'  p <- 100 # number of variables
#'  n <- 50 # sample size
#'	
#'  ###############################
#'  # Simulate data
#'  ###############################
#'  simulation <- simulateData(G = p, etaA = 0.02, n = n, r = 1)
#'  data <- simulation$data[[1]]
#'  stddata <- scale(x = data, center = TRUE, scale = TRUE)
#'    
#'  ###############################
#'  # estimate ridge parameter
#'  ###############################
#'  lambda.array <- seq(from = 0.1, to = 20, by = 0.1) * (n - 1.0)
#'  fit <- lambda.cv(x = stddata, lambda = lambda.array, fold = 10L)
#'  lambda <- fit$lambda[which.min(fit$spe)] / (n - 1.0)
#'    
#'  ###############################
#'  # calculate partial correlation
#'  # using ridge inverse
#'  ###############################
#'  w.upper <- which(upper.tri(diag(p)))
#'  partial <- solve(lambda * diag(p) + cor(data))
#'  partial <- (-scaledMat(x = partial))[w.upper]
#'    
#'  ###############################
#'  # get p-values from empirical 
#'  # null distribution 
#'  ###############################
#'  efron.fit <- getEfronp(z = transFisher(x = partial))
#'  
#' @importFrom graphics hist legend lines
#' @importFrom stats coef dnorm glm ks.test lm pnorm quantile
#' @include ksStat.R
#' @export
getEfronp <- function(z, 
                      bins = 120L,
                      maxQ = 9.0,
                      pct = 0.0,
                      pct0 = 0.25,
                      cc = 1.2,
                      plotIt = FALSE) {
  
  # Validate inputs early with informative messages.
  # maxQ >= 2 is required: the degree search loop below runs 2L:maxQ,
  # which would silently count DOWN (c(2, 1)) for maxQ < 2.
  stopifnot(
    "`bins` must be > 1" = bins > 1,
    "`maxQ` must be >= 2" = maxQ >= 2,
    "`pct0` must be >= pct" = pct0 >= pct,
    "`pct` must be >= 0" = pct >= 0.0
  )

  # Two-sided p-values under the theoretical N(0,1) null.
  pvalues <- 2.0 * {1.0 - stats::pnorm(abs(z))}

  # Z values for marginal density estimation: winsorize z at the
  # (pct, 1 - pct) quantiles so extreme tails do not distort f(z).
  v <- stats::quantile(z, probs = c(pct, 1.0 - pct))
  lo <- v[1L]
  hi <- v[2L]

  zz <- pmax(pmin(z, hi), lo)
  
  # X values for marginal density estimation: histogram of the
  # winsorized z values over `bins` equal-width intervals.
  breaks <- seq(from = lo, to = hi, length = bins + 1L)
  hcomb <- graphics::hist(x = zz, breaks = breaks, plot = FALSE) 
  counts <- hcomb$counts

  # Polynomial design matrix: column j holds (bin midpoints)^j.
  X <- sapply(1L:maxQ, FUN = function(j, x) { x^j }, x = hcomb$mids)
    
  # Per-degree results; entries stay NA for degrees whose fit fails.
  kstest <- rep(NA, maxQ)
  mu0hat <- rep(NA, maxQ)
  sigma0hat <- rep(NA, maxQ)

  # Slot 1 benchmarks the theoretical N(0,1) null: KS statistic of the
  # uncorrected p-values against Uniform(0,1).
  kstest[1L] <- stats::ks.test(x = pvalues, y = "punif")$statistic
  mu0hat[1L] <- 0.0
  sigma0hat[1L] <- 1.0

  cof <- 0.0
  # Lindsey's method: Poisson regression of bin counts on polynomials
  # of degree p in the midpoints; for each degree, estimate the null
  # mean/sd by Efron's central matching at the density mode.
  for (p in 2L:maxQ) {

    fit <- try(stats::glm(counts ~ X[,1L:p], family = "poisson"), 
               silent = TRUE)

    # Skip degrees with failed or rank-deficient fits.
    if (inherits(fit, "try-error")) next

    cof <- stats::coef(fit)

    if (any(is.na(cof))) next

    # Evaluate the fitted log-density polynomial at the observed
    # (winsorized) z values; its argmax locates the mode, taken as the
    # null mean estimate.
    temp <- sapply(0L:p, 
                   FUN = function(i, zz) { zz^i }, 
                   zz = zz)

    fz <- which.max(exp(temp %*% cof))

    mu0hat[p] <- z[fz]

    # Second derivative of the log-density polynomial at the mode.
    temp <- sapply(0L:{p-2L}, 
                   FUN = function(i, mu){ {i + 2L} * {i + 1L} * mu^i},
                   mu = mu0hat[p])

    logfzdiff2 <- temp %*% cof[-{1L:2L}]

    # A proper mode needs negative curvature; central matching gives
    # sigma0 = (-d^2 log f / dz^2)^(-1/2). Degree p is scored by the
    # KS statistic of the corrected p-values against Uniform(0,1).
    if (logfzdiff2 < 0.0) {
      sigma0hat[p] <- {-logfzdiff2}^(-0.5)
      correctZ <- {z - mu0hat[p]} / sigma0hat[p]
      correctp <- 2.0 * {1.0 - stats::pnorm(abs(correctZ))}
      kstest[p] <- ksStat(p = correctp)
    }  
  }

  # If every degree failed, which.min() below would return integer(0)
  # and produce a cryptic subscript error; fail clearly instead.
  if (all(is.na(kstest[-1L]))) {
    stop("density estimation failed for every polynomial degree",
         call. = FALSE)
  }

  # Chosen degree: minimizer of the KS statistic among degrees >= 2
  # (which.min ignores NA entries from skipped degrees).
  qq <- which.min(kstest[-1L]) + 1L
  correctZ <- {z - mu0hat[qq]} / sigma0hat[qq]
  correctp <- 2.0 * {1.0 - stats::pnorm(q = abs(correctZ))}
  mu <- mu0hat[qq]
  sigma <- sigma0hat[qq]
  
  # Eta calculation (null proportion): refit at the chosen degree.
  fit <- tryCatch(stats::glm(counts ~ X[,1:qq], family = 'poisson'),
                  error = function(e) {
                    stop("glm fit not successful\n\t",
                         e$message, call. = FALSE)
                  })

  logf <- log(fit$fitted.values)

  # Central (pct0, 1 - pct0) quantile band of z, where the null
  # component is assumed to dominate, used to estimate f0(z).
  v0 <- stats::quantile(z, probs = c(pct0, 1.0 - pct0))
  lo0 <- v0[1L]
  hi0 <- v0[2L]
  idx0 <- which({hcomb$mids > lo0} & {hcomb$mids < hi0})

  if (length(idx0) > 10L) {
    # Enough central bins: quadratic regression of the fitted
    # log-density on centered bin midpoints.
    x0 <- hcomb$mids[idx0]
    y0 <- logf[idx0]
    X0 <- cbind(x0 - mu, (x0 - mu)^2)
    fit.null <- tryCatch(stats::lm(y0 ~ X0),
                         error = function(e) {
                           stop("lm fit of null model not successful\n\t",
                                e$message, call. = FALSE)
                         })
    coef.null <- fit.null$coef
  } else {
    # Too few central bins: evaluate the log-density polynomial at the
    # central z values themselves and fit the quadratic there.
    idz0 <- which({z > lo0} & {z < hi0})
    z0 <- z[idz0]
    temp <- sapply(0L:qq, FUN = function(i, z) { z^i }, z = z)
    lz <- temp %*% coef(fit)
    y0 <- lz[idz0]
    Z0 <- cbind(z0-mu,(z0-mu)^2)
    fit.null <- tryCatch(stats::lm(y0 ~ Z0), 
                         error = function(e) {
                           stop("lm fit of null model not successful\n\t",
                                e$message, call. = FALSE)
                         })
    coef.null <- fit.null$coef
  }

  # Correct mu and sigma from the quadratic log f0(z) fit:
  # vertex of a*x^2 + b*x + c is at -b/(2a); sigma^2 = -1/(2a).
  mu <- -coef.null[2L] / (2.0 * coef.null[3L]) + mu
  sigma <- sqrt(-0.5 / coef.null[3L])
  # Null proportion: fraction of z in the central band
  # (mu - cc*sigma, mu + cc*sigma) relative to the null mass G0 there.
  # (Local renamed from `pi` to avoid shadowing the base constant.)
  lo00 <- mu - cc * sigma
  hi00 <- mu + cc * sigma
  centralFrac <- sum( {z > lo00} & {z < hi00} ) / length(z)
  G0 <- 1.0 - 2.0 * stats::pnorm(lo00, mean = mu, sd = sigma)
  eta <- max(min(centralFrac / G0, 1.0), 0.0)
  
  if (plotIt) {
    oz <- order(zz)
    h <- graphics::hist(x = zz, 
                        breaks = breaks, 
                        xlab = expression(psi), 
                        main = "")
    # Estimated marginal density f (green) and scaled null density
    # f0 (red, dashed) overlaid on the histogram.
    graphics::lines(x = hcomb$mids, y = fit$fitted.values, col = 3L, lwd = 2.0)
    graphics::lines(x = zz[oz], 
                    y = stats::dnorm(x = zz, mean = mu, sd = sigma)[oz] * 
                      length(zz) * mean(diff(h$breaks)),
                    col = 2L,
                    lty = 2L,
                    lwd = 1.5)
    graphics::legend(x = "topright",
                     legend = c("f", expression(f[0])),
                     col = c(3L,2L),
                     lty = 1L:2L,
                     lwd = c(2.0, 1.5),
                     bty = "n",
                     seg.len = 3L)
  }
  
  list("correctz" = correctZ,
       "correctp" = correctp,
       "q" = qq,
       "mu0hat" = mu,
       "sigma0hat" = sigma,
       "etahat" = eta)
}

Try the GGMridge package in your browser

Any scripts or data that you put into this service are public.

GGMridge documentation built on Nov. 25, 2023, 1:08 a.m.