# R/jointMeanCovStability.R In jointMeanCov: Joint Mean and Covariance Estimation for Matrix-Variate Data

#### Documented in jointMeanCovStability

#' Estimate Mean and Correlation Structure Using Stability Selection
#'
#' Given a data matrix, this function performs stability
#' selection as described in the section "Stability of Gene
#' Sets" in the paper Joint mean and covariance estimation with
#' unreplicated matrix-variate data (2018),
#' M. Hornstein, R. Fan, K. Shedden, and S. Zhou;
#' Journal of the American Statistical Association.
#'
#' Let \code{m[i]} denote the number of group-centered genes on
#' the ith iteration of stability selection (where \code{m[i]}
#' is a decreasing sequence).
#' Estimated group means are initialized using unweighted
#' sample means.  Then, for each iteration of stability
#' selection:
#' 1. The top \code{m[i]} genes are selected for group centering
#' by ranking the estimated group differences from the previous
#' iteration.
#' 2. Group means and global means are estimated using
#' GLS, using the inverse row covariance matrix from the
#' previous iteration.  The centered data matrix is then
#' used as input to Gemini to estimate the inverse row covariance
#' matrix B.hat.inv.
#' 3. Group means are estimated using GLS with B.hat.inv.
#'
#' @param X Data matrix of size n by m.
#' @param group.one.indices Vector of indices denoting rows in group one.
#' @param rowpen Glasso penalty for estimating B, the row-row correlation
#' matrix.
#' @param n.genes.to.group.center Vector specifying the number of
#' genes to group center on each iteration of the stability
#' selection algorithm.  The length of this vector is equal to
#' the number of iterations of stability selection.  If this
#' argument is not provided, the default is a decreasing
#' sequence starting with m, followed
#' @return
#' \item{n.genes.to.group.center}{Number of group centered genes
#' on each iteration of stability selection.}
#' \item{betaHat.init}{Matrix of size 2 by m, containing
#' sample means for each group.  Row 1 contains sample means for
#' group one, and row 2 contains sample means for group two.}
#' \item{gammaHat.init}{Vector of length m containing differences
#' in sample means.}
#' \item{B.inv.list}{List of estimated row-row inverse covariance
#' matrices, where \code{B.inv.list[[i]]} corresponds
#' to the estimate from the ith iteration of the algorithm, in
#' which the number of group-centered genes is
#' \code{n.genes.to.group.center[i]}.  For identifiability,
#' A and B are scaled so that A has trace m.}
#' \item{corr.B.inv.list}{List of inverse correlation matrices
#' corresponding to the inverse covariance matrices
#' \code{B.inv.list}.}
#' \item{betaHat}{List of matrices of size 2 by m, where m is
#' the number of columns of X.  For each matrix, entry (i, j) contains the
#' estimated mean of the jth column for an individual in
#' group i, with i = 1,2, and j = 1, ..., m.  The matrix
#' \code{betaHat[[i]]} contains the estimates for the ith
#' iteration of stability selection.}
#' \item{gamma.hat}{List of vectors of estimated group mean
#' differences.  The vector \code{gammaHat[[i]]} contains
#' estimates for the ith iteration of stability selection.}
#' \item{design.effecs}{Vector containing the estimated design
#' effect for each iteration of stability selection.}
#' \item{gls.test.stats}{List of vectors of test statistics for
#' each iteration of stability selection.}
#' \item{p.vals}{List of vectors of two-sided p-values, calculated using the
#' standard normal distribution.}
#' @examples
#' # Generate matrix-variate data.
#' n1 <- 5
#' n2 <- 5
#' n <- n1 + n2
#' group.one.indices <- 1:5
#' group.two.indices <- 6:10
#' m <- 20
#' M <- matrix(0, nrow=n, ncol=m)
#' # In this example, the first three variables have nonzero
#' # mean differences.
#' M[1:n1, 1:3] <- 3
#' M[(n1 + 1):n2, 1:3] <- -3
#' X <- matrix(rnorm(n * m), nrow=n, ncol=m) + M
#'
#' # Apply the stability algorithm.
#' rowpen <- sqrt(log(m) / n)
#' n.genes.to.group.center <- c(10, 5, 2)
#' out <- jointMeanCovStability(
#'  X, group.one.indices, rowpen, c(2e3, n.genes.to.group.center))
#'
#' # Make quantile plots of the test statistics for each
#' # iteration of the stability algorithm.
#' opar <- par(mfrow=c(2, 2), pty="s")
#' qqnorm(out$gammaHat.init, #' main=sprintf("%d genes group centered", m)) #' abline(a=0, b=1) #' qqnorm(out$gammaHat[],
#'   main=sprintf("%d genes group centered",
#'    n.genes.to.group.center))
#' abline(a=0, b=1)
#' qqnorm(out$gammaHat[], #' main=sprintf("%d genes group centered", #' n.genes.to.group.center)) #' abline(a=0, b=1) #' qqnorm(out$gammaHat[],
#'   main=sprintf("%d genes group centered",
#'    n.genes.to.group.center))
#' abline(a=0, b=1)
#' par(opar)
#' @export
jointMeanCovStability <- function(
X, group.one.indices, rowpen, n.genes.to.group.center=NULL) {

# Sample sizes and group indices
n <- nrow(X)
m <- ncol(X)
group.two.indices <- setdiff(1:n, group.one.indices)

# Design matrix for two group means
D.group <- twoGroupDesignMatrix(group.one.indices, group.two.indices)
# Design matrix for global mean
D.global <- matrix(1, n, 1)

B.inv.list <- list()
corr.B.inv.list <- list()
betaHat <- list()
gammaHat <- list()
design.effects <- rep(NA, length(n.genes.to.group.center))
gls.test.stats <- list()

group.cen.indices <- list()
global.cen.indices <- list()
gls.means.forCentering <- list()

delta <- c(1, -1)

betaHat.init <- GLSMeans(X, D.group, diag(n))
gammaHat.init <- betaHat.init[1, ] - betaHat.init[2, ]

if (is.null(n.genes.to.group.center)) {
pow <- floor(log2(m))
n.genes.to.group.center <- c(m, round(m / 2^(1:pow)))
} else {
if (any(diff(n.genes.to.group.center) > 0)) {
stop("n.genes.to.group.center must be a decreasing sequence")
}
}

for (i in 1:length(n.genes.to.group.center)) {
# Rank the columns by group mean differences from previous
# iteration; select a subset of genes to group center.
if (i == 1) {
gammaHat.prev <- gammaHat.init
} else {
gammaHat.prev <- gammaHat[[i - 1]]
}
ranks <- order(abs(gammaHat.prev), decreasing=TRUE)
group.cen.indices[[i]] <- sort(
ranks[1:n.genes.to.group.center[i]])
global.cen.indices[[i]] <- setdiff(
1:m, group.cen.indices[[i]])

# Center the data by subtracting GLS group means or global
# means from the group-centered and globally centered
# columns, respectively.
if (i == 1) {
B.inv.prev <- diag(n)
} else {
B.inv.prev <- B.inv.list[[i - 1]]
}
cen <- centerDataGLSModelSelection(
X, B.inv.prev, group.one.indices, group.two.indices,
group.cen.indices[[i]])
X.modSelCen <- cen$X.cen # Apply Gemini to the centered data matrix. gem.B <- GeminiB(X.modSelCen, rowpen) B.inv.list[[i]] <- gem.B$B.hat.inv
corr.B.inv.list[[i]] <- gem.B$corr.B.hat.inv # Estimate mean differences with GLS betaHat[[i]] <- GLSMeans(X, D.group, gem.B$B.hat.inv)
gammaHat[[i]] <- betaHat[[i]][1, ] - betaHat[[i]][2, ]

# Calculate test statistics
design.effects[i] <- (
t(delta) %*% solve(t(D.group) %*% gem.B$B.hat.inv %*% D.group) %*% delta) gls.test.stats[[i]] <- gammaHat[[i]] / sqrt(design.effects[i]) } p.vals.gls <- lapply( gls.test.stats, function(x) 2 * stats::pnorm(q=abs(x), lower.tail=FALSE)) p.vals.gls.adjusted <- lapply( X=p.vals.gls, FUN=stats::p.adjust, method="BH") out <- list() out$n.genes.to.group.center <- n.genes.to.group.center
out$betaHat.init <- betaHat.init out$gammaHat.init <- gammaHat.init
out$B.inv.list <- B.inv.list out$corr.B.inv.list <- corr.B.inv.list
out$betaHat <- betaHat out$gammaHat <- gammaHat
out$design.effects <- design.effects out$gls.test.stats <- gls.test.stats
out$p.vals.gls <- p.vals.gls out$p.vals.gls.adjusted <- p.vals.gls.adjusted

return(out)
}


## Try the jointMeanCov package in your browser

Any scripts or data that you put into this service are public.

jointMeanCov documentation built on May 6, 2019, 1:09 a.m.