#' Cluster multiple traits via polygenic scores
#'
#' This function clusters traits that share some genetic component via polygenic scores (PSs). It fits a sparse precision matrix via graphical lasso. The regularisation path is computed for the lasso penalty at a grid of values for the regularisation parameter rho. The clusters are used to specify the structure of the error covariance matrix of a GLS model and the feasible GLS estimator is used for estimating the unknown parameters in a linear regression model with a certain unknown degree of correlation between the residuals.
#'
#' ...
#'
#' @param x Input matrix, of dimension nobs x nvars of PGSs with nvars = nouts. Can be in sparse matrix format.
#' @param y Quantitative response matrix, of dimension nobs x nouts.
#' @param rho (Non-negative) regularisation parameter for lasso passed to glasso. \code{rho=0} means no regularisation. Can be a scalar (usual) or a symmetric nouts by nouts matrix, or a vector of length nouts. In the latter case, the penalty matrix has jkth element sqrt(rho[j]*rho[k]).
#' @param nrho The number of rho values - default is 40.
#' @param logrho Logical flag for log transformation of the rho sequence. Default is \code{logrho = FALSE}.
#' @param rho.min.ratio Smallest value for rho, as a fraction of rho.max, the (data derived) entry value (i.e. the smallest value for which all coefficients are zero) - default is 10e-04.
#'
#' @return An object of class "geneJAM" is returned. \item{call}{The call that produced this object.} \item{xi}{A matrix of intercepts of dimension nouts x length(rho)} \item{beta}{A matrix of coefficients for the PSs of dimension nouts x length(rho)} \item{A}{A length(rho) list of estimated adjacency matrices A of 0s and 1s, where A_{ij} is equal to 1 iff edges i and j are adjacent and A_{ii} is 0.} \item{P}{A length(rho) list of estimated precision matrices (matrix inverse of correlation matrices).} \item{Sigma}{A length(rho) list of estimated correlation matrices.} \item{rho}{The actual sequence of rho values used.} \item{PS}{Polygenic scores used. If \code{scores = FALSE} they are computed by \code{\link{ps.geneJAM}}} \item{logrho}{Logical flag for log transformation of the rho sequence. Default is \code{logrho = FALSE}.} \item{nobs}{Number of observations.} \item{xiStderr}{Standard error of coefficients \code{xi}.} \item{betaStderr}{Standard error of coefficients \code{beta}.} \item{betaSD}{Standard error of the mean of coefficients \code{beta} for clustered traits.} \item{betaSD0}{Standard error of the mean of all coefficients \code{beta}.} \item{rho.min}{Value of rho that gives minimum non-zero betaSD.}
#'
#' @examples
#' N <- 1000 #
#' q <- 10 #
#' p <- 1000 #
#' set.seed(1)
#' # Sample 1
#' X0 <- matrix(rbinom(n = N*p, size = 2, prob = 0.3), nrow=N, ncol=p)
#' B <- matrix(0, nrow = p, ncol = q)
#' B[1, 1:2] <- 1
#' B[3, 3] <- 2
#' y0 <- X0 %*% B + matrix(rnorm(N*q), nrow = N, ncol = q)
#' #y0 <- apply(y0, 2, scale)
#' beta <- ps.geneJAM(X0, y0)$beta
#' # Sample 2
#' X <- matrix(rbinom(n = N*p, size = 2, prob = 0.3), nrow=N, ncol=p)
#' y <- X %*% B + matrix(rnorm(N*q), nrow = N, ncol = q)
#' #y <- apply(y, 2, scale)
#' #Sigma <- diag(1, q)
#' #Sigma[1, 2] <- Sigma[2, 1] <- .8
#' #y <- X %*% B + MASS::mvrnorm(n = N, mu = rep(0, q), Sigma = Sigma)
#' x <- X %*% beta
#' #x <- MASS::mvrnorm(n = N, mu = rep(0, q), Sigma = diag(1, q))
#' ###
#' pc <- geneJAM(x, y)
#'
#' @export
#'
geneJAM <- function(x, y, rho = NULL,
nrho = ifelse(is.null(rho), 20, length(rho)),
logrho = FALSE,
rho.min.ratio = 10e-04
) {
this.call <- match.call()
x <- as.matrix(x)
y <- as.matrix(y)
np <- dim(x)
if (is.null(np) | (np[2] <= 1))
stop("x should be a matrix with 2 or more columns")
nobs <- as.integer(np[1])
nvars <- as.integer(np[2])
dimy <- dim(y)
nrowy <- ifelse(is.null(dimy), length(y), dimy[1])
nouts <- ifelse(is.null(dimy), 1, dimy[2])
if (nrowy != nobs)
stop(paste("number of observations in y (", nrowy, ") not equal to the number of rows of x (",
nobs, ")", sep = ""))
if (nvars != nouts)
stop(paste("number of scores (", nvars, ") not equal to number of outcome components (", nouts, ")", sep = ""))
vnames <- colnames(x)
if (is.null(vnames))
vnames <- paste("V", seq(nvars), sep = "")
if (is.null(rho)) {
if (rho.min.ratio >= 1)
stop("rho.min.ratio should be less than 1")
flmin <- as.double(rho.min.ratio)
urho <- double(1)
} else {
nrho = as.integer(nrho)
flmin <- as.double(1)
if (any(rho < 0))
stop("rhos should be non-negative")
urho <- as.double(rev(sort(rho)))
nrho <- as.integer(length(rho))
rholist <- as.double(rho)
}
xScale <- scale(x, scale = FALSE) #apply(x, 2, scale) #
#yScale <- apply(y, 2, scale) #y #scale(y) #
####################
# STEP 2
####################
# Covariance matrix (qxq) of PSs
SigmaPS <- cov(xScale) # cor(x) # cov(x) #
## Covariance matrix (qxq) of NOISE
#Omega <- SigmaPS - cov(yScale)
# Calculate rho path (first get rho_max):
if (is.null(rho)) {
rho_max <- max(colSums(SigmaPS))
rho_min <- min(abs(SigmaPS)) #rho_max*flmin,
if(logrho == 0) {
rholist <- round(seq(rho_min,
rho_max,
length.out = nrho), digits = 10)
} else if (logrho == 1) {
rholist <- round(exp(seq(log(rho_min),
log(rho_max),
length.out = nrho)), digits = 10)
} else if (logrho == 2) {
rholist <- round(log(seq(exp(rho_min),
exp(rho_max),
length.out = nrho)), digits = 10)
}
} else {
rholist <- rho
}
# Graphical lasso
# Estimates a sparse inverse covariance matrix using a lasso (L1) penalty
#glPS <- glasso::glassopath(s = SigmaPS, rho = rholist,
# penalize.diagonal = FALSE, trace = 0)
glPS2 <- lapply(seq_along(rholist), FUN = function(r) {
glasso::glasso(s = SigmaPS, rho = rholist[r],
penalize.diagonal = FALSE,
trace = 0)
})
rho <- rholist
P <- lapply(seq_along(rho), function(r) glPS2[[r]]$wi)
Sigma <- lapply(seq_along(rho), function(r) glPS2[[r]]$w)
A <- lapply(seq_along(rho), function(r) {
AA <- P[[r]]
AA[abs(AA) > 1.5e-8] <- 1
diag(AA) <- 0
AA
})
xi <- matrix(NA, nouts, nrho)
beta <- matrix(NA, nouts, nrho)
xiStderr <- matrix(NA, nouts, nrho)
betaStderr <- matrix(NA, nouts, nrho)
summarylist <- vector(mode = "list", length = nrho)
clusterlist <- vector(mode = "list", length = nrho)
lmfit <- vector("list", length = nouts)
res <- matrix(NA, nrow = nobs, ncol = nouts)
for (l in seq(nouts)) {
xl <- x[, l]
lmfit[[l]] <- lm(y[ ,l] ~ xl)
res[, l] <- resid(lmfit[[l]])
}
# 1.b
yVec <- as.vector(t(y)) #as.vector(t(y[, members])) #as.vector(y) #
#xVec <- as.vector(t(x[, members]))
#xList0 <- lapply(apply(x, 1, as.list), unlist) #apply(x, 2, as.list)
#xList <- #lapply(xList0, FUN = function(x) cbind(1, x[1]))
xList <- apply(x, 1, function(xi) Matrix::bdiag(lapply(xi, function(i) cbind(1, i))))
xMat <- do.call(rbind, xList)
rm(xList)
betaSD <- vector("numeric", length = nrho)
betaSD0 <- vector("numeric", length = nrho)
NULLrho <- NULL #c()
for (j in seq(nrho)) {
lowerA <- A[[j]]
lowerA[lower.tri(lowerA)] <- 0
g <- igraph::graph.adjacency(lowerA)
clu <- clusterlist[[j]] <- igraph::components(g)
# 2.
fglsfit <- .fgls.geneJAM(res, nouts, clu, nobs, xMat, yVec, y)
betaCov <- fglsfit$betaCov
betaHat <- fglsfit$betaHat
if (all(clu$csize == 1)) {
betaSD[j] <- sqrt(mean(Matrix::diag(betaCov)[c(FALSE, TRUE)]))
} else {
groupedTraits <- which(clu$membership %in% which(clu$csize > 1))
betaSD[j] <- sqrt(mean(Matrix::diag(betaCov)[c(FALSE, TRUE)][groupedTraits]))
}
betaSD0[j] <- sqrt(mean(Matrix::diag(betaCov)[c(FALSE, TRUE)]))
xi[, j] <- betaHat[c(TRUE, FALSE)]
beta[, j] <- betaHat[c(FALSE, TRUE)]
xiStderr[, j] <- sqrt(Matrix::diag(betaCov)[c(TRUE, FALSE)])
betaStderr[, j] <- sqrt(Matrix::diag(betaCov)[c(FALSE, TRUE)])
if (all(lowerA == 0)) break
}
fit <- list(call = this.call,
clusters = clusterlist,
A = A, P = P, Sigma = Sigma,
rho = rho, PS = x, logrho = logrho,
nobs = nobs,
xi = xi, beta = beta,
xiStderr = xiStderr,
betaStderr = betaStderr,
betaSD = betaSD,
betaSD0 = betaSD0)
idx <- which(fit$betaSD > 0)
fit$rho.min <- fit$rho[which.min(fit$betaSD[idx])]
class(fit) <- "geneJAM"
fit
}
#' @export
#'
#'
.fgls.geneJAM <- function(res, nouts, clu, nobs, xMat, yVec, y) {
newres <- res
for (iter in 1:10) {
covMat <- matrix(0, nouts, nouts)
for (m in seq(clu$no)) {
csize <- clu$csize[m]
members <- which(clu$membership == m)
if (csize > 1) {
covMat[members, members] <- cov(newres[, members])
} else {
covMat[members, members] <- var(newres[, members])
}
}
invMat <- Matrix::Matrix(solve(covMat))
OmegaInv <- Matrix::bdiag(replicate(nobs, invMat))
b1 <- Matrix::crossprod(xMat, OmegaInv) %*% xMat
b2 <- solve(b1)
betaHat <- Matrix::tcrossprod(b2, xMat) %*% OmegaInv %*% yVec
# Convergence
newres <- y - matrix(xMat %*% betaHat, ncol = nouts, byrow = TRUE)
if (iter > 1) {
thr <- abs(Matrix::mean(betaHat - oldbetaHat))
}
oldbetaHat <- betaHat
if (iter > 1 && thr < 1e-6) break
}
betaCov <- Matrix::solve(Matrix::crossprod(xMat, OmegaInv) %*% xMat)
return(list(betaCov = betaCov, betaHat = betaHat))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.