#' Perform spectral regularized inverse-variance weighted (SRIVW) estimator for summary-data multivariable Mendelian randomization
#'
#'
#' @param beta.exposure A data.frame or matrix. Each row contains the estimated marginal effect of a SNP on K exposures, usually obtained from a GWAS
#' @param se.exposure A data.frame or matrix of estimated standard errors of beta.exposure
#' @param beta.outcome A vector of the estimated marginal effect of a SNP on outcome, usually obtained from a GWAS
#' @param se.outcome A vector of estimated standard errors of beta.outcome
#' @param phi_cand A vector of tuning parameters for SRIVW estimator. Default is 0. To use the recommended set for the tuning parameter, simply set phi_cand = NULL.
#' @param over.dispersion Should the model consider balanced horizontal pleiotropy? Default is FALSE.
#' @param overlap Should the model consider overlapping exposure and outcome datasets? Default is FALSE.
#' @param gen_cor If overlap = FALSE, provide a K-by-K matrix for the estimated shared correlation matrix between the effect of the genetic variants on each exposure, where K is the number of exposure. If overlap = TRUE, provide a (K+1)-by-(K+1) matrix for the estimated shared correlation matrix between the effect of the genetic variants on each exposure and the outcome, where the last index position corresponds to the outcome. The correlations can either be estimated, be assumed to be zero, or fixed at zero. Default input is NULL, meaning that an identity matrix is used as the correlation matrix.
#'
#' @return A list with elements
#' \item{beta.hat}{Estimated direct effects of each exposure on the outcome}
#' \item{beta.se}{Estimated standard errors of beta.hat}
#' \item{iv_strength_parameter}{The minimum eigenvalue of the sample IV strength matrix, which quantifies the IV strength in the sample}
#' \item{phi_selected}{The selected tuning parameter for the SRIVW estimator}
#' \item{tau.square}{Overdispersion parameter if \code{over.dispersion=TRUE}}
#' @export
#'
#' @examples
#' data("hdl_subfractions")
#' # We are going to estimate the effect of S-HDL-P on the risk of CAS with adjustments of HDL, LDL, and TG levels
#' beta.exposure <- hdl_subfractions$data[,c("gamma_exp1","gamma_exp2","gamma_exp3","gamma_exp4")] # corresponding to SNP effects on respectively HDL, LDL, TG, and S-HDL-P
#' se.exposure <- hdl_subfractions$data[,c("se_exp1","se_exp2","se_exp3","se_exp4")]
#' beta.outcome <- hdl_subfractions$data$gamma_out1
#' se.outcome <- hdl_subfractions$data$se_out1
#' P <- hdl_subfractions$cor.mat[c(1:4,10),c(1:4,10)] # make sure the last index corresponds to the outcome
#' mvmr.srivw(beta.exposure = beta.exposure,
#' se.exposure = se.exposure,
#' beta.outcome = beta.outcome,
#' se.outcome = se.outcome,
#' gen_cor = P,
#' phi_cand = NULL,
#' over.dispersion = FALSE,
#' overlap = TRUE)
#'
mvmr.srivw <- function(beta.exposure, se.exposure, beta.outcome, se.outcome, phi_cand=0, over.dispersion = FALSE, overlap = FALSE, gen_cor = NULL) {
if (ncol(beta.exposure) <= 1 | ncol(se.exposure) <= 1) {stop("this function is developed for multivariable MR")}
K <- ncol(beta.exposure)
if (is.null(gen_cor)) {
if (overlap == FALSE) {P <- diag(K)} else {P <- diag(K+1)}
} else {
P <- as.matrix(gen_cor)
}
if (overlap == FALSE & (ncol(P) != ncol(beta.exposure))) {stop("The shared correlation matrix has a different number of columns than the input beta.exposure")}
if (overlap == TRUE & (ncol(P) != (ncol(beta.exposure) + 1))) {stop("If the exposure and outcome datasets are overlapping, please provide a (K+1)-by-(K+1) correlation matrix.")}
if (nrow(beta.exposure) != length(beta.outcome)) {stop("The number of SNPs in beta.exposure and beta.outcome is different")}
if (over.dispersion == TRUE & overlap == TRUE) {stop("The function allowing for both balanced horizontal pleiotropy and overlapping datasets is still under development")}
beta.exposure <- as.matrix(beta.exposure)
se.exposure <- as.matrix(se.exposure)
# the number of SNPs
p <- nrow(beta.exposure)
# diagonal W matrix
W<- diag(se.outcome^(-2))
# create a list of Sigma Xj matrices
if (overlap == FALSE) {
Vj <- lapply(1:p, function(j) diag(se.exposure[j,]) %*% P %*% diag(se.exposure[j,]))
} else {
Vj <- lapply(1:p, function(j) diag(c(se.exposure[j,],se.outcome[j])) %*% P %*% diag(c(se.exposure[j,],se.outcome[j])))
}
# calculate square root inverse of P
P_eigen <- eigen(P[1:K,1:K])
P_root_inv <- P_eigen$vectors %*% diag(1/sqrt(P_eigen$values)) %*% t(P_eigen$vectors)
Vj_root_inv <- lapply(1:p, function(j) {
P_root_inv %*% diag(1/se.exposure[j,])
})
# calcualte IV strenght parameter
IV_strength_matrix <- Reduce("+",lapply(1:p, function(j) {
beta.exposure.V <- Vj_root_inv[[j]] %*% beta.exposure[j,]
beta.exposure.V %*% t(beta.exposure.V)})) - p*diag(K)
iv_strength_parameter <- min(eigen(IV_strength_matrix/sqrt(p))$values)
# get V matrix
V <- Reduce("+",lapply(1:p, function(j) {Vj[[j]][1:K,1:K] * (se.outcome[j]^(-2))}))
# get M matrix
M <- t(beta.exposure)%*%W%*%beta.exposure
# get M-V matrix
MV <- M-V
# perform eigen-decomposition on MV
MV_eigvalues <- eigen(MV)$values
MV_eigen <- eigen(MV)
# multivariable (a)dIVW estimator
if (is.null(phi_cand)) {
phi_cand <- c(0, exp(seq(0, 17, by = 0.5) - min(iv_strength_parameter)))
}
phi_length <- length(phi_cand)
MV.l.inv.long <- Reduce(rbind, lapply(1:phi_length, function(l) {
MV_eigen$vectors %*% diag(1/(MV_eigvalues + phi_cand[l]/MV_eigvalues)) %*% t(MV_eigen$vectors)}
))
if (overlap == FALSE) {# (a)dIVW for independent datasets
beta.est <- MV.l.inv.long %*% t(beta.exposure) %*% W %*% (beta.outcome)
prof.lik <- sapply(1:phi_length, function(l) {
beta.hat <- beta.est[(1+(l-1)*K):(l*K)]
bvb <- sapply(1:p, function(j) t(beta.hat) %*% Vj[[j]] %*% beta.hat)
if (over.dispersion) {
tau2 <- Reduce("+",lapply(1:p, function(j) {
v <- Vj[[j]] * (se.outcome[j]^(-2))
(beta.outcome[j] - beta.exposure[j,] %*% beta.hat)^2*se.outcome[j]^(-2) - 1 - as.numeric(t(beta.hat) %*% v %*% beta.hat)
}))/sum(diag(W))
tau2 <- as.numeric(tau2)
} else {
tau2 <- 0
}
S <- diag(1/(se.outcome^2 + bvb + tau2))
1/p * t(beta.outcome - beta.exposure %*% beta.hat) %*% S %*%
(beta.outcome - beta.exposure %*% beta.hat)})
phi_selected <- phi_cand[which.min(prof.lik)]
MV.l.inv <- MV_eigen$vectors %*% diag(1/(MV_eigvalues + phi_selected/MV_eigvalues)) %*% t(MV_eigen$vectors)
mvmr.adIVW <- MV.l.inv %*% t(beta.exposure) %*% W %*% beta.outcome
if (over.dispersion) {
tau2_adivw <- Reduce("+",lapply(1:p, function(j) {
v <- Vj[[j]] * (se.outcome[j]^(-2))
(beta.outcome[j] - beta.exposure[j,] %*% mvmr.adIVW)^2*se.outcome[j]^(-2) - 1 - as.numeric(t(mvmr.adIVW) %*% v %*% mvmr.adIVW)
}))/sum(diag(W))
tau2_adivw <- as.numeric(tau2_adivw)
} else {
tau2_adivw <- 0
}
adIVW_Vt <- Reduce("+",lapply(1:p, function(j) {
m <- beta.exposure[j,] %*% t(beta.exposure[j,]) * (se.outcome[j]^(-2))
v <- Vj[[j]]*(se.outcome[j]^(-2))
bvb <- as.numeric(t(mvmr.adIVW) %*% v %*% mvmr.adIVW)
vbbv <- v %*% mvmr.adIVW %*% t(mvmr.adIVW) %*% v
m*(1+bvb+tau2_adivw*se.outcome[j]^(-2)) + vbbv
}))
mvmr.adIVW.se <- sqrt(diag(MV.l.inv%*%adIVW_Vt%*%MV.l.inv))
} else {
# (a)dIVW allowing for overlapping datasets
Adj_term <- Reduce("+",lapply(1:p, function(j) {Vj[[j]][1:K,(K+1)] * se.outcome[j]^{-2}}))
beta.est <- MV.l.inv.long %*% t(beta.exposure) %*% W %*% (beta.outcome) - MV.l.inv.long %*% Adj_term
prof.lik <- sapply(1:phi_length, function(l) {
beta.hat <- beta.est[(1+(l-1)*K):(l*K)]
bvb.test <- sapply(1:p, function(j) t(beta.hat) %*% Vj[[j]][1:K,1:K] %*% beta.hat)
bvxy <- sapply(1:p, function(j) {
sigmaxy <- Vj[[j]][1:K,K+1]
(t(beta.hat) %*% sigmaxy)[1,1]
})
S <- diag(1/(se.outcome^2 + bvb.test - 2 * bvxy))
1/p * t(beta.outcome - beta.exposure %*% beta.hat) %*% S %*%
(beta.outcome - beta.exposure %*% beta.hat)})
phi_selected <- phi_cand[which.min(prof.lik)]
MV.l.inv <- MV_eigen$vectors %*% diag(1/(MV_eigvalues + phi_selected/MV_eigvalues)) %*% t(MV_eigen$vectors)
mvmr.adIVW <- MV.l.inv %*% t(beta.exposure) %*% W %*% beta.outcome - MV.l.inv %*% Adj_term
adIVW_Vt_overlap <- Reduce("+",lapply(1:p, function(j) {
m <- beta.exposure[j,] %*% t(beta.exposure[j,]) * (se.outcome[j]^(-2))
v <- Vj[[j]][1:K,1:K] * (se.outcome[j]^(-2))
bvb <- as.numeric(t(mvmr.adIVW) %*% v %*% mvmr.adIVW)
vbbv <- v %*% mvmr.adIVW %*% t(mvmr.adIVW) %*% v
sigmaxy <- Vj[[j]][1:K,K+1]
A1 <- sigmaxy %*% t(sigmaxy) * se.outcome[j]^(-4)
A2 <- Vj[[j]][1:K,1:K] %*% mvmr.adIVW %*% t(sigmaxy) * se.outcome[j]^(-4)
A3 <- sigmaxy %*% t(mvmr.adIVW) %*% Vj[[j]][1:K,1:K] * se.outcome[j]^(-4)
A4 <- (t(mvmr.adIVW) %*% sigmaxy * se.outcome[j]^(-2))[1,1] * m
A5 <- (t(mvmr.adIVW) %*% sigmaxy * se.outcome[j]^(-2))[1,1] * sigmaxy %*% t(sigmaxy) * se.outcome[j]^(-4)
m*(1+bvb) + vbbv + A1 - A2 - A3 - 2 * A4 + 4 * A5
}))
mvmr.adIVW.se <- sqrt(diag(MV.l.inv%*%adIVW_Vt_overlap%*%MV.l.inv))
tau2_adivw <- NULL # current version does not allow for over-dispersion and overlapping datasets at the same time.
}
return(list(beta.hat = mvmr.adIVW,
beta.se = mvmr.adIVW.se,
iv_strength_parameter = iv_strength_parameter,
phi_selected = phi_selected,
tau.square = tau2_adivw))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.