knitr::opts_chunk$set(echo = TRUE, warning = F,message = F) options(knitr.kable.NA = '') options(scipen = 999)
library(graph) library(kableExtra) library(mediateR) set.seed(280920181)
\newcommand{\Var}{\text{Var}} \newcommand{\E}{\mathbb{E}} \newcommand{\argmin}[1]{\underset{#1}{\operatorname{argmin}}\text{ }} \newcommand{\argmax}[1]{\underset{#1}{\operatorname{argmax}}} \newcommand{\ind}[1]{\mathbf{1}_{#1}} \newcommand{\todo}[1]{\color{red}{\text{TODO: #1}}}
The code contains functions to simulate data for mediation analysis.
params <- mediateR:::QuickSim(5,3,2,"gaussian") dat <- SimulateData(params) dat
The dat
list with named elements (r names(dat)
) is used as an argument for functions which estimate the direct and indirect effects.
dat$y
is the response,dat$mm
is the vector of the mediators, dat$xx
is the vector of the exposures, dat$co
is the vector of the covariates, and dat$family
denotes the type of response.dat$path
is p x q matrix of 0s and 1s denoting paths to be fit. path[i,j]=0 indicates a priori knowledge of no path xx[i] and mm[j].One can manually create a list with these elements (such as when fitting models to new data) or rely on the simulation function SimulateData
to create the structure. The argument to SimulateData
has parameters controlling the sample size, coefficient estimates, number of mediators, etc.
The goal is to assess the causal impact of dat$xx
on dat$y
and quantify how much of this effect passes through dat$mm
, adjusting for covariates dat$co
.
We simulate n
observations from the "True Graph" using independent, binary SNPs drawn from Bernoulli(1/2) and path coefficients given below.
n <- 500 sim_params <- mediateR:::QuickSim(n,3,2,"gaussian") dat <- SimulateData(sim_params)
gR <- mediateR:::MakeGraphNELObject(sim_params$path,sim_params$xx_direct,sim_params$mm_direct) attrs <- list() attrs$edge <- list() attrs$edge$fontsize <- 12 edgeAttrs <- mediateR:::MakeedgeAttrs(sim_params$path_model,sim_params$xx_direct,sim_params$mm_direct)
ComputePath
is used to fit the mediation analysis framework for various response, where dat
are the datalist with the format mentioned above, mmn=F
assuming independent mediators and reg=T
is used for high-dimensional mediators (default is false).
If one makes the assumption of conditionally independent mediators given $\mathbf{x}$ and $\mathbf{c}$, i.e., $m_j \perp !!! \perp m_k | \mathbf{x},\mathbf{c}$, for all $j,k$, then $\mathbf{\Sigma}_\epsilon$ is diagonal and can be estimated using the error variances from separate univariate regressions, $m_j | (x,c)$ for all $j$. This estimate will be more efficient, but carries more assumptions.
fit <- ComputePath(dat, mmn = T, reg = F) eff_est <- ComputeEffectsLinear(fit)
In linear models, the direct effect is the path coefficient $x \rightarrow m$ (xx_direct
), and indirect effect is the product of path coefficients $x\rightarrow m$ (path_model
), and $m \rightarrow y$ (mm_direct
) .
gR2 <- mediateR:::MakeGraphNELObject(fit$path_model,fit$xx_direct,fit$mm_direct) attrs2 <- list() attrs2$edge <- list() attrs2$edge$fontsize <- 12 edgeAttrs2 <- mediateR:::MakeedgeAttrs(fit$path_model,fit$xx_direct,fit$mm_direct) par(mfcol=c(1,2)) plot(gR,edgeAttrs=edgeAttrs,attrs=attrs,main="True Graph") plot(gR2,edgeAttrs=edgeAttrs2,attrs=attrs2,main="Estimated Graph")
eff <- ComputeEffectsLinear(sim_params) eff_comb <- matrix(0,nrow(eff),ncol=2*ncol(eff)) for(ii in 1:ncol(eff)){ eff_comb[,2*ii-1] <- eff_est[,ii] eff_comb[,2*ii] <- eff[,ii] } for(ii in (ncol(dat$xx)+1):(ncol(dat$xx)+ncol(dat$mm))){ eff_comb[ii,3:ncol(eff_comb)] <- NA } colnames(eff_comb) <- rep(c("est","true"),ncol(eff)) rownames(eff_comb) <- rownames(eff)
Table below summarizes the Indirect, Direct, and Total effects.
kable(eff_comb,digits=2) %>% kable_styling(bootstrap_options = "striped", full_width = F) %>% add_header_above(c(" ", "direct" = 2, "indirect" = 2, "total" = 2))
We propose computing confidence intervals and hypothesis tests using bootstrap sampling quantiles.
B <- 1000 eff_est_boot <- vector("list",length=B) for(ii in 1:B){ ix <- sample(1:sim_params$n,replace=TRUE) dat_sub <- mediateR:::SubsetDat(dat,ix) fit <- ComputePath(dat_sub, mmn = T) eff_est_boot[[ii]] <- ComputeEffectsLinear(fit) } total_xx1 <- vapply(eff_est_boot,function(x){x[1,3]},c(0)) L95 = round(quantile(total_xx1, 0.025),2) U95 = round(quantile(total_xx1, 1 - 0.025),2)
Figure below shows the histgram of total effects of Bootstrap samples for SNP1. The 95\% CI of the total effect for SNP1 is (r L95
, r U95
)
hist(total_xx1,main="Bootstrap Samples SNP1 Total Effect",xlab="Estimate") abline(v=eff[1,3],lwd=3,col='red') abline(v=eff_est[1,3],lwd=3,col='blue') legend("topright",c("Truth","Point Estimate"),col=c("red","blue"),lwd=2)
Same as simulation 1 but with binomial link function connecting y with snps and gene sets.
sim_params$family <- "binomial" dat <- SimulateData(sim_params)
gR <- mediateR:::MakeGraphNELObject(sim_params$path,sim_params$xx_direct,sim_params$mm_direct) attrs <- list() attrs$edge <- list() attrs$edge$fontsize <- 12 edgeAttrs <- mediateR:::MakeedgeAttrs(sim_params$path_model,sim_params$xx_direct,sim_params$mm_direct)
fit <- ComputePath(dat, mmn = T)
gR2 <- mediateR:::MakeGraphNELObject(fit$path_model,fit$xx_direct,fit$mm_direct) attrs2 <- list() attrs2$edge <- list() attrs2$edge$fontsize <- 12 edgeAttrs2 <- mediateR:::MakeedgeAttrs(fit$path_model,fit$xx_direct,fit$mm_direct) par(mfcol=c(1,2)) plot(gR,edgeAttrs=edgeAttrs,attrs=attrs,main="True Graph") plot(gR2,edgeAttrs=edgeAttrs2,attrs=attrs2,main="Estimated Graph")
Given the simulation setting, we can approximate the true effects by approximating integral with large sample size by setting n
to million observations.
sim_params2 <- sim_params sim_params2$n <- 1e6 dat2 <- SimulateData(sim_params2) direct_t <- ComputeEffectxx(dat2,sim_params,"direct") indirect_t <- ComputeEffectxx(dat2,sim_params,"indirect") total_t <- ComputeEffectxx(dat2,sim_params,"total")
ComputeEffectxx
provides estimation of direct, indirect and total effect.
direct <- ComputeEffectxx(dat,fit,"direct") indirect <- ComputeEffectxx(dat,fit,"indirect") total <- ComputeEffectxx(dat,fit,"total") eff_comb <- cbind(direct,direct_t,indirect,indirect_t,total,total_t) rownames(eff_comb) <- names(fit$xx_direct) colnames(eff_comb) <- c("est","true","est","true","est","true") xx1total_sample <- eff_comb[1,5] xx1total_true <- eff_comb[1,6]
Table below summarizes the Indirect, Direct, and Total effects.
kable(eff_comb,digits=2) %>% kable_styling(bootstrap_options = "striped", full_width = F) %>% add_header_above(c(" ", "direct" = 2, "indirect" = 2, "total" = 2))
B <- 1000 total_xx1 <- rep(0,length=B) for(ii in 1:B){ ix <- sample(1:sim_params$n,replace=TRUE) dat_boot <- mediateR:::SubsetDat(dat,ix) fit_boot <- ComputePath(dat_boot, mmn = T) total_xx1[ii] <- ComputeEffectxx(dat_boot,fit_boot,"total")[1] } L95 = round(quantile(total_xx1, 0.025),2) U95 = round(quantile(total_xx1, 1 - 0.025),2)
Figure below shows the histgram of total effects of Bootstrap samples for SNP1. The 95\% CI of the total effect for SNP1 is (r L95
, r U95
)
hist(total_xx1,main="Bootstrap Samples SNP1 Total Effect",xlab="Estimate") abline(v=xx1total_sample,lwd=3,col='blue') abline(v=xx1total_true,lwd=3,col='red') legend("topright",c("Point Estimate","True"),col=c("blue","red"),lwd=2)
Same as simulation 1 and 3 but with exponentially distributed survival times. The rate is $e^{\beta^Tx}$ where $x$ are the covariates. This model can be fit with Cox Proportional Hazards.
sim_params$family <- "cox" dat <- SimulateData(sim_params) rmean <- max(as.matrix(dat$y)[,1])
Same as simulation 1 and 3 but with exponentially distributed survival times. The rate is $e^{\beta^Tx}$ where $x$ are the covariates. This model can be fit with Cox Proportional Hazards. We compute the effects on mean survival restricted to r round(rmean)
.
gR <- mediateR:::MakeGraphNELObject(sim_params$path,sim_params$xx_direct,sim_params$mm_direct) attrs <- list() attrs$edge <- list() attrs$edge$fontsize <- 12 edgeAttrs <- mediateR:::MakeedgeAttrs(sim_params$path_model,sim_params$xx_direct,sim_params$mm_direct)
fit <- ComputePath(dat, mmn = T)
gR2 <- mediateR:::MakeGraphNELObject(fit$path_model,fit$xx_direct,fit$mm_direct) attrs2 <- list() attrs2$edge <- list() attrs2$edge$fontsize <- 12 edgeAttrs2 <- mediateR:::MakeedgeAttrs(fit$path_model,fit$xx_direct,fit$mm_direct) par(mfcol=c(1,2)) plot(gR,edgeAttrs=edgeAttrs,attrs=attrs,main="True Graph") plot(gR2,edgeAttrs=edgeAttrs2,attrs=attrs2,main="Estimated Graph")
Given the simulation setting, we approximate true effects by approximating integral with large sample size use the maximum observed time as the restricted mean time.
sim_params2 <- sim_params sim_params2$n <- 1e4 # use many observations to approximate effects dat2 <- SimulateData(sim_params2) fit2 <- ComputePath(dat2, mmn = T) direct_t <- ComputeEffectxx(dat2,fit2,"direct",rmean=rmean) indirect_t <- ComputeEffectxx(dat2,fit2,"indirect",rmean=rmean) total_t <- ComputeEffectxx(dat2,fit2,"total",rmean=rmean)
Similarly, ComputeEffectxx
provides estimation of direct, indirect and total effect. rmean
, the mean survival restricted to r round(rmean)
, needs to be provided to calculate the effect.
direct <- ComputeEffectxx(dat,fit,"direct",rmean=rmean) indirect <- ComputeEffectxx(dat,fit,"indirect",rmean=rmean) total <- ComputeEffectxx(dat,fit,"total",rmean=rmean) eff_comb <- cbind(direct,direct_t,indirect,indirect_t,total,total_t) rownames(eff_comb) <- names(fit$xx_direct) colnames(eff_comb) <- c("est","true","est","true","est","true") xx1total_sample <- eff_comb[1,5] xx1total_true <- eff_comb[1,6]
Table below summarizes the Indirect, Direct, and Total effects.
kable(eff_comb,digits=2) %>% kable_styling(bootstrap_options = "striped", full_width = F) %>% add_header_above(c(" ", "direct" = 2, "indirect" = 2, "total" = 2))
B <- 1000 total_xx1 <- rep(0,length=B) for(ii in 1:B){ ix <- sample(1:sim_params$n,replace=TRUE) dat_boot <- mediateR:::SubsetDat(dat,ix) fit_boot <- ComputePath(dat_boot, mmn = T) total_xx1[ii] <- ComputeEffectxx(dat_boot,fit_boot,"total",rmean=rmean)[1] } L95 = round(quantile(total_xx1, 0.025),2) U95 = round(quantile(total_xx1, 1 - 0.025),2)
Figure below shows the histgram of total effects of Bootstrap samples for SNP1. The 95\% CI of the total effect for SNP1 is (r L95
, r U95
)
hist(total_xx1,main="Bootstrap Samples SNP1 Total Effect",xlab="Estimate") abline(v=xx1total_sample,lwd=3,col='blue') abline(v=xx1total_true,lwd=3,col='red') legend("topright",c("Point Estimate","True"),col=c("blue","red"),lwd=2)
Partial correlation between mediators adjusting for exposures and covariates
sim_params <- mediateR:::QuickSim(n,4,5,"gaussian") sim_params$family <- "cox" dat <- SimulateData(sim_params) dat <- SimulateData(sim_params) rmean <- max(as.matrix(dat$y)[,1]) fit <- ComputePath(dat, mmn = T) par1 = cov2cor(fit$cov_mm) colnames(par1) = rownames(par1) = names(fit$mm_direct) kable(par1,digits=2) %>% kable_styling(bootstrap_options = "striped", full_width = F)
Partial correlation between mediators adjusting for other mediators, exposures and covariates
par2 = fit$ivcor_mm colnames(par2) = rownames(par2) = names(fit$mm_direct) kable(par2,digits=2) %>% kable_styling(bootstrap_options = "striped", full_width = F)
Here we show how to compute confidence intervals and hypothesis tests using bootstrap sampling quantiles.
B <- 100 corrmm <- array(0,dim = c(B, ncol(dat$mm),ncol(dat$mm))) for(ii in 1:B){ ix <- sample(1:sim_params$n,replace=TRUE) dat_boot <- mediateR:::SubsetDat(dat,ix) fit_boot <- ComputePath(dat_boot, mmn = T) corrmm[ii,,] <- fit_boot$ivcor_mm } L95 = round(quantile(corrmm[,1,2], 0.025),2) U95 = round(quantile(corrmm[,1,2], 1 - 0.025),2)
The 95\% CI of partial correlation adjusting for other mediators, exposures and covariates between m1
and m2
is (r L95
, r U95
). 0 is included in this interval suggesting that the partial correlation between m1
and m2
is not significant.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.