knitr::opts_chunk$set(echo = TRUE, warning = F,message = F)
options(knitr.kable.NA = '')
options(scipen = 999)
library(graph)
library(kableExtra)
library(mediateR)
set.seed(280920181)

\newcommand{\Var}{\text{Var}} \newcommand{\E}{\mathbb{E}} \newcommand{\argmin}[1]{\underset{#1}{\operatorname{argmin}}\text{ }} \newcommand{\argmax}[1]{\underset{#1}{\operatorname{argmax}}} \newcommand{\ind}[1]{\mathbf{1}_{#1}} \newcommand{\todo}[1]{\color{red}{\text{TODO: #1}}}

Data Structure

The code contains functions to simulate data for mediation analysis.

params <- mediateR:::QuickSim(5,3,2,"gaussian")
dat <- SimulateData(params)
dat

The dat list with named elements (r names(dat)) is used as an argument for functions which estimate the direct and indirect effects.

One can manually create a list with these elements (such as when fitting models to new data) or rely on the simulation function SimulateData to create the structure. The argument to SimulateData has parameters controlling the sample size, coefficient estimates, number of mediators, etc.

Mediation Analysis

The goal is to assess the causal impact of dat$xx on dat$y and quantify how much of this effect passes through dat$mm, adjusting for covariates dat$co.

Linear

We simulate n observations from the "True Graph" using independent, binary SNPs drawn from Bernoulli(1/2) and path coefficients given below.

n <- 500
sim_params <- mediateR:::QuickSim(n,3,2,"gaussian")
dat <- SimulateData(sim_params)
gR <- mediateR:::MakeGraphNELObject(sim_params$path,sim_params$xx_direct,sim_params$mm_direct)
attrs <- list()
attrs$edge <- list()
attrs$edge$fontsize <- 12
edgeAttrs <- mediateR:::MakeedgeAttrs(sim_params$path_model,sim_params$xx_direct,sim_params$mm_direct)

Model fitting

ComputePath is used to fit the mediation analysis framework for various response, where dat are the datalist with the format mentioned above, mmn=F assuming independent mediators and reg=T is used for high-dimensional mediators (default is false).

If one makes the assumption of conditionally independent mediators given $\mathbf{x}$ and $\mathbf{c}$, i.e., $m_j \perp !!! \perp m_k | \mathbf{x},\mathbf{c}$, for all $j,k$, then $\mathbf{\Sigma}_\epsilon$ is diagonal and can be estimated using the error variances from separate univariate regressions, $m_j | (x,c)$ for all $j$. This estimate will be more efficient, but carries more assumptions.

fit <- ComputePath(dat, mmn = T, reg = F)
eff_est <- ComputeEffectsLinear(fit)

In linear models, the direct effect is the path coefficient $x \rightarrow m$ (xx_direct), and indirect effect is the product of path coefficients $x\rightarrow m$ (path_model), and $m \rightarrow y$ (mm_direct) .

gR2 <- mediateR:::MakeGraphNELObject(fit$path_model,fit$xx_direct,fit$mm_direct)
attrs2 <- list()
attrs2$edge <- list()
attrs2$edge$fontsize <- 12
edgeAttrs2 <- mediateR:::MakeedgeAttrs(fit$path_model,fit$xx_direct,fit$mm_direct)
par(mfcol=c(1,2))
plot(gR,edgeAttrs=edgeAttrs,attrs=attrs,main="True Graph")
plot(gR2,edgeAttrs=edgeAttrs2,attrs=attrs2,main="Estimated Graph")
eff <- ComputeEffectsLinear(sim_params)
eff_comb <- matrix(0,nrow(eff),ncol=2*ncol(eff))
for(ii in 1:ncol(eff)){
  eff_comb[,2*ii-1] <- eff_est[,ii]
  eff_comb[,2*ii] <- eff[,ii]
}
for(ii in (ncol(dat$xx)+1):(ncol(dat$xx)+ncol(dat$mm))){
  eff_comb[ii,3:ncol(eff_comb)] <- NA
}
colnames(eff_comb) <- rep(c("est","true"),ncol(eff))
rownames(eff_comb) <- rownames(eff)

Table below summarizes the Indirect, Direct, and Total effects.

kable(eff_comb,digits=2) %>%
  kable_styling(bootstrap_options = "striped", full_width = F) %>%
  add_header_above(c(" ", "direct" = 2, "indirect" = 2, "total" = 2))

Bootstrap

We propose computing confidence intervals and hypothesis tests using bootstrap sampling quantiles.

B <- 1000
eff_est_boot <- vector("list",length=B)
for(ii in 1:B){
  ix <- sample(1:sim_params$n,replace=TRUE)
  dat_sub <- mediateR:::SubsetDat(dat,ix)
  fit <- ComputePath(dat_sub, mmn = T)
  eff_est_boot[[ii]] <- ComputeEffectsLinear(fit)
}

total_xx1 <- vapply(eff_est_boot,function(x){x[1,3]},c(0))
L95 = round(quantile(total_xx1, 0.025),2)
U95 = round(quantile(total_xx1, 1 - 0.025),2)

Figure below shows the histgram of total effects of Bootstrap samples for SNP1. The 95\% CI of the total effect for SNP1 is (r L95, r U95)

hist(total_xx1,main="Bootstrap Samples SNP1 Total Effect",xlab="Estimate")
abline(v=eff[1,3],lwd=3,col='red')
abline(v=eff_est[1,3],lwd=3,col='blue')
legend("topright",c("Truth","Point Estimate"),col=c("red","blue"),lwd=2)

Logistic

Same as simulation 1 but with binomial link function connecting y with snps and gene sets.

sim_params$family <- "binomial"
dat <- SimulateData(sim_params)
gR <- mediateR:::MakeGraphNELObject(sim_params$path,sim_params$xx_direct,sim_params$mm_direct)
attrs <- list()
attrs$edge <- list()
attrs$edge$fontsize <- 12
edgeAttrs <- mediateR:::MakeedgeAttrs(sim_params$path_model,sim_params$xx_direct,sim_params$mm_direct)

Model Fitting

fit <- ComputePath(dat, mmn = T)
gR2 <- mediateR:::MakeGraphNELObject(fit$path_model,fit$xx_direct,fit$mm_direct)
attrs2 <- list()
attrs2$edge <- list()
attrs2$edge$fontsize <- 12
edgeAttrs2 <- mediateR:::MakeedgeAttrs(fit$path_model,fit$xx_direct,fit$mm_direct)
par(mfcol=c(1,2))
plot(gR,edgeAttrs=edgeAttrs,attrs=attrs,main="True Graph")
plot(gR2,edgeAttrs=edgeAttrs2,attrs=attrs2,main="Estimated Graph")

Given the simulation setting, we can approximate the true effects by approximating integral with large sample size by setting n to million observations.

sim_params2 <- sim_params
sim_params2$n <- 1e6 
dat2 <- SimulateData(sim_params2)
direct_t <- ComputeEffectxx(dat2,sim_params,"direct")
indirect_t <- ComputeEffectxx(dat2,sim_params,"indirect")
total_t <- ComputeEffectxx(dat2,sim_params,"total")

ComputeEffectxx provides estimation of direct, indirect and total effect.

direct <- ComputeEffectxx(dat,fit,"direct")
indirect <- ComputeEffectxx(dat,fit,"indirect")
total <- ComputeEffectxx(dat,fit,"total")
eff_comb <- cbind(direct,direct_t,indirect,indirect_t,total,total_t)
rownames(eff_comb) <- names(fit$xx_direct)
colnames(eff_comb) <- c("est","true","est","true","est","true")
xx1total_sample <- eff_comb[1,5]
xx1total_true <- eff_comb[1,6]

Table below summarizes the Indirect, Direct, and Total effects.

kable(eff_comb,digits=2) %>%
  kable_styling(bootstrap_options = "striped", full_width = F) %>%
  add_header_above(c(" ", "direct" = 2, "indirect" = 2, "total" = 2))

Bootstrap

B <- 1000
total_xx1 <- rep(0,length=B)
for(ii in 1:B){
  ix <- sample(1:sim_params$n,replace=TRUE)
  dat_boot <- mediateR:::SubsetDat(dat,ix)
  fit_boot <- ComputePath(dat_boot, mmn = T)
  total_xx1[ii] <- ComputeEffectxx(dat_boot,fit_boot,"total")[1]
}
L95 = round(quantile(total_xx1, 0.025),2)
U95 = round(quantile(total_xx1, 1 - 0.025),2)

Figure below shows the histgram of total effects of Bootstrap samples for SNP1. The 95\% CI of the total effect for SNP1 is (r L95, r U95)

hist(total_xx1,main="Bootstrap Samples SNP1 Total Effect",xlab="Estimate")
abline(v=xx1total_sample,lwd=3,col='blue')
abline(v=xx1total_true,lwd=3,col='red')
legend("topright",c("Point Estimate","True"),col=c("blue","red"),lwd=2)

Survival

Same as simulation 1 and 3 but with exponentially distributed survival times. The rate is $e^{\beta^Tx}$ where $x$ are the covariates. This model can be fit with Cox Proportional Hazards.

sim_params$family <- "cox"
dat <- SimulateData(sim_params)
rmean <- max(as.matrix(dat$y)[,1])

Model Fitting

Same as simulation 1 and 3 but with exponentially distributed survival times. The rate is $e^{\beta^Tx}$ where $x$ are the covariates. This model can be fit with Cox Proportional Hazards. We compute the effects on mean survival restricted to r round(rmean).

gR <- mediateR:::MakeGraphNELObject(sim_params$path,sim_params$xx_direct,sim_params$mm_direct)
attrs <- list()
attrs$edge <- list()
attrs$edge$fontsize <- 12
edgeAttrs <- mediateR:::MakeedgeAttrs(sim_params$path_model,sim_params$xx_direct,sim_params$mm_direct)
fit <- ComputePath(dat, mmn = T)
gR2 <- mediateR:::MakeGraphNELObject(fit$path_model,fit$xx_direct,fit$mm_direct)
attrs2 <- list()
attrs2$edge <- list()
attrs2$edge$fontsize <- 12
edgeAttrs2 <- mediateR:::MakeedgeAttrs(fit$path_model,fit$xx_direct,fit$mm_direct)
par(mfcol=c(1,2))
plot(gR,edgeAttrs=edgeAttrs,attrs=attrs,main="True Graph")
plot(gR2,edgeAttrs=edgeAttrs2,attrs=attrs2,main="Estimated Graph")

Given the simulation setting, we approximate true effects by approximating integral with large sample size use the maximum observed time as the restricted mean time.

sim_params2 <- sim_params
sim_params2$n <- 1e4 # use many observations to approximate effects
dat2 <- SimulateData(sim_params2)
fit2 <- ComputePath(dat2, mmn = T) 

direct_t <- ComputeEffectxx(dat2,fit2,"direct",rmean=rmean)
indirect_t <- ComputeEffectxx(dat2,fit2,"indirect",rmean=rmean)
total_t <- ComputeEffectxx(dat2,fit2,"total",rmean=rmean)

Similarly, ComputeEffectxx provides estimation of direct, indirect and total effect. rmean, the mean survival restricted to r round(rmean), needs to be provided to calculate the effect.

direct <- ComputeEffectxx(dat,fit,"direct",rmean=rmean)
indirect <- ComputeEffectxx(dat,fit,"indirect",rmean=rmean)
total <- ComputeEffectxx(dat,fit,"total",rmean=rmean)
eff_comb <- cbind(direct,direct_t,indirect,indirect_t,total,total_t)
rownames(eff_comb) <- names(fit$xx_direct)
colnames(eff_comb) <- c("est","true","est","true","est","true")
xx1total_sample <- eff_comb[1,5]
xx1total_true <- eff_comb[1,6]

Table below summarizes the Indirect, Direct, and Total effects.

kable(eff_comb,digits=2) %>%
  kable_styling(bootstrap_options = "striped", full_width = F) %>%
  add_header_above(c(" ", "direct" = 2, "indirect" = 2, "total" = 2))

Bootstrap

B <- 1000
total_xx1 <- rep(0,length=B)
for(ii in 1:B){
  ix <- sample(1:sim_params$n,replace=TRUE)
  dat_boot <- mediateR:::SubsetDat(dat,ix)
  fit_boot <- ComputePath(dat_boot, mmn = T)
  total_xx1[ii] <- ComputeEffectxx(dat_boot,fit_boot,"total",rmean=rmean)[1]
}
L95 = round(quantile(total_xx1, 0.025),2)
U95 = round(quantile(total_xx1, 1 - 0.025),2)

Figure below shows the histgram of total effects of Bootstrap samples for SNP1. The 95\% CI of the total effect for SNP1 is (r L95, r U95)

hist(total_xx1,main="Bootstrap Samples SNP1 Total Effect",xlab="Estimate")
abline(v=xx1total_sample,lwd=3,col='blue')
abline(v=xx1total_true,lwd=3,col='red')
legend("topright",c("Point Estimate","True"),col=c("blue","red"),lwd=2)

Estimation of mediator-mediator relations.

Partial correlation between mediators adjusting for exposures and covariates

sim_params <- mediateR:::QuickSim(n,4,5,"gaussian")
sim_params$family <- "cox"
dat <- SimulateData(sim_params)

dat <- SimulateData(sim_params)
rmean <- max(as.matrix(dat$y)[,1])

fit <- ComputePath(dat, mmn = T)
par1 = cov2cor(fit$cov_mm)
colnames(par1) = rownames(par1) = names(fit$mm_direct)
kable(par1,digits=2) %>%
  kable_styling(bootstrap_options = "striped", full_width = F)

Partial correlation between mediators adjusting for other mediators, exposures and covariates

par2 = fit$ivcor_mm
colnames(par2) = rownames(par2) = names(fit$mm_direct)
kable(par2,digits=2) %>%
  kable_styling(bootstrap_options = "striped", full_width = F)

Here we show how to compute confidence intervals and hypothesis tests using bootstrap sampling quantiles.

B <- 100
corrmm <- array(0,dim = c(B, ncol(dat$mm),ncol(dat$mm)))
for(ii in 1:B){
  ix <- sample(1:sim_params$n,replace=TRUE)
  dat_boot <- mediateR:::SubsetDat(dat,ix)
  fit_boot <- ComputePath(dat_boot, mmn = T)
  corrmm[ii,,] <- fit_boot$ivcor_mm
}

L95 = round(quantile(corrmm[,1,2], 0.025),2)
U95 = round(quantile(corrmm[,1,2], 1 - 0.025),2)

The 95\% CI of partial correlation adjusting for other mediators, exposures and covariates between m1 and m2 is (r L95, r U95). 0 is included in this interval suggesting that the partial correlation between m1 and m2 is not significant.



longjp/mediateR documentation built on May 24, 2023, 12:30 p.m.