#' This is the script to directly simulate GWAS summary statistics without simulating the individual genotypes or phenotypes.
#' Effect size beta's are generated using the 4-component mixture of T distributions.
#' For a description, see Qi and Chatterjee, Nature Communcations 2019 - Methods - Simulation setup - Scenario A.
#'
#' Usage: run the following line on Terminal command line
#' Rscript scenarioC_4comp_Tdistri.R arg1 arg2 arg3 arg4 arg5
#'
#' arg1-5 are command line arguments to be passed to the script.
#' arg1 indexes Nvec and can take values 1, 2, 3, 4, 5. N=Nvec[arg1]. For example, arg1=2 means N=1e5 and arg1=3 means N=2e5.
#' arg2 indexes pi1vec and can take values 1, 2. pi1=pi1vec[arg2]. arg2=1 means pi1=0.005 and arg2=2 means pi1=0.01.
#' arg3 indexes pthrvec and can take values 1, 2, 3, 4. pthr=pthrvec[arg3]. For example, arg3=1 means pthr=0.005 and arg3=4 means pthr=5e-8.
#' arg4 indexes thetavec and can take values 1, 2. theta=thetavec[arg4]. For example, arg4=2 means theta=0.2.
#' arg5 indexes NxNy_ratio_vec and can take values 1, 2. NxNy_ratio=NxNy_ratio_vec[arg5]. For example, arg5=2 means NxNy_ratio=3.
#'
#' Examples of usage:
#' N=1e5, 50% valid IVs (pi1=0.01), instrument selection threshold p<5e-8, causal effect 0.2, ny=nx/3
#' Rscript scenarioC_4comp_Tdistri.R 2 2 4 2 2
#'
#' One hundred simulations are generated by this script. We recommend running it on a computing cluster due to substantial computation time.
rm(list = ls())
library(MASS)
library(MendelianRandomization)
library(mvtnorm)
library(MRMix)
Nvec = c(5e4, 1e5, 2e5, 5e5, 1e6)
pi1vec = c(0.005, 0.01)
pthrvec = c(0.005, 5e-4, 5e-6, 5e-8)
thetavec = c(-0.2, 0.2)
NxNy_ratio_vec = c(1,3)
temp = as.integer(commandArgs(trailingOnly = TRUE))
N = Nvec[temp[1]] # Sample size of exposure X
pi1 = pi1vec[temp[2]] # Proportion of valid instruments in the genome
pthr = pthrvec[temp[3]] # P-value threshold for instrument selection
theta = thetavec[temp[4]] # Causal effect of X on Y
NxNy_ratio = NxNy_ratio_vec[temp[5]] # Ratio of sample size for X and Y: nx/ny
M = 2e5 # Number of independent SNPs
# Simlation model parameters
pi2 = 0.02-pi1; pi3 = 0.01; pi4 = 1-pi1-pi2-pi3
sigma2x = 5e-5; sigma2y = 5e-5; rho = 0.5
print(paste("N", N, "pthr", pthr, "pi1", pi1, "theta", theta, "NxNy_ratio", NxNy_ratio))
nx = N; ny = N/NxNy_ratio # Sample size of X and Y
nx.rev = ny; ny.rev = nx # Sample size for reverse directional analysis
est = matrix(NA, nrow = 100, ncol = 12)
colnames(est) = c("MRmix", "IVW", "w_median", "w_mode", "Egger", "numIV",
"MRmix_rev", "IVW_rev", "w_median_rev", "w_mode_rev", "Egger_rev", "numIV_rev")
for (repind in 1:100){
set.seed(1245*repind)
print(paste("rep",repind))
# Generate SNP indices of the 4 components
ind1 = sample(M, round(M*pi1))
ind2 = sample(setdiff(1:M,ind1), round(M*pi2))
ind3 = sample(setdiff(1:M,c(ind1,ind2)), round(M*pi3))
# Generate true effect size
beta = matrix(rep(0,2*M), ncol = 2)
beta[ind1,1] = sqrt(sigma2x)*rt(length(ind1), df=20)
beta[ind1,2] = theta*beta[ind1,1]
beta[ind2,] = rmvt(length(ind2), sigma = matrix(c(sigma2x,rho*sqrt(sigma2x*sigma2y),rho*sqrt(sigma2x*sigma2y),sigma2y),nrow=2), df = 20)
beta[ind2,2] = beta[ind2,2] + theta*beta[ind2,1]
beta[ind3,2] = sqrt(sigma2y)*rt(length(ind3), df=20)
# Generate observed GWAS summary statistics
betahat.x = beta[,1] + rnorm(M, mean = 0, sd = sqrt(1/nx))
betahat.y = beta[,2] + rnorm(M, mean = 0, sd = sqrt(1/ny))
betahat.x.rev = betahat.y
betahat.y.rev = betahat.x
# Instrument selection
ind_filter = which(2*(1-pnorm(sqrt(nx)*abs(betahat.x)))<pthr)
betahat.x = betahat.x[ind_filter]
betahat.y = betahat.y[ind_filter]
print(paste("numIV",length(ind_filter)))
est[repind,6] = length(ind_filter)
# MR analysis
if (est[repind,6]>2) {
mr.obj = mr_input(bx = betahat.x, bxse = rep(1/sqrt(nx), length(betahat.x)),
by = betahat.y, byse = rep(1/sqrt(ny), length(betahat.y)))
IVW = mr_ivw(mr.obj)
med = mr_median(mr.obj)
mod = mr_mbe(mr.obj, iterations = 100)
egger = mr_egger(mr.obj)
est[repind,2] = IVW$Estimate
est[repind,3] = med$Estimate
est[repind,4] = mod$Estimate
est[repind,5] = egger$Estimate
theta_temp_vec = seq(-0.49,0.5,by=0.01)
theta_est = MRMix(betahat.x, betahat.y, sx=1/sqrt(nx), sy=1/sqrt(ny), theta_temp_vec, pi_init = 0.6, sigma_init = 1e-5)$theta
}
est[repind,1] = theta_est
# Reverse direction analysis
ind_filter = which(2*(1-pnorm(sqrt(nx.rev)*abs(betahat.x.rev)))<pthr)
betahat.x.rev = betahat.x.rev[ind_filter]
betahat.y.rev = betahat.y.rev[ind_filter]
print(paste("Reverse numIV",length(ind_filter)))
est[repind,12] = length(ind_filter)
if (est[repind,12]>2) {
mr.obj = mr_input(bx = betahat.x.rev, bxse = rep(1/sqrt(nx.rev), length(betahat.x.rev)),
by = betahat.y.rev, byse = rep(1/sqrt(ny.rev), length(betahat.y.rev)))
IVW = mr_ivw(mr.obj)
med = mr_median(mr.obj)
mod = mr_mbe(mr.obj, iterations = 100)
egger = mr_egger(mr.obj)
est[repind,8] = IVW$Estimate
est[repind,9] = med$Estimate
est[repind,10] = mod$Estimate
est[repind,11] = egger$Estimate
theta_temp_vec = seq(-0.49,0.5,by=0.01)
theta_est = MRMix(betahat.x.rev, betahat.y.rev, sx=1/sqrt(nx.rev), sy=1/sqrt(ny.rev), theta_temp_vec, pi_init = 0.6, sigma_init = 1e-5)$theta
est[repind,7] = theta_est
}
if (repind%%5==0){
save(est, file = paste0("MRmix_est_N",N,"_pi1_",pi1,"_pthr",pthr,"_theta",theta,"_NxNy",NxNy_ratio,".rda"))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.