simulations/scenarioA_4comp.R

#' This is the script to directly simulate GWAS summary statistics without simulating the individual genotypes or phenotypes.
#' Effect size beta's are generated using the 4-component model.
#' For a description, see Qi and Chatterjee, Nature Communcations 2019 - Methods - Simulation setup - Scenario A.
#'
#' Usage: run the following line on Terminal command line
#' Rscript scenarioA_4comp.R arg1 arg2 arg3 arg4 arg5
#'
#' arg1-5 are command line arguments to be passed to the script.
#' arg1 indexes Nvec and can take values 1, 2, 3, 4, 5. N=Nvec[arg1]. For example, arg1=2 means N=1e5 and arg1=3 means N=2e5.
#' arg2 indexes pi1vec and can take values 1, 2. pi1=pi1vec[arg2]. arg2=1 means pi1=0.005 and arg2=2 means pi1=0.01.
#' arg3 indexes pthrvec and can take values 1, 2, 3, 4. pthr=pthrvec[arg3]. For example, arg3=1 means pthr=0.005 and arg3=4 means pthr=5e-8.
#' arg4 indexes thetavec and can take values 1, 2, 3. theta=thetavec[arg4]. For example, arg4=3 means theta=0.2.
#' arg5 indexes NxNy_ratio_vec and can take values 1, 2, 3, 4, 5. NxNy_ratio=NxNy_ratio_vec[arg5]. For example, arg5=4 means NxNy_ratio=4.
#'
#' Examples of usage:
#' N=1e5, 50% valid IVs (pi1=0.01), instrument selection threshold p<5e-8, causal effect 0.2, ny=nx/3
#' Rscript scenarioA_4comp.R 2 2 4 3 3
#'
#' One hundred simulations are generated by this script. We recommend running it on a computing cluster due to substantial computation time.

rm(list = ls())
library(MASS)
library(MendelianRandomization)
library(MRMix)

Nvec = c(5e4, 1e5, 2e5, 5e5, 1e6)
pi1vec = c(0.005, 0.01)
pthrvec = c(0.005, 5e-4, 5e-6, 5e-8)
thetavec = c(-0.2, 0, 0.2)
NxNy_ratio_vec = 1:5

temp = as.integer(commandArgs(trailingOnly = TRUE))
N = Nvec[temp[1]] # Sample size of exposure X
pi1 = pi1vec[temp[2]] # Proportion of valid instruments in the genome
pthr = pthrvec[temp[3]] # P-value threshold for instrument selection
theta = thetavec[temp[4]] # Causal effect of X on Y
NxNy_ratio = NxNy_ratio_vec[temp[5]] # Ratio of sample size for X and Y: nx/ny

M = 2e5 # Number of independent SNPs
# Simlation model parameters
pi2 = 0.02-pi1; pi3 = 0.01; pi4 = 1-pi1-pi2-pi3
sigma2x = 5e-5; sigma2y = 5e-5; rho = 0.5

print(paste("N", N, "pthr", pthr, "pi1", pi1, "theta", theta, "NxNy_ratio", NxNy_ratio))

nx = N; ny = N/NxNy_ratio # Sample size of X and Y
nx.rev = ny; ny.rev = nx # Sample size for reverse directional analysis

est = matrix(NA, nrow = 100, ncol = 12)
colnames(est) = c("MRmix", "IVW", "w_median", "w_mode", "Egger", "numIV",
                  "MRmix_rev", "IVW_rev", "w_median_rev", "w_mode_rev", "Egger_rev", "numIV_rev")

for (repind in 1:100){
    set.seed(1245*repind)
    print(paste("rep",repind))

    # Generate SNP indices of the 4 components
    ind1 = sample(M, round(M*pi1))
    ind2 = sample(setdiff(1:M,ind1), round(M*pi2))
    ind3 = sample(setdiff(1:M,c(ind1,ind2)), round(M*pi3))

    # Generate true effect size
    beta = matrix(rep(0,2*M), ncol = 2)
    beta[ind1,1] = rnorm(length(ind1), mean = 0, sd = sqrt(sigma2x))
    beta[ind1,2] = theta*beta[ind1,1]
    beta[ind2,] = mvrnorm(length(ind2), mu = c(0,0), Sigma = matrix(c(sigma2x,rho*sqrt(sigma2x*sigma2y),rho*sqrt(sigma2x*sigma2y),sigma2y),nrow=2))
    beta[ind2,2] = beta[ind2,2] + theta*beta[ind2,1]
    beta[ind3,2] = rnorm(length(ind3), mean = 0, sd = sqrt(sigma2y))

    # Generate observed GWAS summary statistics
    betahat.x = beta[,1] + rnorm(M, mean = 0, sd = sqrt(1/nx))
    betahat.y = beta[,2] + rnorm(M, mean = 0, sd = sqrt(1/ny))
    betahat.x.rev = betahat.y
    betahat.y.rev = betahat.x

    # Instrument selection
    ind_filter = which(2*(1-pnorm(sqrt(nx)*abs(betahat.x)))<pthr)
    betahat.x = betahat.x[ind_filter]
    betahat.y = betahat.y[ind_filter]

    print(paste("numIV",length(ind_filter)))

    est[repind,6] = length(ind_filter)

    # MR analysis
    if (est[repind,6]>2) {
        mr.obj = mr_input(bx = betahat.x, bxse = rep(1/sqrt(nx), length(betahat.x)),
                          by = betahat.y, byse = rep(1/sqrt(ny), length(betahat.y)))
        IVW = mr_ivw(mr.obj)
        med = mr_median(mr.obj)
        mod = mr_mbe(mr.obj, iterations = 100)
        egger = mr_egger(mr.obj)

        est[repind,2] = IVW$Estimate
        est[repind,3] = med$Estimate
        est[repind,4] = mod$Estimate
        est[repind,5] = egger$Estimate

        theta_temp_vec = seq(-0.49,0.5,by=0.01)
        theta_est = MRMix(betahat.x, betahat.y, 1/sqrt(nx), 1/sqrt(ny), theta_temp_vec, pi_init = 0.6, sigma_init = 1e-5)
    }
    est[repind,1] = theta_est$theta

    # Reverse direction analysis
    ind_filter = which(2*(1-pnorm(sqrt(nx.rev)*abs(betahat.x.rev)))<pthr)
    betahat.x.rev = betahat.x.rev[ind_filter]
    betahat.y.rev = betahat.y.rev[ind_filter]

    print(paste("Reverse numIV",length(ind_filter)))

    est[repind,12] = length(ind_filter)

    if (est[repind,12]>2) {
        mr.obj = mr_input(bx = betahat.x.rev, bxse = rep(1/sqrt(nx.rev), length(betahat.x.rev)),
                          by = betahat.y.rev, byse = rep(1/sqrt(ny.rev), length(betahat.y.rev)))
        IVW = mr_ivw(mr.obj)
        med = mr_median(mr.obj)
        mod = mr_mbe(mr.obj, iterations = 100)
        egger = mr_egger(mr.obj)

        est[repind,8] = IVW$Estimate
        est[repind,9] = med$Estimate
        est[repind,10] = mod$Estimate
        est[repind,11] = egger$Estimate

        theta_temp_vec = seq(-0.49,0.5,by=0.01)
        theta_est = MRMix(betahat.x.rev, betahat.y.rev, 1/sqrt(nx.rev), 1/sqrt(ny.rev), theta_temp_vec, pi_init = 0.6, sigma_init = 1e-5)
        est[repind,7] = theta_est$theta
    }

    save(est, file = paste0("MRmix_est_N",N,"_pi1_",pi1,"_pthr",pthr,"_theta",theta,"_NxNy",NxNy_ratio,".rda"))
}
gqi/MRMix documentation built on Jan. 30, 2020, 1:35 a.m.