knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(MASS) ## calculation of asymptotic covariance matrix for Odds ratio ### convert logit values to probabilities expit <- function(x){exp(x)/(1+exp(x))} ## calculate asymptotic version of (X^t W X) asymp_var_logistic <- function(n,G_prob,Gamma_0,Gamma_1){ N <- length(Gamma_0) disease_probs <- expit(outer(Gamma_0,c(1,1,1)) + outer(Gamma_1,c(0,1,2))) diag_weights <- disease_probs*(1-disease_probs) a <- n*apply(G_prob*diag_weights,1,sum) b <- n*apply(G_prob*diag_weights*matrix(rep(c(0,1,2),N),byrow=TRUE,nrow=N),1,sum) c <- b d <- n*apply(G_prob*diag_weights*matrix(rep(c(0,1,4),N),byrow=TRUE,nrow=N),1,sum) ## invert matrix and take element for Gamma1 return(a/(a*d-b*c)) } asymp_var_linear <- function(n,G_prob,sigma=1){ N <- nrow(G_prob) a <- n*apply(G_prob,1,sum) b <- n*apply(G_prob*matrix(rep(c(0,1,2),N),byrow=TRUE,nrow=N),1,sum) c <- b d <- n*apply(G_prob*matrix(rep(c(0,1,4),N),byrow=TRUE,nrow=N),1,sum) ## invert matrix and take element for Gamma1 return(a/(a*d-b*c)*sigma^2) } asymp_cov_linear_linear <- function(n_overlap,n_gx,n_gy,G_prob,sigma_x=1,sigma_y=1,cor_xy=.5){ N <- nrow(G_prob) cov_xy <- cor_xy*sigma_x*sigma_y a <- apply(G_prob,1,sum) b <- apply(G_prob*matrix(rep(c(0,1,2),N),byrow=TRUE,nrow=N),1,sum) c <- b d <- apply(G_prob*matrix(rep(c(0,1,4),N),byrow=TRUE,nrow=N),1,sum) ## invert matrix and take element for Gamma1 return(n_overlap*(a/(a*d-b*c))*cov_xy/(n_gx*n_gy)) } asymp_cov_linear_logistic <- function(n_overlap,n_gx,n_gy,G_prob,cor_xy=0,sigma=1,Gamma_0,Gamma_1,prev=0.01){ N <- length(Gamma_0) cov_xy <- cor_xy*sigma*sqrt(prev*(1-prev)) disease_probs <- expit(outer(Gamma_0,c(1,1,1)) + outer(Gamma_1,c(0,1,2))) diag_weights <- disease_probs*(1-disease_probs) a <- apply(G_prob*diag_weights,1,sum) b <- apply(G_prob*diag_weights*matrix(rep(c(0,1,2),N),byrow=TRUE,nrow=N),1,sum) c <- b d <- apply(G_prob*diag_weights*matrix(rep(c(0,1,4),N),byrow=TRUE,nrow=N),1,sum) ## invert matrix and take element for Gamma1 return(n_overlap*(a/(a*d-b*c))*cov_xy/(n_gx*n_gy)) } ################################################## Linear Logistic case ################################################### ## parameters N <- 100000 # number of snps to simulate (assumed in LD and HWE) prev <- 0.1 # disease prevalence gamma1 <- rnorm(N,mean=0,sd=0.3) # true instrument strengths beta <- 0.1 # causal effect OR <- exp(beta*gamma1) ## true odds ratio (G-Y association) MAF <- seq(from=.01,to=.5,length=N) # minor allele frequency n_G_X <- 100000 # sample size G-X (# individuals genotyped on G with X phenotype) n_G_Y <- 200000 # sample size G-Y (# individuals genotyped on G with Y phenotype) overlap <- 100000 # shared sample size sigma_x=1 #variation in trait cor_xy=0.1 # correlation between x and y on same person ## assuming HWE G_prob <- cbind(MAF^2,2*MAF*(1-MAF),(1-MAF)^2) ## find Gamma0 for logistic model so prevalence is 0.1 Gamma1 <- log(OR) # G, Y association on log-odds scale myfunc <- function(MAF,Gamma0, Gamma1, prev){ MAF^2*expit(Gamma0+Gamma1*2)+2*MAF*(1-MAF)*expit(Gamma0+Gamma1)+(1-MAF)^2*expit(Gamma0)-prev } Gamma0 <- numeric(N) for(i in 1:N) Gamma0[i] <- uniroot(myfunc,Gamma1=Gamma1[i],MAF=MAF[i],prev=prev,lower=-10, upper=10)$root var_Gamma_y <- asymp_var_logistic(n_G_Y,G_prob,Gamma0,Gamma1) var_gamma_x <- asymp_var_linear(n_G_X,G_prob,sigma=sigma_x) cov_gamma_x_Gamma_y <- asymp_cov_linear_logistic(overlap, n_G_X,n_G_Y,G_prob,cor_xy=cor_xy,sigma=sigma_x,Gamma_0=Gamma0,Gamma_1=Gamma1,prev=prev) ## now what if G-X and G-Y GWASs share sample overlap (simulate joint association) ## simulate summary statistics ## create array, first 2 dimensions represent individual covariance matrices, 3rd dimension indexing SNPs library(MASS) cov_array <- array(dim=c(2,2,length(var_gamma_x))) cov_array[1,1,] <- var_gamma_x cov_array[2,1,] <- cov_gamma_x_Gamma_y cov_array[1,2,] <- cov_array[2,1,] cov_array[2,2,] <- var_Gamma_y summary_stats <- apply(cov_array,3,function(x){mvrnorm(n=1,mu=c(0,0),Sigma=x)}) summary_stats <- t(summary_stats + rbind(gamma1,Gamma1)) ## checking correlation cor(summary_stats-cbind(gamma1,Gamma1)) ################################################## Linear Linear case ################################################### ## parameters N <- 100000 # number of snps to simulate (assumed in LD and HWE) prev <- 0.1 # disease prevalence beta <- 0.1 # causal effect of X and Y gamma1 <- rnorm(N,mean=0,sd=0.3) # true instrument strengths Gamma1 <- beta*gamma1 # true G/Y associations MAF <- seq(from=.01,to=.5,length=N) # minor allele frequency n_G_X <- 100000 # sample size G-X n_G_Y <- 200000 # sample size G-Y overlap <- 100000 # shared sample size (must be less than or equal to min(n_G_X,n_G_Y)) sigma_x=1 #variation in trait sigma_y=1 #variation in outcome cor_xy=0.1 # correlation between x and y on same person # note if there is no confounding this will be beta*sqrt(var_X)/sqrt(var_Y) ## assuming HWE G_prob <- cbind(MAF^2,2*MAF*(1-MAF),(1-MAF)^2) var_Gamma_y <- asymp_var_linear(n_G_Y,G_prob,sigma=sigma_y) var_gamma_x <- asymp_var_linear(n_G_X,G_prob,sigma=sigma_x) cov_gamma_x_Gamma_y <- asymp_cov_linear_linear(overlap, n_G_X,n_G_Y,G_prob,sigma_x=sigma_x,sigma_y=sigma_y,cor_xy=cor_xy) ## now what if G-X and G-Y GWASs share sample overlap (simulate joint association) ## simulate summary statistics ## create array, first 2 dimensions represent individual covariance matrices, 3rd dimension indexing SNPs cov_array <- array(dim=c(2,2,length(var_gamma_x))) cov_array[1,1,] <- var_gamma_x cov_array[2,1,] <- cov_gamma_x_Gamma_y cov_array[1,2,] <- cov_array[2,1,] cov_array[2,2,] <- var_Gamma_y summary_stats <- apply(cov_array,3,function(x){mvrnorm(n=1,mu=c(0,0),Sigma=x)}) summary_stats <- t(summary_stats + rbind(gamma1,Gamma1)) ## checking correlation cor(summary_stats-cbind(gamma1,Gamma1))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.