knitr::opts_chunk$set(echo = FALSE,warning = FALSE) set.seed(1)
## Load packages library(devtools) library(dplyr) library(ggplot2) library(cowplot) library(xtable) load_all('.') # library('gws') library(moiR) library(bigsnpr) library(bigstatsr)
### Real data # genomes<-readRDS("genome.rda") data(genomes) G <- genomes$genotypes X.random = inputena.mat(G[,sample(1:ncol(G),size = 600)]) N=nrow(X.random) M=ncol(X.random) qplot(fn(X.random), xlab="allele dossage",main="")
X.window =inputena.mat(G[,c(1:600)]) # Instead of sampling, I get them consecutive N=nrow(X.window) M=ncol(X.window) qplot(fn(X.window), xlab="allele dossage",main="")
eff.gauss=rnorm(M,mean = 0,sd = 1) qplot(eff.gauss,geom="histogram",main="Simulated weak Gaussian SNP effects") Y.random.gauss=meanvarcent(X.random %*% eff.gauss) Y.window.gauss=meanvarcent(X.window %*% eff.gauss)
prob=0.005 eff.skewed=sample(c(-1,0,1),replace = TRUE, prob = c(prob,1-2*prob,prob),size = M) # eff.skewed=rexp(n=M,rate = 10e6) qplot(eff.skewed,geom="histogram",main="Simulated discrete large SNP effects") Y.random.skewed=meanvarcent(X.random %*% eff.skewed) Y.window.skewed=meanvarcent(X.window %*% eff.skewed) Y.window.skewed2=meanvarcent((X.window %*% eff.skewed ) *(X.window %*% eff.skewed )) Y.window.skewed2=meanvarcent((X.window %*% eff.skewed ) *(X.window %*% eff.skewed )) plot(Y.window.skewed,Y.window.skewed2) Y.window.mixture= meanvarcent((X.window %*% eff.skewed ) + (X.window %*% (eff.gauss/10) )) Y.window.mixture= meanvarcent((X.window %*% eff.skewed ) * (X.window %*% (eff.gauss/10) )) X.window %*% eff.skewed
b.random.gauss=cgwa(X.random,Y.random.gauss) b.random.skewed=cgwa(X.random,Y.random.skewed) b.window.gauss=cgwa(X.window,Y.window.gauss) b.window.skewed=cgwa(X.window,Y.window.skewed) bgwa.random.gauss=gwa(X.random,Y.random.gauss) bgwa.random.skewed=gwa(X.random,Y.random.skewed) bgwa.window.gauss=gwa(X.window,Y.random.gauss) bgwa.window.skewed=gwa(X.window,Y.random.skewed) blasso.random.gauss=lassogwa(X.random,Y.random.gauss) blasso.random.skewed=lassogwa(X.random,Y.random.skewed) blasso.window.gauss=lassogwa(X.window,Y.window.gauss) blasso.window.skewed=lassogwa(X.window,Y.window.skewed) bridge.random.gauss=ridgegwa(X.random,Y.random.gauss) bridge.random.skewed=ridgegwa(X.random,Y.random.skewed) bridge.window.gauss=ridgegwa(X.window,Y.window.gauss) bridge.window.skewed=ridgegwa(X.window,Y.window.skewed) plot(ridgegwa(X.random,Y.random.skewed) ,eff.skewed) plot(ridgegwa(X.random,Y.random.skewed,type = "ld") ,eff.skewed,lambda=100) plot( ridgegwa(X.random,Y.random.skewed,type = "penalized",k = 100,x0 = 0.99,lambda = 1) ,eff.skewed) plot( ridgegwa(X.window,Y.window.skewed,type = "penalized",k = 100,x0 = 0.9,lambda = 1) ,eff.skewed) plot( ridgegwa(X.window,Y.window.gauss,type = "penalized",k = 10,x0 = 0.5,lambda = 1) ,eff.gauss) plot( ridgegwa(X.random,Y.random.gauss,type = "penalized",k=1,x0 = 0.5,lambda=.1) ,eff.gauss) plot( # ridgegwa(X.window,Y.window.skewed,type = "ldpenalized") ridgegwa(X.window,Y.window.skewed,type = "penalized") ,eff.skewed) plot( ridgegwa(X.window,Y.window.gauss,type = "penalized") # ridgegwa(X.window,Y.window.gauss,type = "ldpenalized") ,eff.gauss) plot( # ridgegwa(X.random,Y.random.gauss,type = "penalized") ridgegwa(X.random,Y.random.gauss,type = "ldpenalized") ,eff.gauss) plot( ridgegwa(X.random,Y.random.gauss,type = "penalized") ,gwa(X.random,Y.random.gauss) ) plot( ridgegwa(X.random,Y.random.gauss,type = "penalized") ,gwa(X.random,Y.random.gauss) ) plot( ridgegwa(X.window,Y.window.gauss,type = "penalized") ,gwa(X.window,Y.window.gauss) ) plot( ridgegwa(X.window,Y.window.mixture,type = "penalized") ,eff.skewed*eff.gauss/10) cgwa(X.random,Y.random.gauss) %>% hist cgwa(X.random,Y.random.skewed) %>% hist cgwa(X.window,Y.window.gauss) %>% hist cgwa(X.window,Y.window.skewed) %>% hist plot(cgwa(X.random,Y.random.skewed) ,eff.skewed) warning(FALSE) bridge.random.gauss=ridgegwa(X.random,Y.random.gauss,lambda = 1) cor.test(eff.gauss,bridge.random.gauss)$estimate bridge.random.gauss=ridgegwa(X.random,Y.random.gauss,lambda = 0.1) cor.test(eff.gauss,bridge.random.gauss)$estimate bridge.random.gauss=ridgegwa(X.random,Y.random.gauss,lambda = .01) cor.test(eff.gauss,bridge.random.gauss)$estimate bridge.random.gauss=ridgegwa(X.random,Y.random.gauss,lambda = .001) cor.test(eff.gauss,bridge.random.gauss)$estimate bridge.random.gauss=ridgegwa(X.random,Y.random.gauss,lambda = .0001) cor.test(eff.gauss,bridge.random.gauss)$estimate bridge.random.gauss=regularridgegwa(X.random,Y.random.gauss,lambda = .0001) bridge.random.gauss=regularridgegwa(X.random,Y.random.gauss,lambda = .001) cor.test(eff.gauss,bridge.random.gauss) lm(eff.gauss~bridge.random.gauss) %>% summary bridge.random.gauss=cgwa(X.random,Y.random.gauss) lm(eff.gauss~bridge.random.gauss) %>% summary # sum(abs(eff.gauss)) # sum(abs(bgwa.random.gauss)) # sum(abs(b.random.gauss)) # # sum(abs(eff.skewed)) # sum(abs(bgwa.random.skewed)) # sum(abs(b.random.skewed)) # # # sum(abs(eff.gauss)) # sum(abs(bgwa.window.gauss)) # sum(abs(b.window.gauss)) # # sum(abs(eff.skewed)) # sum(abs(bgwa.window.skewed)) # sum(abs(b.window.skewed))
The plot below indicates that the best performing for a sparse genetic architecture is Lasso, while for a polygenic architecture, with many Gaussian effects, the conditional model is still the best.
grid1<-plot_grid(ncol=2, addggregression(ggdotscolor(x=eff.skewed,y=bgwa.random.skewed) + ggtitle('marginal | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=bgwa.random.gauss) + ggtitle('marginal | gauss')) , addggregression(ggdotscolor(x=eff.skewed,y=b.random.skewed) + ggtitle('conditional | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=b.random.gauss) + ggtitle('conditional | gauss')) , addggregression(ggdotscolor(x=eff.skewed,y=blasso.random.skewed) + ggtitle('lasso | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=blasso.random.gauss) + ggtitle('lasso | gauss')) , addggregression(ggdotscolor(x=eff.skewed,y=bridge.random.skewed) + ggtitle('ridge | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=bridge.random.gauss) + ggtitle('ridge | gauss')) ) + ggtitle('Random SNPs') grid1 save_plot(file='figs/random_snps_simulationgwa.pdf', grid1,base_width = 10,base_height = 12)
The plot below indicates that once LD is high, only the conditional model performs relatively well. The marginal and the lasso perform poorly.
grid2<-plot_grid(ncol=2, addggregression(ggdotscolor(x=eff.skewed,y=bgwa.window.skewed) + ggtitle('marginal | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=bgwa.window.gauss) + ggtitle('marginal | gauss')) , addggregression(ggdotscolor(x=eff.skewed,y=b.window.skewed) + ggtitle('conditional | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=b.window.gauss) + ggtitle('conditional | gauss')) , addggregression(ggdotscolor(x=eff.skewed,y=blasso.window.skewed) + ggtitle('lasso | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=blasso.window.gauss) + ggtitle('lasso | gauss')) , addggregression(ggdotscolor(x=eff.skewed,y=bridge.window.skewed) + ggtitle('bridge | skewed')) , addggregression(ggdotscolor(x=eff.gauss,y=bridge.window.gauss) + ggtitle('bridge | gauss')) ) grid2 save_plot(file='figs/window_snps_simulationgwa.pdf', grid2,base_width = 10,base_height = 12)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.