This vignette requires both a local machine and the BBMRI virtual machine. On the virtual machine, execute this chunk to set the library path, update the R package spliceQTL, and set the working directory.
lib <- "/virdir/Scratch/arauschenberger/library" .libPaths(lib) devtools::install_github("rauschenberger/spliceQTL",lib=lib) library("spliceQTL",lib.loc=lib) path <- "/virdir/Scratch/arauschenberger/spliceQTL" setwd(path)
(Please always execute this chunk when starting a new R session on the virtual machine!)
On a local machine with PLINK, execute this chunk to obtain the Geuvadis SNP data. Then move the files from the local to the virtual machine.
for(chr in 1:22){ spliceQTL::get.snps.geuvadis(chr=chr,data="N:/semisup/data/eQTLs", path="N:/spliceQTL/data/Geuvadis") }
On the virtual machine, execute this chunk to obtain the BBMRI SNP data, the Geuvadis exon data, and the BBMRI exon data. Choose one out of the six biobanks (CODAM, LL, LLS, NTR, PAN, RS).
for(chr in 1:22){ spliceQTL::get.snps.bbmri(chr=chr,biobank="LLS",path=path,size=500*10^3) } spliceQTL::get.exons.geuvadis(path=path) spliceQTL::get.exons.bbmri(path=path)
On the virtual machine, execute this chunk to prepare the data. See the documentation of the R package spliceQTL for further information. (It seems that lme4::lmer in spliceQTL::adjust.variables fails to release memory. Restart R after each chromosome.)
for(chr in 1:22){ for(data in c("Geuvadis","LLS")){ rm(list=setdiff(ls(),c("data","chr","path"))); gc() set.seed(1) cat("Analysing",data,chr,":",as.character(Sys.time()),"\n") if(data=="Geuvadis"){ load(file.path(path,"Geuvadis.exons.RData"),verbose=TRUE) } else { load(file.path(path,"BBMRI.exons.RData"),verbose=TRUE) cond <- sapply(strsplit(x=rownames(exons),split=":"),function(x) x[[1]]==data) exons <- exons[cond,] } load(file.path(path,paste0(data,".chr",chr,".RData")),verbose=TRUE) cat("Matching samples:","\n") list <- spliceQTL::match.samples(exons,snps) exons <- list$exons; snps <- list$snps; rm(list) cat("Adjusting samples:","\n") exons <- spliceQTL::adjust.samples(x=exons) # slow! exons <- asinh(x=exons) cat("Adjusting covariates:","\n") names <- strsplit(x=colnames(exons),split="_") # exon names length <- sapply(names,function(x) as.integer(x[[3]])-as.integer(x[[2]])) # exon length exons <- spliceQTL::adjust.variables(x=exons,group=gene_id,offset=length) # slow! # subset chromosome cond <- sapply(strsplit(x=colnames(exons),split="_"),function(x) x[[1]]==chr) exons <- exons[,cond] gene_id <- gene_id[cond] cat("Mapping exons:","\n") map <- list() map$genes <- spliceQTL::map.genes(chr=chr,path=path) map$exons <- spliceQTL::map.exons(gene=as.character(map$genes$gene_id),exon=gene_id) cat("Mapping SNPs:","\n") names <- strsplit(x=colnames(snps),split=":") snp.chr <- sapply(names,function(x) as.integer(x[[1]])) snp.pos <- sapply(names,function(x) as.integer(x[[2]])) map$snps <- spliceQTL::map.snps(gene.chr=map$genes$chr, gene.start=map$genes$start, gene.end=map$genes$end, snp.chr=snp.chr,snp.pos=snp.pos) cat("Dropping genes:","\n") map <- spliceQTL::drop.trivial(map=map) cat("Testing:",as.character(Sys.time())," -> ") rm(list=setdiff(ls(),c("exons","snps","map","data","chr","path"))); gc() save(list=c("exons","snps","map"),file=file.path(path,paste0("temp.",data,".chr",chr,".RData"))) } #q() #n #exit }
On the virtual machine, execute this chunk to test for alternative splicing.
trial <- "/virdir/Scratch/arauschenberger/spliceQTL/trial" for(chr in 1:22){ for(data in c("Geuvadis","LLS")){ cat("Analysing",data,chr,":",as.character(Sys.time()),"\n") rm(list=setdiff(ls(),c("data","chr","path"))); gc(); cat(".") load(file.path(path,paste0("temp.",data,".chr",chr,".RData"))); cat(".") if(FALSE){ # only for development exons <- exons[1:3,] snps <- snps[1:3,] map$genes <- map$genes[1:3,] map$exons <- map$exons[1:3] map$snps <- map$snps[1:3,] } pvalue <- spliceQTL::test.multiple(Y=exons, X=snps, map=map,spec=16,w.type="cov"); cat(".") save(object=pvalue,file=file.path(trial,paste0("pval.",data,".chr",chr,".RData"))); cat("\n") } }
On the virtual machine, execute this chunk to compare the results between the Geuvadis and the BBMRI project.
table <- data.frame(chr=seq_len(22)) list <- list() for(i in 1:22){ # load p-values load(file.path(path,paste0("pval.Geuvadis.chr",i,".RData"))) a <- pvalue; pvalue <- NA load(file.path(path,paste0("pval.LLS.chr",i,".RData"))) b <- pvalue; pvalue <- NA # filter tests (original) #sel <- "rho=1" #names <- intersect(rownames(a),rownames(b)) #a <- a[names,sel] #b <- b[names,sel] # filter tests (trial) names <- intersect(rownames(a),rownames(b)) a <- a[names] b <- b[names] if(FALSE){ # "Manhattan" plot graphics::plot.new() graphics::plot.window(xlim=c(0.5,length(a)+0.5),ylim=c(-1,1)*max(-log10(c(a,b)))) graphics::box() graphics::axis(side=1) graphics::axis(side=2) graphics::segments(x0=seq_along(a),y0=0,y1=-log10(a),col="darkred") graphics::segments(x0=seq_along(b),y0=0,y1=log10(b),col="darkblue") graphics::abline(h=0,col="grey") } # correlation #file <- file.path(path,"plots",paste0("cor.chr",i,".pdf")) # activate! #grDevices::pdf(file=file,width=4,height=4) # activate! #spliceQTL::grid(x=-log(a),y=-log(b),n=20) # activate! #grDevices::dev.off() # activate! table$cor[i] <- round(stats::cor(-log(a),-log(b),method="pearson"),digits=2) table$cor.test[i] <- signif(stats::cor.test(-log(a),-log(b),method="pearson")$p.value,digits=2) # significance a <- stats::p.adjust(a)<0.05 b <- stats::p.adjust(b)<0.05 table$none[i] <- sum(!a & !b) table$onlyGEU[i] <- sum(a & !b) table$onlyLLS[i] <- sum(!a & b) table$both[i] <- sum(a & b) table$chisq.test[i] <- suppressWarnings(signif(stats::chisq.test(table(a,b))$p.value,digits=2)) list[[i]] <- data.frame(chr=i,gene=names(a),Geuvadis=1*a,LLS=1*b,row.names=seq_along(a)) } save(table,file="table.RData") list <- do.call(what="rbind",args=list) names <- spliceQTL::map.genes(chr=NULL) list$symbol <- names$gene_name[match(x=list$gene,names$gene_id)] save(list,file="list.RData")
On the virtual machine, execute this chunk to plot correlations between SNPs and exons.
data <- "LLS" # "Geuvadis" or "LLS" chr <- 1 # 1,2,3,...,22 load(file.path(path,paste0("temp.",data,".chr",chr,".RData"))) # i <- which(map$genes$gene_id=="...") # gene name (e.g. ENSG00000172967) spliceQTL::visualise(Y=exons,X=snps,map=map,i=i)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.