This vignette requires both a local machine and the BBMRI virtual machine. On the virtual machine, execute this chunk to set the library path, update the R package spliceQTL, and set the working directory.

lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
devtools::install_github("rauschenberger/spliceQTL",lib=lib)
library("spliceQTL",lib.loc=lib)
path <- "/virdir/Scratch/arauschenberger/spliceQTL"
setwd(path)

(Please always execute this chunk when starting a new R session on the virtual machine!)

Prepare data

On a local machine with PLINK, execute this chunk to obtain the Geuvadis SNP data. Then move the files from the local to the virtual machine.

for(chr in 1:22){
    spliceQTL::get.snps.geuvadis(chr=chr,data="N:/semisup/data/eQTLs",
                                 path="N:/spliceQTL/data/Geuvadis")
}

On the virtual machine, execute this chunk to obtain the BBMRI SNP data, the Geuvadis exon data, and the BBMRI exon data. Choose one out of the six biobanks (CODAM, LL, LLS, NTR, PAN, RS).

for(chr in 1:22){
    spliceQTL::get.snps.bbmri(chr=chr,biobank="LLS",path=path,size=500*10^3)
}
spliceQTL::get.exons.geuvadis(path=path)
spliceQTL::get.exons.bbmri(path=path)

On the virtual machine, execute this chunk to prepare the data. See the documentation of the R package spliceQTL for further information. (It seems that lme4::lmer in spliceQTL::adjust.variables fails to release memory. Restart R after each chromosome.)

for(chr in 1:22){
  for(data in c("Geuvadis","LLS")){

    rm(list=setdiff(ls(),c("data","chr","path"))); gc()
    set.seed(1)

    cat("Analysing",data,chr,":",as.character(Sys.time()),"\n")
    if(data=="Geuvadis"){
      load(file.path(path,"Geuvadis.exons.RData"),verbose=TRUE)
    } else {
      load(file.path(path,"BBMRI.exons.RData"),verbose=TRUE)
      cond <- sapply(strsplit(x=rownames(exons),split=":"),function(x) x[[1]]==data)
      exons <- exons[cond,]
    }
    load(file.path(path,paste0(data,".chr",chr,".RData")),verbose=TRUE)

    cat("Matching samples:","\n")
    list <- spliceQTL::match.samples(exons,snps)
    exons <- list$exons; snps <- list$snps; rm(list)

    cat("Adjusting samples:","\n")
    exons <- spliceQTL::adjust.samples(x=exons) # slow!
    exons <- asinh(x=exons)

    cat("Adjusting covariates:","\n")
    names <- strsplit(x=colnames(exons),split="_") # exon names
    length <- sapply(names,function(x) as.integer(x[[3]])-as.integer(x[[2]])) # exon length
    exons <- spliceQTL::adjust.variables(x=exons,group=gene_id,offset=length) # slow!

    # subset chromosome
    cond <- sapply(strsplit(x=colnames(exons),split="_"),function(x) x[[1]]==chr)
    exons <- exons[,cond]
    gene_id <- gene_id[cond]

    cat("Mapping exons:","\n")
    map <- list()
    map$genes <- spliceQTL::map.genes(chr=chr,path=path)
    map$exons <- spliceQTL::map.exons(gene=as.character(map$genes$gene_id),exon=gene_id)

    cat("Mapping SNPs:","\n")
    names <- strsplit(x=colnames(snps),split=":")
    snp.chr <- sapply(names,function(x) as.integer(x[[1]]))
    snp.pos <- sapply(names,function(x) as.integer(x[[2]]))
    map$snps <- spliceQTL::map.snps(gene.chr=map$genes$chr,
                                    gene.start=map$genes$start,
                                    gene.end=map$genes$end,
                                    snp.chr=snp.chr,snp.pos=snp.pos)

    cat("Dropping genes:","\n")
    map <- spliceQTL::drop.trivial(map=map)

    cat("Testing:",as.character(Sys.time())," -> ")
    rm(list=setdiff(ls(),c("exons","snps","map","data","chr","path"))); gc()

    save(list=c("exons","snps","map"),file=file.path(path,paste0("temp.",data,".chr",chr,".RData")))
  }
#q()
#n
#exit
}

Test hypotheses

On the virtual machine, execute this chunk to test for alternative splicing.

trial <- "/virdir/Scratch/arauschenberger/spliceQTL/trial"
for(chr in 1:22){
    for(data in c("Geuvadis","LLS")){
        cat("Analysing",data,chr,":",as.character(Sys.time()),"\n")
        rm(list=setdiff(ls(),c("data","chr","path"))); gc(); cat(".")
        load(file.path(path,paste0("temp.",data,".chr",chr,".RData"))); cat(".")
        if(FALSE){ # only for development
            exons <- exons[1:3,]
            snps <- snps[1:3,]
            map$genes <- map$genes[1:3,]
            map$exons <- map$exons[1:3]
            map$snps <- map$snps[1:3,]
        }
        pvalue <- spliceQTL::test.multiple(Y=exons, X=snps, map=map,spec=16,w.type="cov"); cat(".")
        save(object=pvalue,file=file.path(trial,paste0("pval.",data,".chr",chr,".RData"))); cat("\n")
    }
}

Analyse results

On the virtual machine, execute this chunk to compare the results between the Geuvadis and the BBMRI project.

table <- data.frame(chr=seq_len(22))
list <- list()
for(i in 1:22){
  # load p-values
  load(file.path(path,paste0("pval.Geuvadis.chr",i,".RData")))
  a <- pvalue; pvalue <- NA
  load(file.path(path,paste0("pval.LLS.chr",i,".RData")))
  b <- pvalue; pvalue <- NA

  # filter tests (original)
  #sel <- "rho=1"
  #names <- intersect(rownames(a),rownames(b))
  #a <- a[names,sel]
  #b <- b[names,sel]

  # filter tests (trial)
  names <- intersect(rownames(a),rownames(b))
  a <- a[names]
  b <- b[names]

  if(FALSE){
   # "Manhattan" plot
   graphics::plot.new()
   graphics::plot.window(xlim=c(0.5,length(a)+0.5),ylim=c(-1,1)*max(-log10(c(a,b))))
   graphics::box()
   graphics::axis(side=1)
   graphics::axis(side=2)
   graphics::segments(x0=seq_along(a),y0=0,y1=-log10(a),col="darkred")
   graphics::segments(x0=seq_along(b),y0=0,y1=log10(b),col="darkblue")
   graphics::abline(h=0,col="grey")
  }

  # correlation
  #file <- file.path(path,"plots",paste0("cor.chr",i,".pdf")) # activate!
  #grDevices::pdf(file=file,width=4,height=4) # activate!
  #spliceQTL::grid(x=-log(a),y=-log(b),n=20) # activate!
  #grDevices::dev.off() # activate!
  table$cor[i] <- round(stats::cor(-log(a),-log(b),method="pearson"),digits=2)
  table$cor.test[i] <- signif(stats::cor.test(-log(a),-log(b),method="pearson")$p.value,digits=2)

  # significance
  a <- stats::p.adjust(a)<0.05
  b <- stats::p.adjust(b)<0.05
  table$none[i] <- sum(!a & !b)
  table$onlyGEU[i] <- sum(a & !b)
  table$onlyLLS[i] <- sum(!a & b)
  table$both[i] <- sum(a & b)
  table$chisq.test[i] <- suppressWarnings(signif(stats::chisq.test(table(a,b))$p.value,digits=2))

  list[[i]] <- data.frame(chr=i,gene=names(a),Geuvadis=1*a,LLS=1*b,row.names=seq_along(a))

}
save(table,file="table.RData")

list <- do.call(what="rbind",args=list)
names <- spliceQTL::map.genes(chr=NULL)
list$symbol <- names$gene_name[match(x=list$gene,names$gene_id)]
save(list,file="list.RData")

On the virtual machine, execute this chunk to plot correlations between SNPs and exons.

data <- "LLS" # "Geuvadis" or "LLS"
chr <- 1 # 1,2,3,...,22
load(file.path(path,paste0("temp.",data,".chr",chr,".RData")))
# i <- which(map$genes$gene_id=="...") # gene name (e.g. ENSG00000172967)
spliceQTL::visualise(Y=exons,X=snps,map=map,i=i)


rauschenberger/spliceQTL documentation built on May 13, 2019, 3:02 a.m.