set.seed(0) knitr::opts_chunk$set( out.extra = 'style="display:block; margin: auto"', fig.align = "center", fig.path = "pQTLtools/", collapse = TRUE, comment = "#>", dev = "png")
The examples here are based on the SCALLOP work @scallop_inf.
pkgs <- c("GenomeInfoDb", "GenomicRanges", "TwoSampleMR", "biomaRt", "coloc", "dplyr", "gap", "ggplot2", "gwasvcf", "httr", "ieugwasr", "karyoploteR", "circlize", "knitr", "meta", "plotly", "pQTLdata", "pQTLtools", "rGREAT", "readr", "regioneR", "seqminer") for (p in pkgs) if (length(grep(paste("^package:", p, "$", sep=""), search())) == 0) { if (!requireNamespace(p)) warning(paste0("This vignette needs package `", p, "'; please install")) } invisible(suppressMessages(lapply(pkgs, require, character.only=TRUE)))
We start with results on osteoprotegerin (OPG) @kwan14,
data(OPG,package="gap.datasets") meta::settings.meta(method.tau="DL") gap::METAL_forestplot(OPGtbl,OPGall,OPGrsid,width=6.75,height=5,digits.TE=2,digits.se=2, col.diamond="black",col.inside="black",col.square="black") gap::METAL_forestplot(OPGtbl,OPGall,OPGrsid,package="metafor",method="FE",xlab="Effect", showweights=TRUE)
involving both a cis and a trans pQTLs. As meta
inherently includes random effects, we use a fixed effects (FE) model from metafor
.
options(width=200) f <- file.path(find.package("pQTLtools"),"tests","INF1.merge") merged <- read.delim(f,as.is=TRUE) hits <- merge(merged[c("CHR","POS","MarkerName","prot","log10p")], pQTLdata::inf1[c("prot","uniprot")],by="prot") %>% dplyr::mutate(log10p=-log10p) names(hits) <- c("prot","Chr","bp","SNP","log10p","uniprot") cistrans <- gap::cis.vs.trans.classification(hits,pQTLdata::inf1,"uniprot") cis.vs.trans <- with(cistrans,data) knitr::kable(with(cistrans,table),caption="cis/trans classification") with(cistrans,total) T <- with(cistrans,table) H <- T[rownames(T)!="total","total"] merge <- merged[c("Chrom","Start","End","prot","MarkerName")] merge_cvt <- merge(merge,cis.vs.trans,by.x=c("prot","MarkerName"),by.y=c("prot","SNP")) ord <- with(merge_cvt,order(Chr,bp)) merge_cvt <- merge_cvt[ord,]
This is visualised via a circos plot, highlighting the likely causal genes for pQTLs,
pQTLs <- transmute(merge_cvt,chr=paste0("chr",Chr),start=bp,end=bp,log10p) cis.pQTLs <- subset(merge_cvt,cis) %>% dplyr::transmute(chr=paste0("chr",p.chr),start=p.start,end=p.end,gene=p.gene,cols="red") pQTL_genes <- read.table(file.path(find.package("pQTLtools"),"tests","pQTL_genes.txt"), col.names=c("chr","start","end","gene")) %>% dplyr::mutate(chr=gsub("hs","chr",chr)) %>% dplyr::left_join(cis.pQTLs) %>% dplyr::mutate(cols=ifelse(is.na(cols),"blue",cols)) par(cex=0.7) gap::circos.mhtplot2(pQTLs,pQTL_genes,ticks=0:3*10,ymax=30) par(cex=1)
where the red and blue colours indicate cis/trans classifications.
barplot(table(H),xlab="No. of pQTL regions",ylab="No. of proteins", ylim=c(0,25),col="darkgrey",border="black",cex=0.8,cex.axis=2,cex.names=2,las=1)
gap::circos.cis.vs.trans.plot(hits=f,pQTLdata::inf1,"uniprot")
The circos plot is based on target genes (those encoding proteins) and somewhat too busy.
Here we focus on SH2B3.
HOTSPOT <- "chr12:111884608_C_T" a <- data.frame(chr="chr12",start=111884607,end=111884608,gene="SH2B3") b <- dplyr::filter(cis.vs.trans,SNP==HOTSPOT) %>% dplyr::mutate(p.chr=paste0("chr",p.chr)) %>% dplyr::rename(chr=p.chr,start=p.start,end=p.end,gene=p.gene,cistrans=cis.trans) cols <- rep(12,nrow(b)) cols[b[["cis"]]] <- 10 labels <- dplyr::bind_rows(b[c("chr","start","end","gene")],a) circlize::circos.clear() circlize::circos.par(start.degree=90, track.height=0.1, cell.padding=c(0,0,0,0)) circlize::circos.initializeWithIdeogram(species="hg19", track.height=0.05, ideogram.height=0.06) circlize::circos.genomicLabels(labels, labels.column=4, cex=1.1, font=3, side="inside") circlize::circos.genomicLink(bind_rows(a,a,a,a,a,a), b[c("chr","start","end")], col=cols, directional=1, border=10, lwd=2)
A more recent implementation is the qtlClassifier
function. For this example we have,
options(width=200) geneSNP <- merge(merged[c("prot","MarkerName")],pQTLdata::inf1[c("prot","gene")],by="prot")[c("gene","MarkerName","prot")] SNPPos <- merged[c("MarkerName","CHR","POS")] genePos <- pQTLdata::inf1[c("gene","chr","start","end")] cvt <- gap::qtlClassifier(geneSNP,SNPPos,genePos,1e6) knitr::kable(head(cvt)) cistrans.check <- merge(cvt[c("gene","MarkerName","Type")],cis.vs.trans[c("p.gene","SNP","cis.trans")], by.x=c("gene","MarkerName"),by.y=c("p.gene","SNP")) with(cistrans.check,table(Type,cis.trans))
t2d <- gap::qtl2dplot(cis.vs.trans,xlab="pQTL position",ylab="Gene position")
The pQTL-gene plot above can be also viewed in a 2-d plotly style, fig2d.html,
fig2d <- gap::qtl2dplotly(cis.vs.trans,xlab="pQTL position",ylab="Gene position") htmlwidgets::saveWidget(fig2d,file="fig2d.html") htmltools::tags$iframe(src = "fig2d.html", width = "100%", height = "650px")
and 3-d counterpart, fig3d.html,
fig3d <- gap::qtl3dplotly(cis.vs.trans,zmax=300,qtl.prefix="pQTL:",xlab="pQTL position",ylab="Gene position") htmlwidgets::saveWidget(fig3d,file="fig3d.html") htmltools::tags$iframe(src = "fig3d.html", width = "100%", height = "600px")
Both plots are responsive.
As biomaRt
is not always on, we keep a copy of hgnc.
set_config(config(ssl_verifypeer = 0L)) mart <- biomaRt::useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl") attrs <- c("hgnc_symbol", "chromosome_name", "start_position", "end_position", "band") hgnc <- vector("character",180) for(i in 1:180) { v <- with(merge_cvt[i,],paste0(Chr,":",bp,":",bp)) g <- subset(getBM(attributes = attrs, filters="chromosomal_region", values=v, mart=mart),!is.na(hgnc_symbol)) hgnc[i] <- paste(g[["hgnc_symbol"]],collapse=";") cat(i,g[["hgnc_symbol"]],hgnc[i],"\n") } save(hgnc,file="hgnc.rda",compress="xz")
We now proceed with
load(file.path(find.package("pQTLtools"),"tests","hgnc.rda")) merge_cvt <- within(merge_cvt,{ hgnc <- hgnc hgnc[cis] <- p.gene[cis] }) with(merge_cvt, { sentinels <- regioneR::toGRanges(Chr,bp-1,bp,labels=hgnc) cis.regions <- regioneR::toGRanges(Chr,cis.start,cis.end) loci <- toGRanges(Chr,Start,End) colors <- c("red","blue") GenomeInfoDb::seqlevelsStyle(sentinels) <- "UCSC" kp <- karyoploteR::plotKaryotype(genome="hg19",chromosomes=levels(seqnames(sentinels))) karyoploteR::kpAddBaseNumbers(kp) karyoploteR::kpPlotRegions(kp, data=loci,r0=0.05,r1=0.15,border="black") karyoploteR::kpPlotMarkers(kp, data=sentinels, labels=hgnc, text.orientation="vertical", cex=0.5, y=0.3*seq_along(hgnc)/length(hgnc), srt=30, ignore.chromosome.ends=TRUE, adjust.label.position=TRUE, label.color=colors[2-cis], label.dist=0.002, cex.axis=3, cex.lab=3) legend("bottomright", bty="n", pch=c(19,19), col=colors, pt.cex=0.4, legend=c("cis", "trans"), text.col=colors, cex=0.8, horiz=FALSE) # panel <- toGRanges(p.chr,p.start,p.end,labels=p.gene) # kpPlotLinks(kp, data=loci, data2=panel, col=colors[2-cis]) })
It is now considerably easier with Genomic Regions Enrichment of Annotations Tool (GREAT).
post <- function(regions) { job <- rGREAT::submitGreatJob(get(regions), species="hg19", version="3.0.0") et <- rGREAT::getEnrichmentTables(job,download_by = 'tsv') tb <- do.call('rbind',et) write.table(tb,file=paste0(regions,".tsv"),quote=FALSE,row.names=FALSE,sep="\t") invisible(list(job=job,tb=tb)) } M <- 1e+6 merge <- merged %>% dplyr::mutate(chr=Chrom, start=POS-M, end=POS+M) %>% dplyr::mutate(start=if_else(start<1,1,start)) %>% dplyr::select(prot,MarkerName,chr,start,end) cistrans <- dplyr::select(merge, chr,start,end) %>% dplyr::arrange(chr,start,end) %>% dplyr::distinct() # All regions cistrans.post <- post("cistrans") job <- with(cistrans.post,job) rGREAT::plotRegionGeneAssociationGraphs(job) rGREAT::availableOntologies(job) # plot of the top term par(mfcol=c(3,1)) rGREAT::plotRegionGeneAssociationGraphs(job, ontology="GO Molecular Function") rGREAT::plotRegionGeneAssociationGraphs(job, ontology="GO Biological Process") rGREAT::plotRegionGeneAssociationGraphs(job, ontology="GO Cellular Component") # Specific regions IL12B <- dplyr::filter(merge,prot=="IL.12B") %>% dplyr::select(chr,start,end) KITLG <- dplyr::filter(merge,prot=="SCF") %>% dplyr::select(chr,start,end) TNFSF10 <- dplyr::filter(merge,prot=="TRAIL") %>% dplyr::select(chr,start,end) tb_all <- data.frame() for (r in c("IL12B","KITLG","TNFSF10")) { r.post <- post(r) tb_all <- rbind(tb_all,data.frame(gene=r,with(r.post,tb))) }
The top terms at Binomial p=1e-5 could be extracted as follows,
options(width=100) great3 <- filter(tb_all,BinomP<=1e-5) %>% dplyr::mutate(ID=gsub(":",": ",ID)) knitr::kable(great3,caption="GREAT IL12B-KITLG-TNFSF10 results",digits=3) great <- read.delim("cistrans.tsv") %>% dplyr::filter(BinomBonfP<=1e-8) %>% dplyr::mutate(ID=gsub(":",": ",ID)) unlink("IL12B.tsv") unlink("KITLG.tsv") unlink("TNFSF10.tsv") unlink("cistrans.tsv")
See example associated with import_eQTLCatalogue()
. A related function is import_OpenGWAS()
used to fetch data from OpenGWAS. The cis-pQTLs and 1e+6
flanking regions were considered and data are actually fetched from files stored locally. Only the first sentinel was used (r=1).
liftRegion <- function(x,chain,flanking=1e6) { require(GenomicRanges) gr <- with(x,GenomicRanges::GRanges(seqnames=chr,IRanges::IRanges(start,end))+flanking) GenomeInfoDb::seqlevelsStyle(gr) <- "UCSC" gr38 <- rtracklayer::liftOver(gr, chain) chr <- gsub("chr","",colnames(table(seqnames(gr38)))) start <- min(unlist(start(gr38))) end <- max(unlist(end(gr38))) invisible(list(chr=chr[1],start=start,end=end,region=paste0(chr[1],":",start,"-",end))) } sumstats <- function(prot,chr,region37) { cat("GWAS sumstats\n") vcf <- file.path(INF,"METAL/gwas2vcf",paste0(prot,".vcf.gz")) gwas_stats <- gwasvcf::query_gwas(vcf, chrompos = region37) %>% gwasvcf::vcf_to_granges() %>% GenomeInfoDb::keepSeqlevels(chr) %>% GenomeInfoDb::renameSeqlevels(paste0("chr",chr)) gwas_stats_hg38 <- rtracklayer::liftOver(gwas_stats, chain) %>% unlist() %>% dplyr::as_tibble() %>% dplyr::transmute(chromosome = seqnames, position = start, REF, ALT, AF, ES, SE, LP, SS) %>% dplyr::mutate(id = paste(chromosome, position, sep = ":")) %>% dplyr::mutate(MAF = pmin(AF, 1-AF)) %>% dplyr::group_by(id) %>% dplyr::mutate(row_count = n()) %>% dplyr::ungroup() %>% dplyr::filter(row_count == 1) %>% dplyr::mutate(chromosome=gsub("chr","",chromosome)) s <- ggplot2::ggplot(gwas_stats_hg38, aes(x = position, y = LP)) + ggplot2::theme_bw() + ggplot2::geom_point() + ggplot2::ggtitle(with(sentinel,paste0(prot,"-",SNP," association plot"))) s gwas_stats_hg38 } gtex <- function(gwas_stats_hg38,ensGene,region38) { cat("c. GTEx_v8 imported eQTL datasets\n") fp <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","tabix_ftp_paths_gtex.tsv") web <- read.delim(fp, stringsAsFactors = FALSE) %>% dplyr::as_tibble() local <- within(web %>% dplyr::as_tibble(), { f <- lapply(strsplit(ftp_path,"/imported/|/ge/"),"[",3); ftp_path <- paste0("~/rds/public_databases/GTEx/csv/",f) }) gtex_df <- dplyr::filter(local, quant_method == "ge") %>% dplyr::mutate(qtl_id = paste(study, qtl_group, sep = "_")) ftp_path_list <- setNames(as.list(gtex_df$ftp_path), gtex_df$qtl_id) hdr <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","column_names.GTEx") column_names <- names(read.delim(hdr)) safe_import <- purrr::safely(import_eQTLCatalogue) summary_list <- purrr::map(ftp_path_list, ~safe_import(., region38, selected_gene_id = ensGene, column_names)) result_list <- purrr::map(summary_list, ~.$result) result_list <- result_list[!unlist(purrr::map(result_list, is.null))] result_filtered <- purrr::map(result_list[lapply(result_list,nrow)!=0], ~dplyr::filter(., !is.na(se))) purrr::map_df(result_filtered, ~run_coloc(., gwas_stats_hg38), .id = "qtl_id") } gtex_coloc <- function(prot,chr,ensGene,chain,region37,region38,out) { gwas_stats_hg38 <- sumstats(prot,chr,region37) df_gtex <- gtex(gwas_stats_hg38,ensGene,region38) if (!exists("df_gtex")) return saveRDS(df_gtex,file=paste0(out,".RDS")) dplyr::arrange(df_gtex, -PP.H4.abf) p <- ggplot2::ggplot(df_gtex, aes(x = PP.H4.abf)) + ggplot2::theme_bw() + geom_histogram() + ggtitle(with(sentinel,paste0(prot,"-",SNP," PP4 histogram"))) + xlab("PP4") + ylab("Frequency") p } single_run <- function() { chr <- with(sentinel,Chr) ss <- subset(inf1,prot==sentinel[["prot"]]) ensRegion37 <- with(ss, { start <- start-M if (start<0) start <- 0 end <- end+M paste0(chr,":",start,"-",end) }) ensGene <- ss[["ensembl_gene_id"]] ensRegion38 <- with(liftRegion(ss,chain),region) cat(chr,ensGene,ensRegion37,ensRegion38,"\n") f <- with(sentinel,paste0(prot,"-",SNP)) gtex_coloc(sentinel[["prot"]],chr,ensGene,chain,ensRegion37,ensRegion38,f) } options(width=200) HOME <- Sys.getenv("HOME") HPC_WORK <- Sys.getenv("HPC_WORK") INF <- Sys.getenv("INF") M <- 1e6 sentinels <- subset(cis.vs.trans,cis) f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain") chain <- rtracklayer::import.chain(f) gwasvcf::set_bcftools(file.path(HPC_WORK,"bin","bcftools")) r <- 1 sentinel <- sentinels[r,] single_run() ktitle <- with(sentinel,paste0("Colocalization results for ",prot,"-",SNP))
The results are also loadable as follows.
coloc_df <- readRDS(file.path(find.package("pQTLtools"),"tests","OPG-rs2247769.RDS")) %>% dplyr::rename(Tissue=qtl_id, H0=PP.H0.abf,H1=PP.H1.abf, H2=PP.H2.abf,H3=PP.H3.abf,H4=PP.H4.abf) %>% mutate(Tissue=gsub("GTEx_V8_","",Tissue), H0=round(H0,2),H1=round(H1,2),H2=round(H2,2),H3=round(H3,2),H4=round(H4,2)) %>% dplyr::arrange(-H4) knitr::kable(coloc_df,caption="Colocalization results for OPG-chr8:120081031_C_T")
The function sumstats()
obtained meta-analysis summary statistics (in build 37 and therefore lifted over to build 38) to be used in colocalization
analysis. The output are saved in the .RDS
files. Note that ftp_path
changes from eQTL Catalog to local files.
The function pqtlMR()
has an attractive feature that multiple pQTLs
can be used together for conducting MR with a list of outcomes from MR-Base, e.g.,
outcome <- extract_outcome_data(snps=with(exposure,SNP),outcomes=c("ieu-a-7","ebi-a-GCST007432"))
.
For generic applications, the run_TwoSampleMR()
function can be used.
f <- file.path(system.file(package="pQTLtools"),"tests","Ins.csv") exposure <- TwoSampleMR::format_data(read.csv(f)) caption4 <- "IL6R variant and diseases" knitr::kable(exposure, caption=paste(caption4,"(instruments)"),digits=3) f <- file.path(system.file(package="pQTLtools"),"tests","Out.csv") outcome <- TwoSampleMR::format_data(read.csv(f),type="outcome") pqtlMR(exposure, outcome, prefix="IL6R-") result <- read.delim("IL6R-result.txt") %>% dplyr::select(-id.exposure,-id.outcome) knitr::kable(result,caption=paste(caption4, "(result)"),digits=3) single <- read.delim("IL6R-single.txt") %>% dplyr::select(-id.exposure,-id.outcome,-samplesize) knitr::kable(subset(single,!grepl("All",SNP)), caption=paste(caption4, "(single)"),digits=3)
We carry on producing a forest plot.
IL6R <- single %>% dplyr::filter(grepl("^rs222",SNP)) %>% dplyr::select(outcome,b,se) %>% setNames(c("outcome","Effect","StdErr")) %>% dplyr::mutate(outcome=gsub("\\b(^[a-z])","\\U\\1",outcome,perl=TRUE), Effect=as.numeric(Effect),StdErr=as.numeric(StdErr)) gap::mr_forestplot(IL6R,colgap.forest.left="0.05cm", fontsize=14, leftcols=c("studlab"), leftlabs=c("Outcome"), plotwidth="3.5inch", sm="OR", rightcols=c("effect","ci","pval"), rightlabs=c("OR","95%CI","P"), digits=2, digits.pval=2, scientific.pval=TRUE, common=FALSE, random=FALSE, print.I2=FALSE, print.pval.Q=FALSE, print.tau2=FALSE, addrow=TRUE, backtransf=TRUE, at=c(1:3)*0.5, spacing=1.5, xlim=c(0.5,1.5)) invisible(sapply(c("harmonise","result","single"), function(x) unlink(paste0("IL6R-",x,".txt"))))
The documentation example is quoted here,
prot <- "MMP.10" type <- "cis" f <- paste0(prot,"-",type,".mrx") d <- read.table(file.path(system.file(package="pQTLtools"),"tests",f), header=TRUE) exposure <- TwoSampleMR::format_data(within(d,{P=10^logP}), phenotype_col="prot", snp_col="rsid", chr_col="Chromosome", pos_col="Posistion", effect_allele_col="Allele1", other_allele_col="Allele2", eaf_col="Freq1", beta_col="Effect", se_col="StdErr", pval_col="P", log_pval=FALSE, samplesize_col="N") clump <- exposure[sample(1:nrow(exposure),nrow(exposure)/80),] # TwoSampleMR::clump_data(exposure) outcome <- TwoSampleMR::extract_outcome_data(snps=clump$SNP,outcomes="ebi-a-GCST007432") harmonise <- TwoSampleMR::harmonise_data(clump,outcome) prefix <- paste(prot,type,sep="-") run_TwoSampleMR(harmonise, mr_plot="pQTLtools", prefix=prefix)
At time of running (12/4/2024), clump_data
fails to run so 1.25% variants is used for illustrative purpose.
The output is contained in individual .txt
files, together with the scatter, forest, funnel and leave-one-out plots.
caption5 <- "MMP.10 variants and FEV1" knitr::kable(read.delim(paste0(prefix,"-result.txt"),header=TRUE),caption=paste(caption5, "(result)"),digits=3) knitr::kable(read.delim(paste0(prefix,"-heterogeneity.txt"),header=TRUE),caption=paste(caption5,"(heterogeneity)"),digits=3) knitr::kable(read.delim(paste0(prefix,"-pleiotropy.txt"),header=TRUE),caption=paste(caption5,"(pleiotropy)"),digits=3) knitr::kable(read.delim(paste0(prefix,"-single.txt"),header=TRUE),caption=paste(caption5,"(single)"),digits=3) knitr::kable(read.delim(paste0(prefix,"-loo.txt"),header=TRUE),caption=paste(caption5,"(loo)"),digits=3) for (x in c("result","heterogeneity","pleiotropy","single","loo")) unlink(paste0(prefix,"-",x,".txt"))
This is illustrated with IL-12B.
efo <- read.delim(file.path(find.package("pQTLtools"),"tests","efo.txt")) d3 <- read.delim(file.path(find.package("pQTLtools"),"tests","IL.12B.txt")) %>% dplyr::mutate(MRBASEID=unlist(lapply(strsplit(outcome,"id:"),"[",2)),y=b) %>% dplyr::left_join(efo) %>% dplyr::mutate(trait=gsub("\\b(^[a-z])","\\U\\1",trait,perl=TRUE)) %>% dplyr::select(-outcome,-method) %>% dplyr::arrange(cistrans,desc(trait)) knitr::kable(dplyr::select(d3,MRBASEID,trait,cistrans,nsnp,b,se,pval) %>% dplyr::group_by(cistrans), caption="MR with IL-12B variants",digits=3) p <- ggplot2::ggplot(d3,aes(y = trait, x = y))+ ggplot2::theme_bw()+ ggplot2::geom_point()+ ggplot2::facet_wrap(~cistrans,ncol=3,scales="free_x")+ ggplot2::geom_segment(ggplot2::aes(x = b-1.96*se, xend = b+1.96*se, yend = trait))+ ggplot2::geom_vline(lty=2, ggplot2::aes(xintercept=0), colour = 'red')+ ggplot2::xlab("Effect size")+ ggplot2::ylab("") p
References @sun18 @suhre20 are included as EndNote libraries which is now part of pQTLdata package.
The function uniprot2ids
converts UniProt IDs to others.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.