#' Generate a locus plot
#'
#' Generate a locus-specific plot with multiple selectable tracks.
#' Users can also generate multiple zoomed in views of the plot at
#' multiple resolutions.
#' @param dataset_type Dataset type (e.g. "GWAS" or "eQTL").
#' @param color_r2 Whether to color data points (SNPs)
#' by how strongly they correlate with the lead SNP
#' (i.g. LD measured in terms of r2).
#' @param finemap_methods Fine-mapping methods to plot tracks for,
#' where the y-axis show the Posterior Probabilities (PP)
#' of each SNP being causal.
#' @param track_order The order in which tracks should appear
#' (from top to bottom).
#' @param track_heights The height of each track (from top to bottom).
#' @param plot_full_window Include a track with a Manhattan plot of the full
#' GWAS/eQTL locus (not just the zoomed-in portion).
#' @param dot_summary Include a dot-summary plot that highlights the
#' Lead, Credible Set, and Consensus SNPs.
#' @param qtl_suffixes If columns with QTL data is included in \code{dat},
#' you can indicate which columns those are with one or more string suffixes
#' (e.g. \code{qtl_suffixes=c(".eQTL1",".eQTL2")} to use the columns
#' "P.QTL1", "Effect.QTL1", "P.QTL2", "Effect.QTL2").
#' @param mean.PP Include a track showing mean Posterior Probabilities (PP)
#' averaged across all fine-mapping methods.
#' @param sig_cutoff Filters out SNPs to plot based on an (uncorrected)
#' p-value significance cutoff.
#' @param gene_track Include a track showing gene bodies.
#' @param max_transcripts Maximum number of transcripts per gene.
#' @param tx_biotypes Transcript biotypes to include in the gene model track.
#' By default (\code{NULL}), all transcript biotypes will be included.
#' See \link[echoplot]{get_tx_biotypes} for a full list of
#' all available biotypes
#' @param point_size Size of each data point.
#' @param point_alpha Opacity of each data point.
#' @param snp_group_lines Include vertical lines to help highlight
#' SNPs belonging to one or more of the following groups:
#' Lead, Credible Set, Consensus.
#' @param labels_subset Include colored shapes and RSID labels
#' to help highlight SNPs belonging to one or more of the following groups:
#' Lead, Credible Set, Consensus.
#' @param xtext Include x-axis title and text for each track
#' (not just the lower-most one).
#' @param show_legend_genes Show the legend for the \code{gene_track}.
#' @param zoom_exceptions_str Names of tracks to exclude when zooming.
#'
#' @param xgr_libnames Passed to \link[echoplot]{XGR_plot}.
#' Which XGR annotations to check overlap with.
#' For full list of libraries see
#' \href{http://XGR_r-forge.r-project.org/#annotations-at-the-genomic-region-level}{
#' here}.
#' Passed to the \code{RData.customised} argument in \link[XGR]{xRDataLoader}.
#' Examples:
#' \itemize{
#' \item{"ENCODE_TFBS_ClusteredV3_CellTypes"}
#' \item{"ENCODE_DNaseI_ClusteredV3_CellTypes"}
#' \item{"Broad_Histone"}
#' }
#' @param xgr_n_top Passed to \link[echoplot]{XGR_plot}.
#' Number of top annotations to be plotted
#' (passed to \link[echoannot]{XGR_filter_sources} and then
#' \link[echoannot]{XGR_filter_assays}).
#'
#' @param nott_epigenome Include tracks showing brain cell-type-specific
#' epigenomic data from
#' \href{https://doi.org/10.1126/science.aay0793}{Nott et al. (2019)}.
#' @param nott_regulatory_rects Include track generated by
#' \link[echoannot]{NOTT2019_epigenomic_histograms}.
#' @param nott_show_placseq Include track generated by
#' \link[echoannot]{NOTT2019_plac_seq_plot}.
#' @param nott_binwidth When including Nott et al. (2019)
#' epigenomic data in the track plots,
#' adjust the bin width of the histograms.
#' @param nott_bigwig_dir Instead of pulling Nott et al. (2019)
#' epigenomic data
#' from the \emph{UCSC Genome Browser}, use a set of local bigwig files.
#'
#' @param roadmap Find and plot annotations from Roadmap.
#' @param roadmap_n_top Passed to \link[echoplot]{ROADMAP_plot}.
#' Number of top annotations to be plotted
#' (passed to \link[echoannot]{ROADMAP_query}).
#' @param roadmap_query Only plot annotations from Roadmap whose
#' metadata contains a string or any items from a list of strings
#' (e.g. \code{"brain"} or \code{c("brain","liver","monocytes")}).
#' @param credset_thresh The minimum fine-mapped posterior probability
#' for a SNP to be considered part of a Credible Set.
#' For example, \code{credset_thresh=.95} means that all Credible Set SNPs
#' will be 95\% Credible Set SNPs.
#' @param facet_formula Formula to facet plots by.
#' See \link[ggplot2]{facet_grid} for details.
#' @param show_plot Print plot to screen.
#' @param save_plot Save plot as RDS file.
#' @param plot_format Format to save plot as
#' when saving with \link[ggplot2]{ggsave}.
#' @param save_RDS Save the tracks as an RDS file
#' (\emph{Warning}: These plots take up a lot disk space).
#' @param return_list Return a named list with each track as a separate plot
#' (default: \code{FALSE}). If \code{TRUE}, will return a merged plot using
#' \link[patchwork]{wrap_plots}.
#' @param verbose Print messages.
#'
#' @inheritParams transcript_model_track
#' @inheritParams echoLD::get_LD
#' @inheritParams echoLD::get_lead_r2
#' @inheritParams echoannot::get_window_limits
#' @inheritParams echoannot::NOTT2019_epigenomic_histograms
#' @inheritParams echodata::find_consensus_snps
#'
#' @export
#' @importFrom methods show
#' @importFrom echoLD get_lead_r2
#' @importFrom echodata find_consensus_snps fillNA_CS_PP
#' @importFrom echoannot NOTT2019_plac_seq_plot NOTT2019_epigenomic_histograms
#' @importFrom patchwork wrap_plots plot_annotation
#' @examples
#' dat1 <- echodata::BST1
#' LD_matrix <- echodata::BST1_LD_matrix
#' locus_dir <- file.path(tempdir(),echodata::locus_dir)
#' plt <- echoplot::plot_locus(dat = dat1,
#' locus_dir = locus_dir,
#' LD_matrix = LD_matrix,
#' show_plot = TRUE)
plot_locus <- function(dat,
locus_dir,
LD_matrix=NULL,
LD_reference=NULL,
facet_formula="Method~.",
dataset_type="GWAS",
color_r2=TRUE,
finemap_methods=c("ABF","FINEMAP",
"SUSIE","POLYFUN_SUSIE"),
track_order= NULL,
track_heights=NULL,
plot_full_window=TRUE,
dot_summary=FALSE,
qtl_suffixes=NULL,
mean.PP=TRUE,
credset_thresh=.95,
consensus_thresh=2,
sig_cutoff=5e-8,
gene_track=TRUE,
tx_biotypes=NULL,
point_size=1,
point_alpha=.6,
density_adjust=0.2,
snp_group_lines=c("Lead","UCS","Consensus"),
labels_subset=c("Lead","CS","Consensus"),
xtext=FALSE,
show_legend_genes=TRUE,
xgr_libnames=NULL,
xgr_n_top=5,
roadmap=FALSE,
roadmap_query=NULL,
roadmap_n_top=7,
zoom_exceptions_str="*full window$|zoom_polygon",
nott_epigenome=FALSE,
nott_regulatory_rects=TRUE,
nott_show_placseq=TRUE,
nott_binwidth=200,
nott_bigwig_dir=NULL,
save_plot=FALSE,
show_plot=TRUE,
genomic_units="Mb",
strip.text.y.angle=0,
max_transcripts=1,
zoom=c("1x"),
dpi=300,
height=12,
width=10,
plot_format="png",
save_RDS=FALSE,
return_list=FALSE,
conda_env="echoR_mini",
nThread=1,
verbose=TRUE){
# devoptera::args2vars(plot_locus)
requireNamespace("ggplot2")
requireNamespace("patchwork")
locus <- basename(locus_dir)
messager("+-------- Locus Plot: ",locus,"--------+",v=verbose)
dir.create(locus_dir, showWarnings = FALSE, recursive = TRUE)
#### Set up data ####
dat <- echodata::find_consensus_snps(
dat = dat,
credset_thresh = credset_thresh,
consensus_thresh = consensus_thresh,
verbose = FALSE)
dat <- echodata::fillNA_CS_PP(dat = dat) |> data.table::data.table()
dat <- echoannot::add_mb(dat = dat)
available_methods <- gsub("\\.PP$","",grep("*\\.PP$",colnames(dat),
value = TRUE)) |> unique()
finemap_methods <- unique(
finemap_methods[finemap_methods %in% available_methods]
)
if(mean.PP){finemap_methods <- unique(c(finemap_methods, "mean"))}
# Add LD into the dat
if(!is.null(LD_matrix)){
dat <- echoLD::get_lead_r2(
dat = dat,
LD_matrix = LD_matrix,
LD_format = "guess",
verbose = verbose)
}
# Begin constructing tracks
TRKS <- NULL;
# Track: Summary
if(isTRUE(dot_summary)){
TRKS[["Summary"]] <- dot_summary_plot(dat = dat,
credset_thresh = credset_thresh,
show_plot = FALSE,
verbose = verbose)
}
#### Track: Main (GWAS) frozen ####
full_window_name <- paste(dataset_type,"full window")
if(isTRUE(plot_full_window)){
messager("++ echoplot::",dataset_type,"full window track", v=verbose)
TRKS[[full_window_name]] <- snp_track_merged(
dat = dat,
yvar = "-log10(P)",
sig_cutoff = sig_cutoff,
genomic_units = genomic_units,
labels_subset = NULL,
xtext = TRUE,
show.legend = FALSE,
dataset_type = gsub(" ","\n",full_window_name),
strip.text.y.angle = strip.text.y.angle,
facet_formula = facet_formula,
verbose = verbose)
}
#### Track: Main (GWAS) ####
messager("++ echoplot::",dataset_type,"track", v=verbose)
TRKS[[dataset_type]] <- snp_track_merged(
dat = dat,
yvar = "-log10(P)",
genomic_units = genomic_units,
sig_cutoff = sig_cutoff,
labels_subset = NULL,
xtext = xtext,
dataset_type = "GWAS",
strip.text.y.angle = strip.text.y.angle,
facet_formula = facet_formula,
verbose = verbose)
#### Track: QTL ####
for (qtl in qtl_suffixes){
messager("++ echoplot::",qtl,"track", v=verbose)
pval_col <- guess_pvalue_col(dat, qtl_suffix = qtl)
if(length(pval_col)>0) next
TRKS[[qtl]] <- snp_track_merged(
dat = dat,
yvar = paste0("-log10(",pval_col,")"),
genomic_units = genomic_units,
sig_cutoff = sig_cutoff,
labels_subset = NULL,
xtext = xtext,
dataset_type = qtl,
strip.text.y.angle = strip.text.y.angle,
facet_formula = facet_formula,
verbose = verbose)
}
#### Track: Fine-mapping ####
messager("++ echoplot:: Merged fine-mapping track", v=verbose)
TRKS[["Fine-mapping"]] <- snp_track_merged(
dat = dat,
yvar = "PP",
genomic_units = genomic_units,
sig_cutoff = sig_cutoff,
absolute_labels = FALSE,
label_type = "rsid_only",
labels_subset = labels_subset,
show.legend = FALSE,
xtext = xtext,
strip.text.y.angle = strip.text.y.angle,
facet_formula = facet_formula,
verbose = verbose)
#### Track: Gene Models ####
# DB tutorial: https://rdrr.io/bioc/ensembldb/f/vignettes/ensembldb.Rmd
if(isTRUE(gene_track)){
messager("++ echoplot:: Adding Gene model track.",v=verbose)
TRKS[["Genes"]] <- transcript_model_track(
dat = dat,
show.legend = show_legend_genes,
xtext = xtext,
max_transcripts = max_transcripts,
tx_biotypes = tx_biotypes,
expand_x_mult=NULL,
verbose=verbose)
}
#### Track: XGR ####
palettes <- get_palettes(names_only = TRUE)
for(i in seq_len(length(xgr_libnames))){
lib_name <- xgr_libnames[[i]]
xgr_out <- XGR_plot(dat = dat,
locus_dir = locus_dir,
lib_name = lib_name,
palette = palettes[[i]],
n_top = xgr_n_top,
adjust = density_adjust,
nThread = nThread,
verbose = verbose)
TRKS[[lib_name]] <- xgr_out$plot
}
#### Track: Roadmap ####
if(isTRUE(roadmap)){
roadmap_out <- ROADMAP_plot(dat = dat,
roadmap_query = roadmap_query,
locus_dir = locus_dir,
n_top = roadmap_n_top,
adjust = density_adjust,
conda_env = conda_env,
show_plot = FALSE,
nThread = nThread,
verbose = verbose)
TRKS[["Roadmap\nchromatin marks\ncell-types"]] <- roadmap_out$plot
}
#### Track: NOTT2019 ####
if(isTRUE(nott_epigenome)){
#### Track: NOTT2019 histogram ####
track.Nott_histo <- echoannot::NOTT2019_epigenomic_histograms(
dat = dat,
locus_dir = locus_dir,
geom = "histogram",
plot_formula = "Cell_type ~.",
show_plot=FALSE,
save_plot=FALSE,
full_data=TRUE,
return_assay_track=TRUE,
binwidth=nott_binwidth,
density_adjust = density_adjust,
save_annot=TRUE,
as_ggplot=TRUE,
strip.text.y.angle = strip.text.y.angle,
xtext=xtext,
nThread=nThread,
verbose=verbose)
TRKS[["Nott (2019)\nread densities"]] <- track.Nott_histo$plot +
ggplot2::labs(y="Nott (2019)\nread densities")
#### Track: NOTT_2019 PLAC-seq ####
if(isTRUE(nott_show_placseq)){
track.Nott_plac <- echoannot::NOTT2019_plac_seq_plot(
dat = dat,
locus_dir=locus_dir,
title=locus,
show_regulatory_rects=nott_regulatory_rects,
return_interaction_track=TRUE,
save_annot=TRUE,
strip.text.y.angle = strip.text.y.angle,
xtext=xtext,
return_as = "ggplot",
show_plot = FALSE,
save_plot = FALSE,
nThread=nThread,
verbose=verbose)
TRKS[["Nott (2019)\nPLAC-seq"]] <- track.Nott_plac$PLACseq +
ggplot2::labs(y="Nott (2019)\nPLAC-seq")
}
}
#### Add vertical lines ####
if(!is.null(snp_group_lines)){
TRKS <- add_multitrack_lines(TRKS = TRKS,
dat = dat,
snp_groups = snp_group_lines,
line_alpha = .8,
remove_duplicated_UCS_Consensus = TRUE,
verbose = verbose)
}
##### Iterate over different window sizes #####
plot_list <- list()
for(pz in zoom){
# try() Allows (X11) errors to occur and still finish the loop
tryCatch({
messager("+>+>+>+>+ zoom = ",pz," +<+<+<+<+", v=verbose)
TRKS_zoom <- TRKS
window_suffix <- get_window_suffix(dat=dat,
zoom=pz)
if((plot_full_window) & (!window_suffix %in% c("1x","all"))){
#### Add zoom polygon ####
TRKS_zoom[["zoom_polygon"]] <- zoom_polygon(
dat = dat,
genomic_units = genomic_units,
zoom = pz)
TRKS_zoom[[full_window_name]] <- zoom_highlight(
gg = TRKS_zoom[[full_window_name]],
dat = dat,
zoom = pz)
}
#### Remove extra full_window plot #####
# The steps MUST come before reorder_tracks
if(window_suffix=="1x"){
# This track becomes redundant when you don't zoom in at all.
messager("+ echoplot:: Removing",full_window_name,
"track @ zoom=1x",v=verbose)
TRKS_zoom[[full_window_name]] <- NULL
}
# WARNING:: The order of these adjustment functions matters!
## Some of them reset the parameters of others
#### Remove plots margins to save space ####
TRKS_zoom <- remove_margins(TRKS = TRKS_zoom,
dat = dat,
verbose = verbose)
#### Reorder tracks ####
TRKS_zoom <- reorder_tracks(TRKS = TRKS_zoom,
track_order = track_order,
verbose = verbose)
#### Make sure last plot has xtext ####
TRKS_zoom <- add_back_xtext(TRKS = TRKS_zoom,
verbose = verbose)
#### Define zoom limits ####
TRKS_zoom <- set_window_limits(TRKS = TRKS_zoom,
dat = dat,
zoom = pz,
exceptions_str=zoom_exceptions_str,
verbose = verbose)
#### Construct title ####
n_snps <- if(dataset_type %in% names(TRKS_zoom)){
nrow(ggplot2::ggplot_build(
TRKS_zoom[[dataset_type]])$data[[2]])
} else {NULL}
title_text <- paste("Locus:",basename(locus_dir),
paste0(
" (",
"SNPs=",formatC(n_snps,big.mark = ","),
"; ",
"zoom=",window_suffix,
")"
))
#### Check track heights ####
heights <- check_track_heights(TRKS = TRKS_zoom,
track_heights = track_heights,
verbose = verbose)
#### Fuse all tracks ####
TRKS_FINAL <- patchwork::wrap_plots(TRKS_zoom,
heights = heights,
ncol = 1) +
patchwork::plot_annotation(title = title_text)
#### Add plot to list of zoomed plots ####
plot_list[[pz]] <- if(return_list) TRKS_zoom else TRKS_FINAL
#### Save plot ####
plot_path <- file.path(
locus_dir,
paste("multiview",locus,LD_reference,
window_suffix,plot_format,sep="."))
if(save_plot){
messager("+ echoplot:: Saving plot ==>",plot_path)
ggplot2::ggsave(filename = plot_path,
plot = TRKS_FINAL,
height = height,
width = width,
dpi = dpi,
bg = "transparent")
}
#### Save ggplot ####
# Only save one zoom of these since these files are very large
if(save_RDS & (pz==zoom[1])){
trk_paths <- save_tracks(locus_dir=locus_dir,
TRKS_zoom=TRKS_zoom,
LD_reference = LD_reference,
window_suffix=window_suffix,
verbose=verbose)
}
#### Show the plot ####
if(show_plot){methods::show(TRKS_FINAL)}
}, error=function(e){message(e);NULL})
} # End zoom loop
return(plot_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.