R/plot_locus.R

Defines functions plot_locus

Documented in plot_locus

#' Generate a locus plot
#'
#' Generate a locus-specific plot with multiple selectable tracks.
#' Users can also generate multiple zoomed in views of the plot at
#'  multiple resolutions.
#' @param dataset_type Dataset type (e.g. "GWAS" or "eQTL").
#' @param color_r2 Whether to color data points (SNPs)
#'  by how strongly they correlate with the lead SNP 
#'  (i.g. LD measured in terms of r2). 
#' @param finemap_methods Fine-mapping methods to plot tracks for, 
#' where the y-axis show the Posterior Probabilities (PP) 
#' of each SNP being causal.
#' @param track_order The order in which tracks should appear
#'  (from top to bottom). 
#' @param track_heights The height of each track (from top to bottom).
#' @param plot_full_window Include a track with a Manhattan plot of the full
#' GWAS/eQTL locus (not just the zoomed-in portion). 
#' @param dot_summary Include a dot-summary plot that highlights the
#'  Lead, Credible Set, and Consensus SNPs.
#' @param qtl_suffixes If columns with QTL data is included in \code{dat},
#' you can indicate which columns those are with one or more string suffixes
#' (e.g. \code{qtl_suffixes=c(".eQTL1",".eQTL2")} to use the columns 
#' "P.QTL1", "Effect.QTL1", "P.QTL2", "Effect.QTL2").
#' @param mean.PP Include a track showing mean Posterior Probabilities (PP) 
#' averaged across all fine-mapping methods.
#' @param sig_cutoff Filters out SNPs to plot based on an (uncorrected)
#' p-value significance cutoff. 
#' @param gene_track Include a track showing gene bodies.
#' @param max_transcripts Maximum number of transcripts per gene.
#' @param tx_biotypes Transcript biotypes to include in the gene model track. 
#' By default (\code{NULL}), all transcript biotypes will be included.
#' See \link[echoplot]{get_tx_biotypes} for a full list of 
#' all available biotypes
#' @param point_size Size of each data point.
#' @param point_alpha Opacity of each data point.
#' @param snp_group_lines Include vertical lines to help highlight 
#' SNPs belonging to one or more of the following groups:
#'  Lead, Credible Set, Consensus.
#' @param labels_subset Include colored shapes and RSID labels
#' to help highlight SNPs belonging to one or more of the following groups:
#'  Lead, Credible Set, Consensus.
#' @param xtext Include x-axis title and text for each track
#' (not just the lower-most one). 
#' @param show_legend_genes Show the legend for the \code{gene_track}. 
#' @param zoom_exceptions_str Names of tracks to exclude when zooming.
#' 
#' @param xgr_libnames Passed to \link[echoplot]{XGR_plot}.
#' Which XGR annotations to check overlap with.
#' For full list of libraries see
#' \href{http://XGR_r-forge.r-project.org/#annotations-at-the-genomic-region-level}{
#'  here}.
#'  Passed to the \code{RData.customised} argument in \link[XGR]{xRDataLoader}.
#'  Examples:
#'  \itemize{
#'  \item{"ENCODE_TFBS_ClusteredV3_CellTypes"}
#'  \item{"ENCODE_DNaseI_ClusteredV3_CellTypes"}
#'  \item{"Broad_Histone"}
#'  }
#' @param xgr_n_top Passed to \link[echoplot]{XGR_plot}.
#' Number of top annotations to be plotted
#' (passed to \link[echoannot]{XGR_filter_sources} and then 
#' \link[echoannot]{XGR_filter_assays}).
#' 
#' @param nott_epigenome Include tracks showing brain cell-type-specific 
#' epigenomic data from 
#' \href{https://doi.org/10.1126/science.aay0793}{Nott et al. (2019)}. 
#' @param nott_regulatory_rects Include track generated by 
#' \link[echoannot]{NOTT2019_epigenomic_histograms}.
#' @param nott_show_placseq Include track generated by  
#' \link[echoannot]{NOTT2019_plac_seq_plot}.
#' @param nott_binwidth When including Nott et al. (2019)
#' epigenomic data in the track plots,
#' adjust the bin width of the histograms.
#' @param nott_bigwig_dir Instead of pulling Nott et al. (2019)
#'  epigenomic data
#' from the \emph{UCSC Genome Browser}, use a set of local bigwig files.
#' 
#' @param roadmap Find and plot annotations from Roadmap.
#' @param roadmap_n_top Passed to \link[echoplot]{ROADMAP_plot}.
#' Number of top annotations to be plotted
#' (passed to \link[echoannot]{ROADMAP_query}). 
#' @param roadmap_query Only plot annotations from Roadmap whose
#' metadata contains a string or any items from  a list of strings
#' (e.g. \code{"brain"} or \code{c("brain","liver","monocytes")}).
#' @param credset_thresh The minimum fine-mapped posterior probability
#'  for a SNP to be considered part of a Credible Set.
#' For example, \code{credset_thresh=.95} means that all Credible Set SNPs
#' will be 95\% Credible Set SNPs.
#' @param facet_formula Formula to facet plots by.
#'  See \link[ggplot2]{facet_grid} for details.
#' @param show_plot Print plot to screen.
#' @param save_plot Save plot as RDS file.
#' @param plot_format Format to save plot as 
#' when saving with \link[ggplot2]{ggsave}. 
#' @param save_RDS Save the tracks as an RDS file 
#' (\emph{Warning}: These plots take up a lot disk space). 
#' @param return_list Return a named list with each track as a separate plot
#'  (default: \code{FALSE}). If \code{TRUE}, will return a merged plot using
#'  \link[patchwork]{wrap_plots}. 
#' @param verbose Print messages.
#' 
#' @inheritParams transcript_model_track
#' @inheritParams echoLD::get_LD
#' @inheritParams echoLD::get_lead_r2
#' @inheritParams echoannot::get_window_limits
#' @inheritParams echoannot::NOTT2019_epigenomic_histograms
#' @inheritParams echodata::find_consensus_snps
#' 
#' @export  
#' @importFrom methods show
#' @importFrom echoLD get_lead_r2
#' @importFrom echodata find_consensus_snps fillNA_CS_PP 
#' @importFrom echoannot NOTT2019_plac_seq_plot NOTT2019_epigenomic_histograms
#' @importFrom patchwork wrap_plots plot_annotation
#' @examples
#' dat1 <- echodata::BST1
#' LD_matrix <- echodata::BST1_LD_matrix
#' locus_dir <- file.path(tempdir(),echodata::locus_dir)
#' plt <- echoplot::plot_locus(dat = dat1,
#'                             locus_dir = locus_dir,
#'                             LD_matrix = LD_matrix,
#'                             show_plot = TRUE)
plot_locus <- function(dat,
                       locus_dir,
                       LD_matrix=NULL,
                       LD_reference=NULL,
                       facet_formula="Method~.",
                       dataset_type="GWAS",
                       color_r2=TRUE,
                       finemap_methods=c("ABF","FINEMAP",
                                         "SUSIE","POLYFUN_SUSIE"),
                       track_order= NULL,
                       track_heights=NULL,
                       plot_full_window=TRUE,
                       dot_summary=FALSE,
                       qtl_suffixes=NULL,
                       mean.PP=TRUE,
                       credset_thresh=.95,
                       consensus_thresh=2,
                       sig_cutoff=5e-8,
                       gene_track=TRUE,
                       tx_biotypes=NULL,
                       point_size=1,
                       point_alpha=.6,
                       density_adjust=0.2,
                       snp_group_lines=c("Lead","UCS","Consensus"),
                       labels_subset=c("Lead","CS","Consensus"),
                       xtext=FALSE,
                       show_legend_genes=TRUE,
                       
                       xgr_libnames=NULL,
                       xgr_n_top=5,
                       
                       roadmap=FALSE,
                       roadmap_query=NULL,
                       roadmap_n_top=7,
                       zoom_exceptions_str="*full window$|zoom_polygon",
                       
                       nott_epigenome=FALSE,
                       nott_regulatory_rects=TRUE,
                       nott_show_placseq=TRUE,
                       nott_binwidth=200,
                       nott_bigwig_dir=NULL,
                       
                       save_plot=FALSE,
                       show_plot=TRUE,
                       genomic_units="Mb",
                       strip.text.y.angle=0,
                       max_transcripts=1,
                       zoom=c("1x"),
                       dpi=300,
                       height=12,
                       width=10,
                       plot_format="png",
                       save_RDS=FALSE,
                       return_list=FALSE,
                       conda_env="echoR_mini",
                       nThread=1,
                       verbose=TRUE){
    # devoptera::args2vars(plot_locus)
     
    requireNamespace("ggplot2")
    requireNamespace("patchwork")
    
    locus <- basename(locus_dir)
    messager("+-------- Locus Plot: ",locus,"--------+",v=verbose)
    dir.create(locus_dir, showWarnings = FALSE, recursive = TRUE)
    #### Set up data ####
    dat <- echodata::find_consensus_snps(
        dat = dat,
        credset_thresh = credset_thresh,
        consensus_thresh = consensus_thresh,
        verbose = FALSE)
    dat <- echodata::fillNA_CS_PP(dat = dat) |> data.table::data.table()
    dat <- echoannot::add_mb(dat = dat)
    
    available_methods <- gsub("\\.PP$","",grep("*\\.PP$",colnames(dat),
                                               value = TRUE)) |> unique()
    finemap_methods <- unique(
        finemap_methods[finemap_methods %in% available_methods]
    )
    if(mean.PP){finemap_methods <- unique(c(finemap_methods, "mean"))}
    # Add LD into the dat
    if(!is.null(LD_matrix)){
        dat <- echoLD::get_lead_r2(
            dat = dat,
            LD_matrix = LD_matrix,
            LD_format = "guess",
            verbose = verbose)
    }
    # Begin constructing tracks
    TRKS <- NULL;
    # Track: Summary
    if(isTRUE(dot_summary)){
        TRKS[["Summary"]] <- dot_summary_plot(dat = dat,
                                              credset_thresh = credset_thresh,
                                              show_plot = FALSE,
                                              verbose = verbose)
    }
    ####  Track: Main (GWAS) frozen ####
    full_window_name <- paste(dataset_type,"full window")
    if(isTRUE(plot_full_window)){
        messager("++ echoplot::",dataset_type,"full window track", v=verbose)
        TRKS[[full_window_name]] <- snp_track_merged(
            dat = dat,
              yvar = "-log10(P)",
              sig_cutoff = sig_cutoff,
              genomic_units = genomic_units,
              labels_subset = NULL,
              xtext = TRUE,
              show.legend = FALSE, 
              dataset_type = gsub(" ","\n",full_window_name),
              strip.text.y.angle = strip.text.y.angle,
              facet_formula = facet_formula,
              verbose = verbose) 
    }
    
    ####  Track: Main (GWAS) ####
    messager("++ echoplot::",dataset_type,"track", v=verbose)
    TRKS[[dataset_type]] <- snp_track_merged(
        dat = dat,
          yvar = "-log10(P)",
          genomic_units = genomic_units,
          sig_cutoff = sig_cutoff,
          labels_subset = NULL,
          xtext = xtext,
          dataset_type = "GWAS",
          strip.text.y.angle = strip.text.y.angle,
          facet_formula = facet_formula,
          verbose = verbose) 
    
    #### Track: QTL ####
    for (qtl in qtl_suffixes){
        messager("++ echoplot::",qtl,"track", v=verbose)
        pval_col <- guess_pvalue_col(dat, qtl_suffix = qtl)
        if(length(pval_col)>0) next
        TRKS[[qtl]]  <- snp_track_merged(
            dat = dat,
              yvar = paste0("-log10(",pval_col,")"),
              genomic_units = genomic_units,
              sig_cutoff = sig_cutoff,
              labels_subset = NULL,
              xtext = xtext,
              dataset_type = qtl,
              strip.text.y.angle = strip.text.y.angle,
              facet_formula = facet_formula,
              verbose = verbose)
    }
    #### Track: Fine-mapping ####
    messager("++ echoplot:: Merged fine-mapping track", v=verbose)
    TRKS[["Fine-mapping"]] <- snp_track_merged(
        dat = dat,
        yvar = "PP",
        genomic_units = genomic_units,
        sig_cutoff = sig_cutoff,
        absolute_labels = FALSE,
        label_type = "rsid_only",
        labels_subset = labels_subset,
        show.legend = FALSE,
        xtext = xtext,
        strip.text.y.angle = strip.text.y.angle,
        facet_formula = facet_formula,
        verbose = verbose)
    #### Track: Gene Models ####
    # DB tutorial: https://rdrr.io/bioc/ensembldb/f/vignettes/ensembldb.Rmd
    if(isTRUE(gene_track)){
        messager("++ echoplot:: Adding Gene model track.",v=verbose) 
        TRKS[["Genes"]] <- transcript_model_track(
            dat = dat,
            show.legend = show_legend_genes,
            xtext = xtext,
            max_transcripts = max_transcripts,
            tx_biotypes = tx_biotypes,
            expand_x_mult=NULL,
            verbose=verbose) 
    } 
    #### Track: XGR #### 
    palettes <- get_palettes(names_only = TRUE)
    for(i in seq_len(length(xgr_libnames))){
        lib_name <- xgr_libnames[[i]]
        xgr_out <-  XGR_plot(dat = dat, 
                             locus_dir = locus_dir,
                             lib_name = lib_name, 
                             palette = palettes[[i]],
                             n_top = xgr_n_top,
                             adjust = density_adjust,
                             nThread = nThread,
                             verbose = verbose) 
        TRKS[[lib_name]] <- xgr_out$plot
    } 
    #### Track: Roadmap #### 
    if(isTRUE(roadmap)){
        roadmap_out <- ROADMAP_plot(dat = dat, 
                                    roadmap_query = roadmap_query, 
                                    locus_dir = locus_dir, 
                                    n_top = roadmap_n_top,
                                    adjust = density_adjust,
                                    conda_env = conda_env,
                                    show_plot = FALSE,
                                    nThread = nThread,
                                    verbose = verbose)
        TRKS[["Roadmap\nchromatin marks\ncell-types"]] <- roadmap_out$plot
    }
    #### Track: NOTT2019 ####
    if(isTRUE(nott_epigenome)){
        #### Track: NOTT2019 histogram  #### 
        track.Nott_histo <- echoannot::NOTT2019_epigenomic_histograms(
            dat = dat,
            locus_dir = locus_dir,
            geom = "histogram",
            plot_formula = "Cell_type ~.",
            show_plot=FALSE,
            save_plot=FALSE,
            full_data=TRUE,
            return_assay_track=TRUE,
            binwidth=nott_binwidth, 
            density_adjust = density_adjust,
            save_annot=TRUE,
            as_ggplot=TRUE,
            strip.text.y.angle = strip.text.y.angle,
            xtext=xtext,
            nThread=nThread,
            verbose=verbose)
        TRKS[["Nott (2019)\nread densities"]] <- track.Nott_histo$plot +
            ggplot2::labs(y="Nott (2019)\nread densities") 
        #### Track: NOTT_2019 PLAC-seq  ####
        if(isTRUE(nott_show_placseq)){  
                track.Nott_plac <- echoannot::NOTT2019_plac_seq_plot(
                    dat = dat,
                    locus_dir=locus_dir,
                    title=locus,
                    show_regulatory_rects=nott_regulatory_rects,
                    return_interaction_track=TRUE,
                    save_annot=TRUE,
                    strip.text.y.angle = strip.text.y.angle,
                    xtext=xtext,
                    return_as = "ggplot",
                    show_plot = FALSE,
                    save_plot = FALSE,
                    nThread=nThread,
                    verbose=verbose)
                TRKS[["Nott (2019)\nPLAC-seq"]] <- track.Nott_plac$PLACseq +
                    ggplot2::labs(y="Nott (2019)\nPLAC-seq") 
        }
    } 
    #### Add vertical lines  ####
    if(!is.null(snp_group_lines)){
        TRKS <- add_multitrack_lines(TRKS = TRKS,
                                      dat = dat,
                                      snp_groups = snp_group_lines,
                                      line_alpha = .8,
                                      remove_duplicated_UCS_Consensus = TRUE,
                                      verbose = verbose)
    } 
    ##### Iterate over different window sizes #####
    plot_list <- list()
    for(pz in zoom){
        # try() Allows (X11) errors to occur and still finish the loop
        tryCatch({
            messager("+>+>+>+>+ zoom = ",pz," +<+<+<+<+", v=verbose)
            TRKS_zoom <- TRKS
            window_suffix <- get_window_suffix(dat=dat,
                                                    zoom=pz)
            if((plot_full_window) & (!window_suffix %in% c("1x","all"))){
                #### Add zoom polygon ####
                TRKS_zoom[["zoom_polygon"]] <- zoom_polygon(
                    dat = dat,
                     genomic_units = genomic_units,
                     zoom = pz)
                TRKS_zoom[[full_window_name]] <- zoom_highlight(
                    gg = TRKS_zoom[[full_window_name]],
                     dat = dat,
                     zoom = pz)
            }
            #### Remove extra full_window plot #####
            # The steps MUST come before reorder_tracks
            if(window_suffix=="1x"){
                # This track becomes redundant when you don't zoom in at all.
                messager("+ echoplot:: Removing",full_window_name,
                         "track @ zoom=1x",v=verbose)
                TRKS_zoom[[full_window_name]] <- NULL
            }
            
            # WARNING:: The order of these adjustment functions matters!
            ## Some of them reset the parameters of others
            
            #### Remove plots margins to save space ####
            TRKS_zoom <- remove_margins(TRKS = TRKS_zoom,
                                         dat = dat,
                                         verbose = verbose)
            #### Reorder tracks ####
            TRKS_zoom <- reorder_tracks(TRKS = TRKS_zoom,
                                         track_order = track_order,
                                         verbose = verbose)
            #### Make sure last plot has xtext ####
            TRKS_zoom <- add_back_xtext(TRKS = TRKS_zoom,
                                         verbose = verbose)
            #### Define zoom limits ####
            TRKS_zoom <- set_window_limits(TRKS = TRKS_zoom,
                                            dat = dat,
                                            zoom = pz,
                                            exceptions_str=zoom_exceptions_str,
                                            verbose = verbose)
            #### Construct title ####
            n_snps <- if(dataset_type %in% names(TRKS_zoom)){
                nrow(ggplot2::ggplot_build(
                    TRKS_zoom[[dataset_type]])$data[[2]])
            } else {NULL}
            title_text <- paste("Locus:",basename(locus_dir),
                                 paste0(
                                     "  (",
                                     "SNPs=",formatC(n_snps,big.mark = ","),
                                     "; ",
                                     "zoom=",window_suffix,
                                     ")"
                                 ))
            #### Check track heights ####
            heights <- check_track_heights(TRKS = TRKS_zoom,
                                                track_heights = track_heights,
                                                verbose = verbose)
            #### Fuse all tracks ####
            TRKS_FINAL <- patchwork::wrap_plots(TRKS_zoom,
                                                heights = heights,
                                                ncol = 1) +
                patchwork::plot_annotation(title = title_text)
            
            #### Add plot to list of zoomed plots ####
            plot_list[[pz]] <- if(return_list) TRKS_zoom else TRKS_FINAL
            
            #### Save plot ####
            plot_path <- file.path(
                locus_dir,
                paste("multiview",locus,LD_reference,
                      window_suffix,plot_format,sep="."))
            if(save_plot){
                messager("+ echoplot:: Saving plot ==>",plot_path)
                ggplot2::ggsave(filename = plot_path,
                                plot = TRKS_FINAL,
                                height = height,
                                width = width,
                                dpi = dpi,
                                bg = "transparent")
            }
            
            #### Save ggplot ####
            # Only save one zoom of these since these files are very large
            if(save_RDS & (pz==zoom[1])){
                trk_paths <- save_tracks(locus_dir=locus_dir,
                                         TRKS_zoom=TRKS_zoom,
                                         LD_reference = LD_reference,
                                         window_suffix=window_suffix,
                                         verbose=verbose)
            }
            #### Show the plot ####
            if(show_plot){methods::show(TRKS_FINAL)}
        }, error=function(e){message(e);NULL})
    } # End zoom loop
    return(plot_list)
}
RajLabMSSM/echoplot documentation built on Oct. 24, 2023, 2:41 a.m.