R/plots.R

Defines functions manplot

Documented in manplot

#' Manhattan Plot for GWAS Mapping Data
#'
#' \code{manplot} generates a manhattan plot using \code{ggplot2}
#'
#'
#' @param plot_df the output from the \code{gwas_mappings} function.
#' @param bf_line_color Set color of bonferroni line.
#' @return Ouput is a ggplot object facetted by chromosome. SNPs above bonferroni corrected p-value (gray line) are colored blue.
#' Confidence interval for a given peak is highlighted in red.
#' @export


manplot <- function(plot_df, bf_line_color = "gray") {
    plot_traits <- unique(plot_df$trait)
    plots <- lapply(plot_traits, function(i) {
        plot_df %>%
        dplyr::filter(trait == i) %>%
        dplyr::distinct(marker, .keep_all = T) %>%
        ggplot2::ggplot(.) +
        ggplot2::aes(x = POS/1e6, y = log10p) +
        ggplot2::scale_color_manual(values = c("black","red","blue")) +
        ggplot2::geom_rect(ggplot2::aes(xmin = startPOS/1e6,
                               xmax = endPOS/1e6,
                               ymin = 0,
                               ymax = Inf,
                               fill = "blue"),
                           color = "blue",fill = "cyan",linetype = 2,
                           alpha=.3)+
        ggplot2::geom_hline(ggplot2::aes(yintercept = BF),
                            color = bf_line_color,
                            alpha = .75,
                            size = 1) +
        ggplot2::geom_point( ggplot2::aes(color= factor(aboveBF)) ) +
        ggplot2::facet_grid( . ~ CHROM, scales = "free_x" , space = "free_x") +
        ggplot2::theme_bw() +
        ggplot2::theme(axis.text.x = ggplot2::element_text(size=14, face="bold", color="black"),
                       axis.text.y = ggplot2::element_text(size=16, face="bold", color="black"),
                       axis.title.x = ggplot2::element_text(size=20, face="bold", color="black", vjust=-.3),
                       axis.title.y = ggplot2::element_text(size=20, face="bold", color="black"),
                       strip.text.x = ggplot2::element_text(size=20, face="bold", color="black"),
                       strip.text.y = ggplot2::element_text(size=16, face="bold", color="black"),
                       plot.title = ggplot2::element_text(size=24, face="bold", vjust = 1),
                       legend.position="none",
                       panel.background = ggplot2::element_rect( color="black",size=1.2),
                       strip.background = ggplot2::element_rect(color = "black", size = 1.2)) +
        ggplot2::labs(x = "Genomic Position (Mb)",
                      y = expression(-log[10](p)),
                      title = i)
    })
    plots
  }



#' PxG plot
#'
#' \code{pxg_plot} generates a boxplot of phenotypes split by genotype at QTL peak position using \code{ggplot2}
#'
#'
#' @param plot_df the output from the \code{gwas_mappings} function.
#' @param loc custom location to output the pxg plot. Specified as CHROM:POS (e.g. II:1023423)
#' @param use_base Show base at position rather than REF/ALT.
#' @param color_strains character vector containing strains to color in plot. Default is c("N2","CB4856")
#' @return Ouput is a ggplot object facetted by peak SNP (if there are multiple peaks in a mapping).
#' Genotypes are encoded as REF or ALT and are on the x-axis. Phenotypes are on y-axis.
#' @export

pxg_plot <- function(plot_df, loc = NA, use_base = F, color_strains = c("N2","CB4856")){
  plot_traits <- unique(plot_df$trait)
  plots <- lapply(plot_traits, function(x) {
     plot_peak <- plot_df %>%
      na.omit() %>%
      dplyr::filter(trait == x) %>%
      dplyr::distinct(strain, value, peakPOS, .keep_all = T) %>%
      dplyr::select(strain, value, CHROM, POS = peakPOS, -allele) %>%
      dplyr::mutate(chr_pos = paste(CHROM, POS, sep=":"))

     if (is.na(loc)) {
      loc <- plot_peak %>% dplyr::select(CHROM, POS) %>%
        dplyr::distinct(.keep_all = T) %>%
        dplyr::transmute(chr_pos = paste0(CHROM, ":",POS))
     }

      to_plot <- snpeff(loc[1], severity = "ALL", elements = "ALL") %>%
      dplyr::select(strain, CHROM, POS, FILTER, FT, GT, REF, ALT) %>%
      dplyr::distinct( .keep_all = T) %>%
      dplyr::mutate(chr_pos = paste0(CHROM, ":", POS))

      to_plot <- dplyr::left_join(to_plot, plot_peak) %>%
          dplyr::mutate(chr_pos = paste(CHROM, POS, sep=":"))

      to_plot <- dplyr::filter(to_plot, !is.na(value)) %>%
      dplyr::distinct(strain, value, POS, .keep_all = T) %>%
      dplyr::filter(!is.na(GT), FILTER == "PASS", FT == "PASS") %>%
      dplyr::group_by(GT) %>%
      dplyr::mutate(GT2 = ifelse(use_base, ifelse(GT == "REF", REF, ALT), GT )) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(GT = GT2)

      # Handle Ref/Alt Diffs



    if (!unique(is.na(color_strains))) {
      to_plot <- to_plot %>%
        dplyr::mutate(colors = ifelse(strain %in% color_strains, strain, "aa_ignore"))%>%
        dplyr::arrange(colors)
    } else {
      to_plot <- to_plot %>%
        dplyr::mutate(colors = "aa_ignore")
    }

    to_plot %>%
      ggplot2::ggplot(., ggplot2::aes(x = GT, y = value, fill = GT)) +
      ggplot2::scale_fill_brewer(palette = "Set1") +
      ggplot2::geom_boxplot(outlier.colour = NA) +
      ggplot2::theme_bw() +
      ggplot2::geom_jitter(ggplot2::aes(color = colors,
                                        size = colors)) +
      ggplot2::scale_color_manual(values = c("black", "#2474FF", "#EE8F03", "red", "green"),
                                  labels = c("Other",
                                             unique(to_plot$colors)[2],
                                             unique(to_plot$colors)[3],
                                             unique(to_plot$colors)[4],
                                             unique(to_plot$colors)[5])) +
      ggplot2::scale_size_manual(values = c(2, 12, 12, 12, 12),
                                 labels = c("Other",
                                            unique(to_plot$colors)[2],
                                            unique(to_plot$colors)[3],
                                            unique(to_plot$colors)[4],
                                            unique(to_plot$colors)[5])) +
      # ggplot2::scale_color_brewer(palette = "Dark2", name = "Specified\nStrains") +
      ggplot2::facet_grid(.~chr_pos, scales = "free") +
      ggplot2::theme(axis.text.x = ggplot2::element_text(size=24, face="bold", color="black"),
                     axis.text.y = ggplot2::element_text(size=24, face="bold", color="black"),
                     axis.title.x = ggplot2::element_text(size=24, face="bold", color="black", vjust=-.3),
                     axis.title.y = ggplot2::element_text(size=24, face="bold", color="black"),
                     strip.text.x = ggplot2::element_text(size=24, face="bold", color="black"),
                     strip.text.y = ggplot2::element_text(size=16, face="bold", color="black"),
                     plot.title = ggplot2::element_text(size=24, face="bold", vjust = 1),
                     # legend.position="none",
                     panel.background = ggplot2::element_rect( color="black",size=1.2),
                     strip.background = ggplot2::element_rect(color = "black", size = 1.2)) +
      ggplot2::labs(y = "Phenotype", x = "Genotype", title = x)
  })
  plots
}


#' Plot variants for gene
#'
#' \code{gene_variants} generates a plot to visualize presence of variants for a particular gene of interest
#'
#'
#' @param gene is the gene of interest in gene name (e.g. "top-2", "pot-2") or wormbase gene ID (e.g. "WBGene00010785") format.
#' @return Ouput is a list of ggplot objects with strains on the Y axis and variants for gene of interest on X axis. Tiles are colored by variant or reference call.
#' @examples  gene_variants(gene = c("top-2","pot-2"))
#' @examples  test[[1]]
#' @examples test[[2]]
#' @export

gene_variants <- function(gene){

  gene_variant_plot <- list()

  for(i in 1:length(gene)){

    gene_info <- snpeff(gene[i])

    gene_to_plot <- gene_info %>%
      dplyr::arrange(POS, GT) %>%
      dplyr::mutate(fac_aa = factor(aa_change,
                                    ordered = is.ordered(aa_change),
                                    levels = aa_change,
                                    labels = aa_change)) %>%
      dplyr::filter(!is.na(GT), GT != "HET") %>%
      tidyr::complete(CHROM, POS, strain) %>%
      tidyr::fill(fac_aa) %>%
      dplyr::mutate(GT = ifelse(is.na(GT), "ZMISSING", GT))


    gene_variant_plot[[i]] <- ggplot2::ggplot(gene_to_plot) +
      ggplot2::aes(x = fac_aa,
                   y = factor(strain,
                              ordered = is.ordered(GT),
                              levels = strain,
                              labels = strain),
                   fill = GT) +
      ggplot2::scale_fill_manual(values = c("#F97848","#FFFFB2","white"), breaks=c("REF", "ALT")) +
      ggplot2::geom_tile(color = "black") +
      ggplot2::theme_bw() +
      ggplot2::theme(axis.text.x = ggplot2::element_text(size=12, face="bold", color="black", angle = 60, vjust= 1.2, hjust = 1.2),
                     axis.text.y = ggplot2::element_text(size=12, face="bold", color="black"),
                     axis.title.x = ggplot2::element_text(size=18, face="bold", color="black", vjust=-.3),
                     axis.title.y = ggplot2::element_text(size=18, face="bold", color="black"),
                     strip.text.x = ggplot2::element_text(size=14, face="bold", color="black"),
                     strip.text.y = ggplot2::element_text(size=14, face="bold", color="black"),
                     plot.title = ggplot2::element_text(size=24, face="bold", vjust = 1),
                     panel.background = ggplot2::element_blank(),
                     strip.background = ggplot2::element_rect(color = "black", size = 1.2)) +
      ggplot2::labs(y = "Strain", x = "Variant") +
      ggplot2::scale_x_discrete(expand = c(0,0))
  }

  return(gene_variant_plot)
}

#' Plot LD values for significant SNPs in a mapping
#'
#' \code{plot_peak_ld} generates a plot to visualize LD (r) for significant SNPs in a mapping
#'
#'
#' @param plot_df is the output from the process_mappings function.
#' @param trait is a string object corresponding to a trait of interest if plot_df has multiple traits in it.
#' @return returns an LDHeatmap object of SNPs that are above the Bonferroni corrected p-value
#' @examples  plot_peak_ld(processed_mapping_df) # for a one trait mapping data frame
#' @examples plot_peak_ld(plot_df = all_maps, trait = "amsacrine_f.l1") # for a multiple trait mapping data frame
#' @export

plot_peak_ld <- function(plot_df, trait = NULL){

  if (is.null(trait)) {
    snp_df <- plot_df %>% na.omit()
  }
  else {
    snp_df <- dplyr::filter(plot_df, trait == trait) %>%
      na.omit()
  }
  ld_snps <- dplyr::filter(snps, CHROM %in% snp_df$CHROM, POS %in%
                             snp_df$POS)
  ld_snps <- data.frame(snp_id = paste(ld_snps$CHROM, ld_snps$POS,
                                       sep = "_"), data.frame(ld_snps)[, 5:ncol(ld_snps)])
  sn <- list()
  for (i in 1:nrow(ld_snps)) {
    sn[[i]] <- genetics::genotype(as.character(gsub(1, "T/T",
                                                    gsub(-1, "A/A", ld_snps[i, 4:ncol(ld_snps)]))))
  }
  test <- data.frame(sn)
  colnames(test) <- (ld_snps$snp_id)
  if (ncol(test) == 1) {
    print("Only one significant SNP, not calculating LD")
  }
  else {

    ldcalc <- t(genetics::LD(test)[[3]])
    diag(ldcalc) <- 1


    LDs <- tbl_df(data.frame(ldcalc) %>%
          dplyr::add_rownames(var = "SNP1")) %>%
          tidyr::gather(SNP2, Dprime, -SNP1) %>%
          dplyr::arrange(SNP1) %>%
          tidyr::separate(SNP1, sep = "_", into = c("CHROM1", "POS1"), remove = F) %>%
          dplyr::arrange(CHROM1, as.numeric(POS1))

    ldplot <- ggplot2::ggplot(LDs) +
      ggplot2::aes(x = factor(SNP1, levels = unique(SNP1), ordered = T),
                   y = factor(SNP2, levels = unique(SNP1), ordered = T)) +
      ggplot2::geom_tile(ggplot2::aes(fill = Dprime)) +
      ggplot2::geom_text(ggplot2::aes(label = signif(Dprime,3)), fontface = "bold", size = 12)+
      ggplot2::theme(axis.text.x = ggplot2::element_text(size=16, face="bold", color="black"),
                     axis.text.y = ggplot2::element_text(size=16, face="bold", color="black"),
                     axis.title.x = ggplot2::element_text(size=0, face="bold", color="black", vjust=-.3),
                     axis.title.y = ggplot2::element_text(size=0, face="bold", color="black"),
                     legend.position="none") +
      scale_x_discrete(labels = function(x) { gsub("_", ":", x)}, expand = c(0,0)) +
      scale_y_discrete(position = "right", labels = function(x) { gsub("_", ":", x)}, expand = c(0,0)) +
      scale_fill_continuous(high = "#FF0000", low = "white", na.value = "white")
    #     rgb.palette <- grDevices::colorRampPalette(rev(c("blue",
    #                                                      "orange", "red")), space = "rgb")
    #     ld_outs <- LDheatmap::LDheatmap(test, LDmeasure = "r",
    #                                     SNP.name = colnames(test), color = rgb.palette(18))
    #     LD.grob1 <- grid::editGrob(ld_outs$LDheatmapGrob, gPath("heatMap",
    #                                                             "title"), gp = gpar(cex = 1.25, col = "black"))
    #     LD.grob2 <- grid::editGrob(LD.grob1, gPath("geneMap",
    #                                                "title"), gp = gpar(cex = 0, col = "orange"))
    #     LD.grob3 <- grid::editGrob(LD.grob2, gPath("Key", "title"),
    #                                gp = gpar(cex = 1.25, col = "black"))
    #     grid::grid.newpage()
    #     grid::grid.draw(LD.grob3)
    return(ldplot)
  }
}

na_to_1 <- function(x) { ifelse(is.na(x), 1, x)}

#' QQ-plot implemented in ggplot2
#'
#' \code{qq_plot} generates a QQ plot given a vector of log10(p) values
#'
#'
#' @param log10p vector of log10(p) values
#' @return returns a ggplot2 object
#' @export

qq_plot <- function(log10p){
  # following four lines from base R's qqline()
  ps <- 10^(-log10p)
  y <- -log10(sort(ps, decreasing = F))
  x <- -log10(ppoints(length(y)))

  d <- data.frame(Observed = y,
                  Expected = x)

  qqpl <- ggplot2::ggplot(d, ggplot2::aes(x = Expected, y = Observed)) +
    ggplot2::geom_point() +
    ggplot2::geom_abline(slope = 1, intercept = 0, color = "red")+
    ggplot2::theme_bw()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(size = 24,face = "bold", color = "black"),
                   axis.text.y = ggplot2::element_text(size = 24, face = "bold", color = "black"),
                   axis.title.x = ggplot2::element_text(size = 24, face = "bold", color = "black", vjust = -0.3),
                   axis.title.y = ggplot2::element_text(size = 24,face = "bold", color = "black"),
                   strip.text.x = ggplot2::element_text(size = 24,face = "bold", color = "black"),
                   strip.text.y = ggplot2::element_text(size = 16, face = "bold", color = "black"),
                   plot.title = ggplot2::element_text(size = 24, face = "bold", vjust = 1),
                   legend.position = "none",
                   panel.background = ggplot2::element_rect(color = "black", size = 1.2),
                   strip.background = ggplot2::element_rect(color = "black", size = 1.2)) +
    labs(x = "Theoretical", y = "Observed")

  return(qqpl)
}


#' Fine mapping plot of GWAS confidence interval
#'
#' \code{fine_map_plot} Fine mapping plot of GWAS confidence interval
#'
#'
#' @param df is a dataframe that is output by \code{process_correlations} function
#' @param plot_trait is a character vector corresponding to the trait of interest.
#' @param strain_comparison is a character vector corresponding to which strain genotype to use to color points by. Default is CB4856, which will color all variants that CB4856 contains a different color
#' @param color1 is a character vector corresponding to the color of the ALT genotype points. Default is blue
#' @param color2 is a character vector corresponding to the color of the REF genotype points. Default is orange
#' @return returns a ggplot2 object
#' @export

fine_map_plot <- function(df, plot_trait, strain_comparison = "CB4856", color1 = 'blue', color2 = 'orange'){

  fine_plot <- df %>%
    dplyr::filter(strain == strain_comparison,
                  trait == plot_trait)%>%
    ggplot2::ggplot(.)+
    ggplot2::aes(x = POS/1e6, y = -log10(abs_spearman_cor), fill = GT)+
    ggplot2::geom_point(shape=21, size =1, alpha = .7)+
    ggplot2::scale_fill_manual(values = c("ALT" = color1, "REF" = color2))+
    ggplot2::scale_color_manual(values = c("ALT" = color1, "REF" = color2))+
    facet_grid(.~CHROM,scales="free")+
    ggplot2::theme_bw() +
    ggplot2::theme(axis.text.x = ggplot2::element_text(size = 10, face = "bold", color = "black"),
                   axis.text.y = ggplot2::element_text(size = 10, face = "bold", color = "black"),
                   axis.title.x = ggplot2::element_text(size = 10, face = "bold", color = "black", vjust = -0.3),
                   axis.title.y = ggplot2::element_text(size = 8, face = "bold", color = "black"),
                   strip.text.x = ggplot2::element_text(size = 10, face = "bold", color = "black"),
                   strip.text.y = ggplot2::element_text(size = 10,  face = "bold", color = "black"),
                   plot.title = ggplot2::element_text(size = 0,  face = "bold", vjust = 1),
                   panel.background = ggplot2::element_rect(color = "black",  size = 1.2),
                   strip.background = ggplot2::element_rect(color = "black", size = 1.2),
                   legend.position = 'none')+
    labs(x = "Genomic Position (Mb)", y = expression(bold(-log[10](bolditalic(p)))))

  return(fine_plot)
}
AndersenLab/cegwas documentation built on March 6, 2020, 1:10 a.m.