R/micro_rocky_mtn.R

Defines functions micro_rocky_mtn phy_fun

Documented in micro_rocky_mtn

#' @title Create Rocky Mountain plots from negative binomial taxa models
#' @name micro_rocky_mtn
#' @description Display the magnitude of log p-values for each of the taxa in \code{nb_mods} as vertical bars next to each other along the x-axis. The direction of the bars will be determined by the direction of the estimated relationship. The taxa will be color coded by the phylum they belong to, and taxa that have FRD adjusted p-values below your desired significance cutoff for the specified covariate will be labeled
#' @param modsum The output from nb_mods
#' @param ... The covariate you'd like to plot. Must be in the models created by nb_mods
#' @param main Plot title
#' @param ylab y-axis labels
#' @param subtitle Plot subtitle
#' @param pval_lines Logical; include horizonal dashed lines at corresponding p-values
#' @param pval_text Logical; label the y-axis with corresponding p-values
#' @param sig_text Logical; label the taxa with p-values below specified alpha
#' @param facet_labels Labels for different facets if covariate has more than 1 beta coefficient
#' @param alpha Significance cutoff
#' @param lwd Line width for pval_lines
#' @param lty Line type for pval_lines
#' @param seed Seed for random placement of significant taxa labels
#' @return A ggplot you can add geoms to if you'd like
#' @author Charlie Carpenter, Rachel Johnson, Dan Frank
#' @examples
#' data(phy); data(cla); data(ord); data(fam); data(met)
#'
#' otu_tabs = list(Phylum = phy, Class = cla, Order = ord, Family = fam)
#' set <- tidy_micro(otu_tabs = otu_tabs, meta = met) \%>\%
#' filter(day == 7) ## Only including the first week
#'
#' ## Creating negative binomial models on filtered tidy_micro set
#' nb_fam <- set \%>\%
#' otu_filter(ra_cutoff = 0.1, exclude_taxa = c("Unclassified", "Bacteria")) \%>\%
#' nb_mods(table = "Family", bpd1)
#'
#' nb_fam \%>\% micro_rocky_mtn(bpd1)
#' @export
micro_rocky_mtn <- function(modsum, ..., main = NULL, xlab = NULL,
                         ylab = expression("log"[10]*" p-value"), subtitle = NULL,
                         pval_lines = TRUE, pval_text = TRUE, sig_text = TRUE,
                         facet_labels = NULL, alpha = 0.05, lwd = 2, lty = 1,
                         seed = NULL){

  if(missing(...)) stop("NB_RockMtn requires a model coefficient")

  CC <- modsum$Convergent_Summary %>%
    dplyr::filter(Coef != "(Intercept)", stringr::str_detect(Coef, cov_str(...))) %>%
    dplyr::mutate(FDR_Pval = ifelse(FDR_Pval == 0.0000, 0.0005, FDR_Pval),
           phyl = sapply(Taxa, phy_fun)) %>%
    phyl_ord()

  CC %<>%
    dplyr::mutate(Coef = factor(Coef, levels = unique(Coef)),
           rnum = 1:nrow(CC),
           ys = rep(0,nrow(CC)),
           ye = ifelse(Beta < 0, log(FDR_Pval, base = 10), -log(FDR_Pval, base = 10)))

  pline <- c(log(0.01, base = 10), log(0.05, base = 10), log(0.10, base = 10),
             -log(0.01, base = 10), -log(0.05, base = 10), -log(0.10, base = 10))

  gg <- ggplot(CC, aes(x = rnum, y = ye)) +
    ggplot2::geom_segment(aes(x = rnum, xend = rnum, y = ys, yend = ye, colour = phyl), size = lwd) +
    ggplot2::theme_bw() +
    ggplot2::labs(colour = "Phylum", y = ylab, x = xlab, title = main, subtitle = subtitle) +
    ggplot2::theme(axis.text.x=element_blank(),
          axis.ticks.x=element_blank())

  if(length(unique(CC$Coef)) > 1L){
    gg <- gg + ggplot2::facet_wrap( ~ Coef, labeller = ggplot2::labeller(Coef = facet_labels))
  }

  if(pval_lines){
    gg <- gg + ggplot2::geom_hline(yintercept = pline, linetype = "dotdash")
  }

  if(pval_text){
    gg <- gg +
      ggplot2::scale_y_continuous(breaks = pline,
                                  labels = c("0.01","0.05","0.10", "0.01","0.05","0.10"))
  } else{
    gg <- gg +
      ggplot2::theme(axis.text.y=element_blank(),
                     axis.ticks.y=element_blank())
  }

  if(sig_text){
   gg <- gg + ggrepel::geom_label_repel(data = CC %>% dplyr::filter(FDR_Pval < alpha),
     aes(label = Taxa), show.legend = F, seed = seed)
  }

  gg +
    ggplot2::geom_hline(yintercept = 0)
}

## Function to match taxa with their full phylum name for better legend
phy_fun <- function(x){
  phy <- c("Acidobacteria","Actinobacteria","Aquificae","Armatimonadetes","Bacteroidetes",
           "Caldiserica", "Chlamydiae", "Chlorobi","Chloroflexi","Chrysiogenetes",
           "Cyanobacteria","Deferribacteres","Deinococcus-Thermus","Dictyoglomi",
           "Elusimicrobia","Fibrobacteres","Firmicutes","Fusobacteria","Gemmatimonadetes",
           "Lentisphaerae","Nitrospirae","Planctomycetes","Proteobacteria","Spirochaetes",
           "Synergistetes","Tenericutes","Thermodesulfobacteria","Thermomicrobia",
           "Thermotogae","Verrucomicrobia")

  phyl <- ifelse(sum(stringr::str_detect(phy, str_sub(x, 1L, 5L))) == 1,
    phy[stringr::str_detect(phy, str_sub(x, 1L, 7L))],
    as.character(x)
  )
  phyl
}
CharlieCarpenter/tidy.micro documentation built on Jan. 19, 2020, 6:28 p.m.