tidy.micro: A Pipeline For Microbiome Analysis And Visualization

Documented in nb_bars

#' @title Create stacked bar charts based on negative binomial model estimates
#' @name nb_bars
#' @description nb_bars takes the output from nb_mods and creates stacked bar charts of the estimated relative abundance for each taxa. The benefit of modeling each taxa before created stacked bar charts is the ability to control for potential confounders. The function will facet wrap interaction terms. Currently, only quant_style "discrete" can be used for an interaction between two quantitative variables
#' @param modsum The output from nb_mods
#' @param ... The covariate you'd like to plot. Can be an interaction term or main effect, but must be in the models created by nb_mods
#' @param range The range you'd like to plot over for a quantitative variable. Will default to the IQR
#' @param quant_style "continuous" will plot over the entire range specified; "discrete" will plot only the endpoints of the range specified. "continuous" by default. This option is ignored without a quantitative variable
#' @param top_taxa Only plot X taxa with the highest relative abundance. The rest will be aggregated into an "Other" category
#' @param RA Only plot taxa with a relative abundance higher than X. The rest will be aggregated into an "Other" category
#' @param specific_taxa Plot this specific taxa even if it doesn't meet the top_taxa or RA requirements
#' @param lines Logical; Add outlines around the different taxa colors in the stacked bar charts
#' @param xaxis Labels for the x-axis ticks. Most useful for categorical variables and defaults to the levels
#' @param main Plot title
#' @param subtitle Subtitle for the plot
#' @param xlab x-axis label
#' @param ylab y-axis label
#' @param facet_labels Labels for the facets created for interaction terms
#' @param facet_layout Rearrange the facets created for interaction terms
#' @return Returns a ggplot that you can add geoms to if you'd like
#' @examples
#' data(phy); data(cla); data(ord); data(fam); data(met)
#' otu_tabs = list(Phylum = phy, Class = cla, Order = ord, Family = fam)
#' set <- tidy_micro(otu_tabs = otu_tabs, meta = met) %>%
#' filter(day == 7) ## Only including the first week
#'
#' ## Creating negative binomial models on filtered tidy_micro set
#' nb_fam <- set %>%
#' mutate(bpd1 = factor(bpd1)) %>%  ## making bpd1 a factor
#' otu_filter(ra_cutoff = 0.1, exclude_taxa = c("Unclassified", "Bacteria")) %>%
#' nb_mods(table = "Family", bpd1)
#' nb_fam %>%
#' nb_bars(bpd1, top_taxa = 9, xlab = "BPD Severity")
#' @export
nb_bars <- function(modsum, ..., range, quant_style = c("continuous", "discrete"),
                    top_taxa = 0, RA = 0, specific_taxa, lines = TRUE,
                    xaxis, main, subtitle, xlab, ylab, facet_labels, facet_layout){

  if(!is.double(top_taxa)) stop("top_taxa must be an integer.")
  if(RA < 0 | RA > 100) stop("RA must be between 0 and 100.")
  if(top_taxa > 0 & RA > 0) stop("Can not plot when both top_taxa and RA are greater than 0.")
  if(top_taxa > length(unique(modsum$Convergent_Summary$Taxa))){
    stop("top_taxa must be equal to or less than the number of convergent taxa.")
  }

  cc <- nb_type(modsum, ...)

  if(length(cc) == 0) stop("Variable specified for bar charts is not in original model.")

  ## Making leaving labels blank optional
  if(missing(main)) main <- NULL ; if(missing(xlab)) xlab <- NULL
  if(missing(specific_taxa)) specific_taxa <- NULL ; if(missing(subtitle)) subtitle <- NULL
  if(missing(ylab)) ylab <- "Relative Abundance (%)" ; if(missing(facet_labels)) facet_labels <- NULL
  if(missing(quant_style)) quant_style <- "continuous"
  if(missing(facet_layout)) facet_layout <- 1
  if(facet_layout %nin% c(1,2)) stop("facet_layout must be either 1 or 2")

  if(cc == "categ") {
    if(missing(xaxis)) xaxis <- modsum$Model_Covs[,cov_str(...)] %>% levels

    nb_bars_categ(modsum, ..., top_taxa = top_taxa, lines = lines,
                  RA = RA, specific_taxa = specific_taxa, main = main,
                  xaxis = xaxis, xlab = xlab, ylab = ylab, subtitle = subtitle)
  } else if(cc == "quant"){

    ## Setting range to 1st and 3rd quartile if not specified
    if(missing(range)) {
      cat("Range not specified and will be set to the 1st and 3rd quartile.\n")
      range <- modsum$Model_Covs[,cov_str(...)] %>%
        quantile(probs = c(0.25, 0.75), na.rm = TRUE)
    }
    if(!is.numeric(range) | length(range) != 2) stop("range must be a numeric vector of length 2")
    if(missing(xaxis)) xaxis <- as.character(range)

    nb_bars_quant(modsum, ..., quant_style = quant_style, range = range, top_taxa = top_taxa,
                  lines = lines, RA = RA, specific_taxa = specific_taxa, main = main,
                  xaxis = xaxis, xlab = xlab, ylab = ylab, subtitle = subtitle)
  } else if(cc == "c*c.int"){
    if(missing(xaxis)) xaxis <- modsum$Model_Covs[,cov_str(...)[1]] %>% levels

    nb_bars_ccint(modsum, ..., top_taxa = top_taxa,
                   RA = RA, specific_taxa = specific_taxa, main = main, lines = lines,
                   xaxis = xaxis, xlab = xlab, ylab = ylab, subtitle = subtitle,
                  facet_labels = facet_labels, facet_layout = facet_layout)

  } else if(cc == "q*c.int"){

    ## Setting range to 1st and 3rd quartile if not specified
    if(missing(range)) {
      cat("Range not specified and will be set to the 1st and 3rd quartile.\n")
      range <- modsum$Model_Covs[,cov_str(...)] %>%
        dplyr::select_if(is.numeric) %>% ## Pull the numeric var
        purrr::simplify() %>% ## Simplify down into a vector
        quantile(probs = c(0.25, 0.75), na.rm = TRUE)
    }
    if(!is.numeric(range) | length(range) != 2) stop("range must be a numeric vector of length 2")
    if(missing(xaxis)) xaxis <- as.character(range)

    nb_bars_qcint(modsum, ..., range = range, quant_style = quant_style, top_taxa = top_taxa, lines = lines,
                  RA = RA, specific_taxa = specific_taxa, main = main,
                  xaxis = xaxis, xlab = xlab, ylab = ylab, subtitle = subtitle,
                  facet_labels = facet_labels, facet_layout = facet_layout)
  } else if(cc == "q*q.int"){

    if(missing(range)){
      cat("Ranges not specified and will be set to the 1st and 3rd quartile.\n")

      r1 <- modsum$Model_Covs[,cov_str(...)] %>%
        dplyr::select(1) %>% ## Pull the numeric var
        purrr::simplify() %>% ## Simplify down into a vector
        quantile(probs = c(0.25, 0.75), na.rm = TRUE)

      r2 <- modsum$Model_Covs[,cov_str(...)] %>%
        dplyr::select(2) %>% ## Pull the numeric var
        purrr::simplify() %>% ## Simplify down into a vector
        quantile(probs = c(0.25, 0.75), na.rm = TRUE)

      range <- rbind(r1,r2)
    }

    if(all(dim(range) != 2) | !is.matrix(range)) stop("range must be a 2x2 matrix")

    if(missing(xaxis)){ ## xaxis labels when missing
      if(facet_layout == 1) xaxis <- as.character(round(range[1,], 2))
      if(facet_layout == 2) xaxis <- as.character(round(range[2,], 2))
    }

    if(is.null(facet_labels)){ ## facet labels when missing
      if(facet_layout == 1) facet_labels <- as.character(round(range[2,], 2))
      if(facet_layout == 2) facet_labels <- as.character(round(range[1,], 2))
    }
    names(facet_labels) <- c("Low","High")

    nb_bars_qqint(modsum, ..., range = range, top_taxa = top_taxa, lines = lines,
                  RA = RA, specific_taxa = specific_taxa, main = main,
                  xaxis = xaxis, xlab = xlab, ylab = ylab, subtitle = subtitle,
                  facet_labels = facet_labels, facet_layout = facet_layout)
  }
}

nb_bars_categ <- function(modsum, ..., top_taxa, RA, specific_taxa,
                          xaxis, xlab, ylab, main, subtitle, lines){

  if("quant" %in% modsum$Model_Coef$Cov_Type){
    modsum$Model_Coef <- plyr::ddply(modsum$Model_Coef,
                                     plyr::.(Taxa), quant_cont, modsum$Model_Covs,...)
  }

  msum <- modsum$Model_Coef %>%
    dplyr::filter(Cov_Type == "Intercept" | stringr::str_detect(Coef, cov_str(...))) %>%
    plyr::ddply(plyr::.(Taxa), categ_est) %>%
    Taxa_ord() %>%
    dplyr::mutate(Coef = factor(Coef, levels = unique(Coef)))

  ord <- msum %>%
    dplyr::group_by(Taxa) %>%
    dplyr::summarize(mean.nb = mean(nb.avg)) %>%
    dplyr::arrange(desc(mean.nb))

  if(top_taxa > 0){
    if("Other" %in% ord$Taxa[1:top_taxa]) top_taxa <- top_taxa + 1

    msum$ord <- ifelse(msum$Taxa %in% unique(ord$Taxa)[1:top_taxa] & msum$Taxa != "Other", "Top", "Other")
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum,...) %>%
      categ_bars(xaxis=xaxis, main=main, xlab=xlab, ylab=ylab, subtitle=subtitle, lines=lines)
  } else if(RA > 0 & RA < 100){
    msum$ord <- ifelse(msum$Taxa == "Other", "Other",
                       ifelse(msum$nb.avg >= RA, "Top", "Other"))
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum,...) %>%
      categ_bars(xaxis=xaxis, main=main, xlab=xlab, ylab=ylab, subtitle=subtitle, lines = lines)
  } else {
    msum %>%
      categ_bars(xaxis=xaxis, main=main, xlab=xlab, ylab=ylab, subtitle=subtitle, lines = lines)
  }
}

nb_bars_quant <- function(modsum, ..., range, quant_style, top_taxa, RA,
                          specific_taxa, main, xaxis, xlab, ylab, subtitle, lines){

  if("quant" %in% modsum$Model_Coef$Cov_Type){
    modsum$Model_Coef <- plyr::ddply(modsum$Model_Coef,
                                     plyr::.(Taxa), quant_cont, modsum$Model_Covs,...)
  }

  if(quant_style == "continuous"){
    RR <- seq(range[1], range[2], length.out = 10)

    msum <- modsum$Model_Coef %>%
      dplyr::filter(stringr::str_detect(Coef, cov_str(...)))

    ## Estimates for taxa along RR range
    Est <- exp(tcrossprod(msum$Estimate, RR) + msum$Intercept + log(100))
    colnames(Est) <- RR

    msum %<>%
      cbind(Est) %>%
      dplyr::select(Taxa, 6:15) %>%
      tidyr::gather(Quant, Value, -Taxa) %>%
      Taxa_ord()

    ord <- msum %>%
      dplyr::group_by(Taxa) %>%
      dplyr::summarise(nb.avg = mean(Value)) %>%
      dplyr::arrange(dplyr::desc(nb.avg))
  }

  if(quant_style == "discrete"){
    msum <- modsum$Model_Coef %>%
      dplyr::filter(stringr::str_detect(Coef, cov_str(...))) %>%
      dplyr::mutate(nb.avg = exp(Intercept + Estimate*mean(range) + log(100)),
             nb.low = exp(Intercept + Estimate*range[1] + log(100)),
             nb.up = exp(Intercept + Estimate*range[2] + log(100))
      )

  ord <- msum %>%
    dplyr::arrange(dplyr::desc(nb.avg))
  }

  if(top_taxa > 0){
    if("Other" %in% ord$Taxa[1:top_taxa]) top_taxa <- top_taxa + 1

    msum$ord <- ifelse(msum$Taxa %in% unique(ord$Taxa)[1:top_taxa] & msum$Taxa != "Other", "Top", "Other")
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum,...) %>%
      quant_bars(range = range, quant_style = quant_style, xaxis = xaxis, main = main,
                 xlab = xlab, ylab = ylab, subtitle=subtitle, lines = lines)
  } else if(RA > 0 & RA < 100){
    msum$ord <- ifelse(msum$Taxa == "Other", "Other",
                       ifelse(msum$nb.avg >= RA, "Top", "Other"))
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum,...) %>%
      quant_bars(range = range, quant_style = quant_style, xaxis = xaxis, main = main,
                 xlab = xlab, ylab = ylab, subtitle=subtitle, lines = lines)
  } else
    msum %>%
    quant_bars(range = range, quant_style = quant_style, xaxis = xaxis, main = main,
               xlab = xlab, ylab = ylab, subtitle=subtitle, lines = lines)

}

nb_bars_ccint <- function(modsum, ..., top_taxa, RA, specific_taxa, xaxis, lines,
                          xlab, ylab, main, subtitle, facet_labels, facet_layout){

  if("quant" %in% modsum$Model_Coef$Cov_Type){
    modsum$Model_Coef <- plyr::ddply(modsum$Model_Coef,
                                     plyr::.(Taxa), quant_cont, modsum$Model_Covs, ...)
  }

  l1 <- levels(modsum$Model_Covs[, cov_str(...)[1]]) ; l2 <- levels(modsum$Model_Covs[, cov_str(...)[2]])
  if( sum((l1%in%l2)) > 0) stop("Factor variables can not share levels.")


  msum <- suppressWarnings(
    modsum$Model_Coef %>%
      dplyr::filter(Cov_Type == "Intercept" |
             stringr::str_detect(Coef, cov_str(...)[1]) |
             stringr::str_detect(Coef, cov_str(...)[2]) ) %>%
      Taxa_ord() %>%
      plyr::ddply(plyr::.(Taxa), ccint_est, l1, l2, ...)
  )

  ord <- msum %>%
    dplyr::group_by(Taxa) %>%
    dplyr::summarise(nb.avg = mean(nb.avg)) %>%
    dplyr::arrange(desc(nb.avg))

  if(top_taxa > 0){
    if("Other" %in% ord$Taxa[1:top_taxa]) top_taxa <- top_taxa + 1

    msum$ord <- ifelse(msum$Taxa %in% unique(ord$Taxa)[1:top_taxa] & msum$Taxa != "Other", "Top", "Other")
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum, ...) %>%
      ccint_bars(xaxis = xaxis, main = main, xlab = xlab, ylab = ylab, subtitle=subtitle,
                 lines = lines, facet_labels = facet_labels, facet_layout = facet_layout, ...)
  } else if(RA > 0 & RA < 100){
    msum$ord <- ifelse(msum$Taxa == "Other", "Other",
                       ifelse(msum$nb.avg >= RA, "Top", "Other"))
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum, ...) %>%
      ccint_bars(xaxis = xaxis, main = main, xlab = xlab, ylab = ylab, subtitle=subtitle,
                 lines = lines, facet_labels = facet_labels, facet_layout = facet_layout, ...)
  } else
    msum %>%
    ccint_bars(xaxis = xaxis, main = main, xlab = xlab, ylab = ylab, subtitle=subtitle,
               lines = lines, facet_labels = facet_labels, facet_layout = facet_layout, ...)
}

nb_bars_qcint <- function(modsum, ..., range, quant_style, top_taxa, RA, specific_taxa, xaxis,
                          xlab, ylab, main, subtitle, facet_labels, facet_layout, lines){

  if("quant" %in% modsum$Model_Coef$Cov_Type){
    modsum$Model_Coef <- plyr::ddply(modsum$Model_Coef,
                                     plyr::.(Taxa), quant_cont, modsum$Model_Covs, ...)
  }

  msum <- modsum$Model_Coef %>%
    dplyr::filter(Coef == "(Intercept)" |
             stringr::str_detect(Coef, cov_str(...)[1]) |
             stringr::str_detect(Coef, cov_str(...)[2])) %>%
    Taxa_ord() %>%
    plyr::ddply(plyr::.(Taxa), qcint_est, modsum$Model_Covs, quant_style, range, ...)

  if(quant_style == "continuous"){
    ord <- msum %>%
      dplyr::group_by(Taxa) %>%
      dplyr::summarise(nb.avg = mean(Value)) %>%
      dplyr::arrange(desc(nb.avg))
  }

  if(quant_style == "discrete"){
    ord <- msum %>%
      dplyr::mutate(nb.avg = tapply(Est, Taxa, mean) %>%
               rep(each = nrow(msum)/length(unique(msum$Taxa)))) %>%
      dplyr::arrange(desc(nb.avg))
  }

  if(top_taxa > 0){
    if("Other" %in% ord$Taxa[1:top_taxa]) top_taxa <- top_taxa + 1

    msum$ord <- ifelse(msum$Taxa %in% unique(ord$Taxa)[1:top_taxa] & msum$Taxa != "Other", "Top", "Other")
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum, ...) %>%
      qcint_bars(m_cov = modsum$Model_Covs,
                 quant_style = quant_style, xaxis = xaxis, main = main, xlab = xlab,
                 ylab = ylab, subtitle = subtitle, lines = lines,
                 facet_labels = facet_labels, facet_layout = facet_layout, ...)
  } else if(RA > 0 & RA < 100){
    msum$ord <- ifelse(msum$Taxa == "Other", "Other",
                       ifelse(msum$nb.avg >= RA, "Top", "Other"))
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum, ...) %>%
      qcint_bars(m_cov = modsum$Model_Covs,
                 quant_style = quant_style, xaxis = xaxis, main = main, xlab = xlab,
                 ylab = ylab, subtitle = subtitle, lines = lines,
                 facet_labels = facet_labels, facet_layout = facet_layout, !!!quos(...))
    } else
    msum %>%
    qcint_bars(m_cov = modsum$Model_Covs,
               quant_style = quant_style, xaxis = xaxis, main = main, xlab = xlab,
               ylab = ylab, subtitle = subtitle, lines = lines,
               facet_labels = facet_labels, facet_layout = facet_layout, !!!quos(...))
}

nb_bars_qqint <- function(modsum, range, ..., top_taxa, RA, specific_taxa, xaxis, lines,
                          xlab, ylab, main, subtitle, facet_labels, facet_layout){

  if("quant" %in% modsum$Model_Coef$Cov_Type){
    modsum$Model_Coef <- plyr::ddply(modsum$Model_Coef,
                                     plyr::.(Taxa), quant_cont, modsum$Model_Covs,...)
  }

  msum <- modsum$Model_Coef %>%
    dplyr::filter(Coef == "(Intercept)" |
             stringr::str_detect(Coef, cov_str(...)[1]) |
             stringr::str_detect(Coef, cov_str(...)[2])) %>%
    Taxa_ord() %>%
    plyr::ddply(plyr::.(Taxa), qqint_est, range, ...)


  ord <- msum %>%
    dplyr::arrange(dplyr::desc(nb.avg))

  if(top_taxa > 0){
    if("Other" %in% ord$Taxa[1:top_taxa]) top_taxa <- top_taxa + 1

    msum$ord <- ifelse(msum$Taxa %in% unique(ord$Taxa)[1:top_taxa] & msum$Taxa != "Other", "Top", "Other")
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum, ...) %>%
      qqint_bars(xaxis = xaxis, main = main, xlab = xlab, ylab = ylab, subtitle=subtitle,
                 lines = lines, facet_labels = facet_labels, facet_layout = facet_layout)
  } else if(RA > 0 & RA < 100){
    msum$ord <- ifelse(msum$Taxa == "Other", "Other",
                       ifelse(msum$nb.avg >= RA, "Top", "Other"))
    msum$ord <- ifelse(msum$Taxa %in% specific_taxa, "Top", msum$ord)

    agg_bars(msum, ...) %>%
      qqint_bars(xaxis = xaxis, main = main, xlab = xlab, ylab = ylab, subtitle=subtitle,
                 lines = lines, facet_labels = facet_labels, facet_layout = facet_layout)
  } else
    msum %>%
    qqint_bars(xaxis = xaxis, main = main, xlab = xlab, ylab = ylab, subtitle=subtitle,
               lines = lines, facet_labels = facet_labels, facet_layout = facet_layout)
}

categ_bars <- function(msum, xaxis, main, xlab, ylab, subtitle, lines){

  gg <- msum %>%
    ggplot2::ggplot(aes(Coef,nb.avg)) +
    ggplot2::theme_bw() +
    ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle) +
    ggplot2::scale_x_discrete(labels = xaxis)

  if(lines) gg <- gg + ggplot2::geom_col(aes(fill = Taxa), colour = "black")
  else gg <- gg + ggplot2::geom_col(aes(fill = Taxa))
  gg
}

quant_bars <- function(msum, range, quant_style, xaxis, main,
                       xlab, ylab, subtitle, lines){

  if(quant_style == "continuous"){
    gg <- msum %>%
      ggplot2::ggplot(aes(x = as.numeric(Quant), y = Value, fill = Taxa)) +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle)

    if(lines) gg <- gg + ggplot2::geom_area(colour = "black")
    else gg <- gg + ggplot2::geom_area()
    gg

  } else if(quant_style == "discrete"){
  gg <- msum %>%
    dplyr::select(Taxa, nb.low, nb.up) %>%
    tidyr::gather(variable, value, -Taxa) %>%
    ggplot2::ggplot(aes(variable,value)) +
    ggplot2::theme_bw() +
    ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle) +
    ggplot2::scale_x_discrete(labels = xaxis)

  if(lines) gg <- gg + ggplot2::geom_col(aes(fill = Taxa), colour = "black")
  else gg <- gg + ggplot2::geom_col(aes(fill = Taxa))
  gg

  }
}

ccint_bars <- function(msum, xaxis, main, xlab, ylab, lines,
                       subtitle, facet_labels, facet_layout, ...){

  Cov <- parse(text = cov_str(...)[1])
  Fac <- parse(text = cov_str(...)[2])

  gg <- msum %>%
    ggplot2::ggplot(aes(eval(Cov), nb.avg)) +
    ggplot2::theme_bw() +
    ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle) +
    ggplot2::scale_x_discrete(labels = xaxis)

  if(lines) gg <- gg + ggplot2::geom_col(aes(fill = Taxa), colour = "black")
  else gg <- gg + ggplot2::geom_col(aes(fill = Taxa))

  if(facet_layout == 1){
    gg + ggplot2::facet_grid(. ~ eval(Fac),
                             labeller = ggplot2::labeller(Fac = facet_labels))
  } else {
    gg + ggplot2::facet_grid(eval(Fac) ~ .,
                             labeller = ggplot2::labeller(Fac = facet_labels)) +
      ggplot2::coord_flip()
  }
}

qcint_bars <- function(msum, m_cov, quant_style, xaxis, main, xlab, ylab,
                       subtitle, facet_labels, facet_layout, lines, ...){

  if(is.null(facet_labels)){
    if(is.factor(m_cov[,cov_str(...)[1]])){
      facet_labels <- levels(m_cov[,cov_str(...)[1]])
    } else facet_labels <- levels(m_cov[,cov_str(...)[2]])
  }

  names(facet_labels) <- levels(msum$Fac)

  if(quant_style == "continuous"){
    gg <- msum %>%
      ggplot2::ggplot(aes(x = as.numeric(Quant), y = Value, fill = Taxa)) +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle)

    if(lines) gg <- gg + ggplot2::geom_area(colour = "black")
    else gg <- gg + ggplot2::geom_area()

    if(facet_layout == 1){
      gg <- gg + ggplot2::facet_grid(. ~ Fac,
                                     labeller = ggplot2::labeller(Fac = facet_labels))
    } else {
      gg <- gg + ggplot2::facet_grid(Fac ~ .,
                                     labeller = ggplot2::labeller(Fac = facet_labels)) +
        ggplot2::coord_flip()
    }
  }

  if(quant_style == "discrete"){
    gg <- msum %>%
      ggplot2::ggplot(aes(x=HL, y=Est)) +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle) +
      ggplot2::scale_x_discrete(labels = xaxis)

    if(lines) gg <- gg + ggplot2::geom_col(aes(fill = Taxa), colour = "black")
    else gg <- gg + ggplot2::geom_col(aes(fill = Taxa))

    if(facet_layout == 1){
      gg <- gg + ggplot2::facet_grid(. ~ Fac,
                                     labeller = ggplot2::labeller(Fac = facet_labels))
    }
    else{
      gg <- gg + ggplot2::facet_grid(Fac ~ .,
                                     labeller = ggplot2::labeller(Fac = facet_labels)) +
        ggplot2::coord_flip()
    }
  }

  gg
}

qqint_bars <- function(msum, xaxis, main, xlab, ylab, lines,
                       subtitle, facet_labels, facet_layout){

  if(facet_layout == 1){

    gg <- msum %>%
      ggplot2::ggplot(aes(x=Fac1, y=Est)) +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle) +
      ggplot2::scale_x_discrete(labels = xaxis) +
      ggplot2::facet_grid(. ~ Fac2, labeller = ggplot2::labeller(Fac2 = facet_labels))

    if(lines) gg <- gg + ggplot2::geom_col(aes(fill = Taxa), colour = "black")
    else gg <- gg + ggplot2::geom_col(aes(fill = Taxa))

  }

  if(facet_layout == 2){

    gg <- msum %>%
      ggplot2::ggplot(aes(x=Fac2, y=Est)) +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, x = xlab, y = ylab, subtitle = subtitle) +
      ggplot2::scale_x_discrete(labels = xaxis) +
      ggplot2::facet_grid(. ~ Fac1, labeller = ggplot2::labeller(Fac1 = facet_labels))

    if(lines) gg <- gg + ggplot2::geom_col(aes(fill = Taxa), colour = "black")
    else gg <- gg + ggplot2::geom_col(aes(fill = Taxa))
  }

  gg
}


by_fun <- function(...){
  rlang::quos(...) %>%
    rlang::splice() %>%
    unlist %>%
    as.character %>%
    stringr::str_replace_all(pattern = "~", replacement = "") %>%
    return()
}

nb_type <- function(modsum, ...){
  Cov <- rlang::quos(...) %>%
    rlang::splice() %>%
    unlist %>%
    as.character %>%
    stringr::str_remove("~")  ## Makes '...' into a character string

  if(length(Cov) < 1) stop("nb_Bars requires a model covariate. If looking for stacked bar charts of raw counts use the function ra_bars.")
  if(length(Cov) > 1) stop("nb_Bars requires the use of only one model covariate. That covariate can be an interaction term such as 'Group*Age'")

  cofs <- modsum$Model_Coef[modsum$Model_Coef$Taxa == unique(modsum$Model_Coef$Taxa)[1],]

  if(length(cov_str(...)) == 1){
    cc <- cofs$Cov_Type[str_detect(cofs$Coef, cov_str(...)) &
                          !str_detect(cofs$Coef, ":")] %>%
      unique
  }

  if(length(cov_str(...)) == 2){
    cc <- cofs$Cov_Type[str_detect(cofs$Coef, cov_str(...)[1]) &
                          str_detect(cofs$Coef, ":")] %>%
      unique
  }

  cc
}

agg_bars <- function(msum,...){
  if("Cov_Type" %in% names(msum) & all(msum$Cov_Type %in% c("Intercept","categ")) ) {
    agg <- rbind(
      msum %>%
        dplyr::filter(ord=="Top") %>%
        dplyr::select(Taxa, Coef, nb.avg),

      cbind(Taxa = "Other",
            msum %>% dplyr::filter(ord == "Other") %>%
              dplyr::group_by(Coef) %>%
              dplyr::summarize(nb.avg = sum(nb.avg))
      )
    )
    SS <- agg %>% dplyr::group_by(Coef) %>% dplyr::summarize(ss = sum(nb.avg))
    if(any(SS$ss > 105)) warning(paste("Maximum column sum is", round(max(SS$ss)),
                                       "and will be rescaled to equal 100."),
                                 call. = FALSE)
    if(any(SS$ss < 95)) warning(paste("Minimum column sum is", round(min(SS$ss)),
                                      "and will be rescaled to equal 100."),
                                call. = FALSE)

    agg %<>% plyr::ddply(plyr::.(Coef),
                         function(set){
                           set %<>% dplyr::mutate(nb.avg = 100*nb.avg/sum(nb.avg))
                         })

  } else if("Cov_Type" %in% names(msum) & all(msum$Cov_Type == "quant") ) {
    agg <- rbind(
      msum %>%
        dplyr::filter(ord=="Top") %>%
        dplyr::select(Taxa, Coef, nb.low, nb.up),

      cbind(Taxa = "Other",
            msum %>% filter(ord == "Other") %>%
              dplyr::group_by(Coef) %>%
              dplyr::summarize(nb.low = sum(nb.low), nb.up = sum(nb.up))
      )
    )

    if(sum(agg$nb.up) > 105) warning(paste("Maximum column sum is", round(sum(agg$nb.up)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)
    if(sum(agg$nb.low) < 95) warning(paste("Minimum column sum is", round(sum(agg$nb.low)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)

    agg %<>% dplyr::mutate(nb.low = 100*nb.low/sum(nb.low), nb.up = 100*nb.up/sum(nb.up))
  } else if(ncol(msum) == 4 & all(c("Taxa", "Quant", "Value", "ord") %in% names(msum))) {
    agg <- rbind(
      msum %>%
        dplyr::filter(ord == "Top") %>%
        dplyr::select(Taxa, Quant, Value),

      cbind(Taxa = "Other",
            msum %>% filter(ord == "Other") %>%
              dplyr::group_by(Quant) %>%
              dplyr::summarise(Value = sum(Value))
      )
    )

    SS <- agg %>% dplyr::group_by(Quant) %>% dplyr::summarise(ss = sum(Value))
    if(any(SS$ss > 105)) warning(paste("Maximum taxa sum is", round(max(SS$ss), 2),
                                       "and will be rescaled to equal 100."),
                                 call. = FALSE)
    if(any(SS$ss < 95)) warning(paste("Minimum taxa sum is", round(min(SS$ss), 2),
                                      "and will be rescaled to equal 100."),
                                call. = FALSE)

    agg %<>%
      plyr::ddply(plyr::.(Quant),
                  function(set) set %<>% dplyr::mutate(Value = 100*Value/sum(Value)))
  } else if("c*c.int" %in% msum$Cov_Type){

    agg <- rbind(
      msum %>%
        dplyr::filter(ord=="Top") %>%
        dplyr::select(Taxa, Coef, nb.avg),

      cbind(Taxa = "Other",
            msum %>% filter(ord == "Other") %>%
              dplyr::group_by(Coef) %>%
              dplyr::summarize(nb.avg = sum(nb.avg))
      )
    ) %>%
      tidyr::separate(Coef, cov_str(...), "[.]")
    SS <- agg %>%
      dplyr::group_by_(cov_str(...)[1], cov_str(...)[2]) %>%
      dplyr::summarise(ss = sum(nb.avg))

    if(sum(agg$nb.up) > 105) warning(paste("Maximum column sum is", round(max(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)
    if(sum(agg$nb.low) < 95) warning(paste("Minimum column sum is", round(min(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)

    agg %<>% plyr::ddply(plyr::.(eval(cov_str(...)[1] %>% parse(text = .)),
                                 eval(cov_str(...)[2] %>% parse(text = .))),
                         function(set){
                           set %<>% dplyr::mutate(nb.avg = 100*nb.avg/sum(nb.avg))
                         })
  } else if("Est" %in% names(msum) & "HL" %in% names(msum)){
    oo <- msum %>% filter(ord == "Other") %>%
      dplyr::group_by(Fac, HL) %>%
      dplyr::summarize(Est = sum(Est))

    agg <- rbind(
      msum %>%
        dplyr::filter(ord=="Top") %>%
        dplyr::select(Taxa, Fac, HL, Est),

      cbind(Taxa = "Other", oo)
    )

    SS <- agg %>% dplyr::group_by(Fac,HL) %>% dplyr::summarise(ss = sum(Est))

    if(sum(agg$nb.up) > 105) warning(paste("Maximum column sum is", round(max(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)
    if(sum(agg$nb.low) < 95) warning(paste("Minimum column sum is", round(min(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)

    agg %<>% dplyr::group_by(Fac,HL) %>% dplyr::mutate(Est = 100*Est/sum(Est))
  } else if( all(names(msum) %in% c("Taxa", "Quant", "Fac", "Value", "ord")) ){

    oo <- msum %>% filter(ord == "Other") %>%
      dplyr::group_by(Quant, Fac) %>%
      dplyr::summarise(Value = sum(Value))

    agg <- rbind(
      msum %>%
        dplyr::filter(ord=="Top") %>%
        dplyr::select(Taxa, Quant, Fac, Value),

      cbind(Taxa = rep("Other", nrow(oo)), oo)
    )

    SS <- agg %>% dplyr::group_by(Quant, Fac) %>% dplyr::summarise(ss = sum(Value))

    if(any(SS$ss > 105)) warning(paste("Maximum column sum is", round(max(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)
    if(any(SS$ss < 95)) warning(paste("Minimum column sum is", round(min(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)

    agg %<>%
      plyr::ddply(plyr::.(Quant, Fac),
                  function(set){
                    set %<>%
                      dplyr::mutate(Value = 100*Value/sum(Value), Fac = factor(Fac))
                  }
                  )
  } else if( all(c("Est", "Fac1", "Fac2") %in% names(msum)) ){

    oo <- msum %>% filter(ord == "Other") %>%
      dplyr::group_by(Fac1, Fac2) %>%
      dplyr::summarize(Est = sum(Est))

    agg <- rbind(
      msum %>%
        dplyr::filter(ord=="Top") %>%
        dplyr::select(Taxa, Fac1, Fac2, Est),

      cbind(Taxa = rep("Other", nrow(oo)), oo)
    )
    SS <- agg %>% group_by(Fac1,Fac2) %>% summarise(ss = sum(Est))

    if(sum(agg$nb.up) > 105) warning(paste("Maximum column sum is", round(max(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)
    if(sum(agg$nb.low) < 95) warning(paste("Minimum column sum is", round(min(SS$ss)),
                                           "and will be rescaled to equal 100."),
                                     call. = FALSE)

    agg %<>%
      dplyr::group_by(Fac1,Fac2) %>%
      dplyr::mutate(Est = 100*Est/sum(Est)) %>%
      dplyr::ungroup()

    agg$Fac1 <- factor(agg$Fac1, levels = c("Low", "High"))
    agg$Fac2 <- factor(agg$Fac2, levels = c("Low", "High"))
  }

  agg
}

quant_cont <- function(m_coef, m_cov, ...){
  q <- m_cov[ ,m_coef$Coef[m_coef$Cov_Type == "quant" &
                              !(m_coef$Coef %in% cov_str(...))] %>% as.character]

  if(is.data.frame(q)){
    qm <- apply(q, 2, mean, na.rm = T) %>% as.vector
  } else qm <- mean(q, na.rm = TRUE) %>% as.vector


  m_coef$Intercept <- unique(m_coef$Intercept) +
                             sum(
                               m_coef$Estimate[m_coef$Cov_Type == "quant" &
                                                    !(m_coef$Coef %in% cov_str(...))]*qm )

  return(m_coef)
}

categ_est <- function(msum){
  msum %>%
    dplyr::mutate(nb.avg = ifelse(Cov_Type == "Intercept", exp(Intercept + log(100)),
                           exp(Intercept + Estimate + log(100)))
    )
}

ccint_est <- function(msum, l1, l2, ...){

  l1.fe <- numeric(0) ; l2.fe <- numeric(0) ; l.int <- numeric(0)

  for(i in 2:length(l1)){
    l1.fe[i] <- unique(msum$Intercept) +
      msum$Estimate[stringr::str_detect(msum$Coef, l1[i]) & msum$Cov_Type != "c*c.int"]
  }

  for(i in 2:length(l2)){
    l2.fe[i] <- unique(msum$Intercept) +
      msum$Estimate[stringr::str_detect(msum$Coef, l2[i]) & msum$Cov_Type != "c*c.int"]
  }

  if(length(l2) <= length(l1)){
    for(i in 2:length(l2)){
      for(j in 2:length(l1)){
        l.int[j] <- l1.fe[j] +
          msum$Estimate[stringr::str_detect(msum$Coef, l2[i]) & msum$Cov_Type == "categ"] +
          msum$Estimate[stringr::str_detect(msum$Coef, l1[j]) & msum$Cov_Type == "c*c.int"]

      }
    }
  }

  if(length(l1) < length(l2)){
    for(i in 2:length(l1)){
      for(j in 2:length(l2)){
        l.int[j] <- l2.fe[j] +
          msum$Estimate[stringr::str_detect(msum$Coef, l1[i]) & msum$Cov_Type == "categ"] +
          msum$Estimate[stringr::str_detect(msum$Coef, l2[j]) & msum$Cov_Type == "c*c.int"]

      }
    }
  }

data.frame(nb.avg = exp(
  c(unique(msum$Intercept), c(l1.fe,l2.fe,l.int)[!is.na(c(l1.fe,l2.fe,l.int))]) + log(100)),
  Coef = levels(interaction(l1,l2)),
  Cov_Type = msum$Cov_Type)

}

qcint_est <- function(msum, m_cov, quant_style, range, ...){

  if(quant_style == "continuous"){
    if(is.factor(m_cov[,cov_str(...)[1]])) {
      levs <- levels(m_cov[,cov_str(...)[1]])
      ll <- paste(cov_str(...)[1], levs, sep = "")
    } else {
      levs <- levels(m_cov[,cov_str(...)[2]])
      ll <- paste(cov_str(...)[2], levs, sep = "")
    }

    RR <- seq(range[1], range[2], length.out = 10)

    ## The estimate from the reference level along the range of RR
    Est <- data.frame(Quant = RR,
                      l0 = exp( unique(msum$Intercept) +
                                  msum$Estimate[msum$Cov_Type == "quant"]*RR + log(100) )
                      )

    for(i in 2:length(ll)){ ## starting at the second level (one below reference)
      Est[, i+1 ] <- exp(
        unique(msum$Intercept) + ## interaction intercept
          msum$Estimate[stringr::str_detect(msum$Coef, ll[i]) & msum$Cov_Type == "categ"] +

          ## interaction slope
          (msum$Estimate[msum$Cov_Type == "quant"] +
             msum$Estimate[stringr::str_detect(msum$Coef, ll[i]) & msum$Cov_Type == "q*c.int"])*RR
       + log(100))
    }

    Est %<>% tidyr::gather(Fac, Value, -Quant)
  }

  if(quant_style == "discrete"){
    if(is.factor(m_cov[,cov_str(...)[1]])) {
      levs <- levels(m_cov[,cov_str(...)[1]])
      ll <- paste(cov_str(...)[1], levs, sep = "")
    } else {
      levs <- levels(m_cov[,cov_str(...)[2]])
      ll <- paste(cov_str(...)[2], levs, sep = "")
    }

    low <- exp( unique(msum$Intercept) +
                  msum$Estimate[msum$Cov_Type == "quant"]*range[1] + log(100) )
    high <- exp( unique(msum$Intercept) +
                   msum$Estimate[msum$Cov_Type == "quant"]*range[2] + log(100) )

    for(i in 2:length(ll)){ ## starting at the second level (one below reference)
      low[i] <- exp(
        unique(msum$Intercept) +
          msum$Estimate[stringr::str_detect(msum$Coef, ll[i]) & msum$Cov_Type == "categ"] +
          (msum$Estimate[msum$Cov_Type == "quant"] +
             msum$Estimate[stringr::str_detect(msum$Coef, ll[i]) & msum$Cov_Type == "q*c.int"]
          )*range[1] + log(100)
      )
    }

    for(i in 2:length(ll)){ ## starting at the second level (one below reference)
      high[i] <- exp(
        unique(msum$Intercept) +
          msum$Estimate[stringr::str_detect(msum$Coef, ll[i]) & msum$Cov_Type == "categ"] +
          (msum$Estimate[msum$Cov_Type == "quant"] +
             msum$Estimate[stringr::str_detect(msum$Coef, ll[i]) & msum$Cov_Type == "q*c.int"]
          )*range[2] + log(100)
      )
    }

    Est <- data.frame(
      Taxa = msum$Taxa,
      Est = c(low,high),
      Fac = levs,
      HL = rep(c("Low","high"), each = length(levs))
    )
  }

  Est
}

qqint_est <- function(msum, range, ...){

  LL <- exp( unique(msum$Intercept) +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                                      !stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,1] +
    msum$Estimate[!stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[2,1] +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,1]*range[2,1] +
      log(100))

  LH <- exp( unique(msum$Intercept) +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    !stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,1] +
    msum$Estimate[!stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[2,2] +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,1]*range[2,2] +
      log(100))

  HL <- exp( unique(msum$Intercept) +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    !stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,2] +
    msum$Estimate[!stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[2,1] +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,2]*range[2,1] +
      log(100))

  HH <- exp( unique(msum$Intercept) +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    !stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,2] +
    msum$Estimate[!stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[2,2] +
    msum$Estimate[stringr::str_detect(msum$Coef, cov_str(...)[1]) &
                    stringr::str_detect(msum$Coef, cov_str(...)[2])]*range[1,2]*range[2,2] +
      log(100))

  data.frame(Taxa = msum$Taxa,
             Est = c(LL,LH,HL,HH),
             Fac1 = rep(c("Low","High"), each = 2),
             Fac2 = rep(c("Low","High"), times = 2),
             nb.avg = mean( c(LL,LH,HL,HH) + log(100) )
             )
}

## Function to reorder Taxa to make sure "Other" is at the bottom of bar plots
Taxa_ord <- function(msum){

  if("Other" %in% unique(msum$Taxa)){
    msum$Taxa <- factor(msum$Taxa,
                        levels = c(as.character(sort(unique(msum$Taxa)[unique(msum$Taxa) != "Other"])),
                                   "Other")
                        )
  } else{
    msum$Taxa <- factor(msum$Taxa,
                        levels = as.character(sort(unique(msum$Taxa)))
                        )
  }

  msum
}
CharlieCarpenter/tidy.micro documentation built on Jan. 19, 2020, 6:28 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
CharlieCarpenter/tidy.micro
A Pipeline For Microbiome Analysis And Visualization

R/nb_bars.R
In CharlieCarpenter/tidy.micro: A Pipeline For Microbiome Analysis And Visualization

Defines functions nb_bars nb_bars_categ

Documented in nb_bars

R Package Documentation

Browse R Packages

We want your feedback!

CharlieCarpenter/tidy.micro A Pipeline For Microbiome Analysis And Visualization

R/nb_bars.R In CharlieCarpenter/tidy.micro: A Pipeline For Microbiome Analysis And Visualization

Defines functions nb_bars nb_bars_categ

Documented in nb_bars

R Package Documentation

Browse R Packages

We want your feedback!

CharlieCarpenter/tidy.micro
A Pipeline For Microbiome Analysis And Visualization

R/nb_bars.R
In CharlieCarpenter/tidy.micro: A Pipeline For Microbiome Analysis And Visualization