R/ra_bars.R

Defines functions ra_bars

Documented in ra_bars

#' @title Function to make stacked bar charts of taxa relative abundance
#' @name ra_bars
#' @description A function to make stacked bar charts of taxa relative abuncance with the choice to stratify by a variable of interest
#' @param micro_set A tidy_micro data set
#' @param table OTU table you'd like to use when calculating alpha diversity. Your lowest level is recommended
#' @param ... A categorical variable by which you'd like to stratify your relative abundances
#' @param top_taxa Only plot X taxa with the highest relative abundance. The rest will be aggregated into an "Other" category.
#' @param RA Only plot taxa with a relative abundance higher than X. The rest will be aggregated into an "Other" category.
#' @param specific_taxa Plot this specific taxa even if it doesn't meet the top_taxa or RA requirements
#' @param xlab x-axis label
#' @param ylab y-axis label
#' @param lines Logical; Add outlines around the different taxa colors in the stacked bar charts
#' @param rotate Logical; Tilting x-axis labels if many levels are involved
#' @return Returns a ggplot that you can add geoms to if you'd like
#' @examples
#' data(phy); data(cla); data(ord); data(fam); data(met)
#'
#' otu_tabs = list(Phylum = phy, Class = cla, Order = ord, Family = fam)
#' set <- tidy_micro(otu_tabs = otu_tabs, meta = met) %>%
#' filter(day == 7) %>% ## Only including the first week
#' mutate(bpd1 = factor(bpd1))
#'
#' ## Full cohort abundance
#'
#' set %>%
#' ra_bars(table = "Family", top_taxa = 10)
#'
#' ## Stratified by variable of interest
#' set %>%
#' ra_bars(table = "Family", bpd1, top_taxa = 10)
#' @export
ra_bars <- function(micro_set, table,  ..., top_taxa = 0, RA = 0, specific_taxa,
                    ylab, xlab, main, xaxis, lines = TRUE, rotate = TRUE){
  if(missing(specific_taxa)) specific_taxa <- NULL; if(missing(xlab)) xlab <- NULL; if(missing(main)) main <- NULL
  if(missing(ylab)) ylab <- "Relative Abundance (%)"

  if(table %nin% unique(micro_set$Table)) stop("Specified table is not in supplied micro_set")

  ## No grouping variable, single bar
  if(missing(...)){
    arr <- micro_set %>%
      dplyr::filter(Table == table) %>%
      dplyr::group_by(Taxa) %>%
      dplyr::summarise(perc = sum(cts) / sum(unique(Total))*100) %>%
      dplyr::mutate(x = "X") %>%
      dplyr::arrange(desc(perc))

    if(RA < 0 | RA > 100) stop("RA must be between 0 and 100")
    if(top_taxa > 0 & RA > 0) stop("Can not aggregate based on both 'top_taxa' and 'RA'")

    ## Aggregating taxa
    if(top_taxa > 0 | RA > 0){
      if(top_taxa > 0){
        arr$ord <- ifelse(arr$Taxa %in% unique(arr$Taxa)[1:top_taxa], "Top", "Other")
        arr$ord <- ifelse(arr$Taxa %in% specific_taxa, "Top", arr$ord)
      } else if(RA > 0){
        arr$ord <- ifelse(arr$perc >= RA, "Top", "Other")
        arr$ord <- ifelse(arr$Taxa %in% specific_taxa, "Top", arr$ord)
      }

    ar <- rbind(
      arr %>%
        dplyr::filter(ord == "Top") %>%
        as.data.frame,

        cbind(Taxa = "Other",
              arr %>% dplyr::filter(ord == "Other") %>%
                dplyr::summarise(perc = sum(perc))) %>%
        dplyr::mutate(x = "X", ord = "Other") %>%
        as.data.frame
    )

    if(missing(xaxis)) xaxis <- "Cohort Relative Abundance"

    gg <- ar %>%
      Taxa_ord() %>%
      ggplot2::ggplot(aes(x = x, y = perc)) +
      ggplot2::scale_x_discrete(labels = xaxis)

    } else{

      if(missing(xaxis)) xaxis <- "Cohort Relative Abundance"

      gg <- arr %>%
        Taxa_ord() %>%
        ggplot2::ggplot(aes(x = x, y = perc)) +
        ggplot2::scale_x_discrete(labels = xaxis)
    }

  } else{ ## Grouping variable
    arr <- micro_set %>%
      dplyr::filter(Table == table) %>%
      dplyr::group_by(!!!rlang::quos(...)) %>%
      dplyr::summarise(tot = sum(cts)) %>%
      dplyr::left_join(micro_set %>%
                         dplyr::filter(Table == table) %>%
                         dplyr::group_by(Taxa,!!!rlang::quos(...)) %>%
                         dplyr::summarise(count = sum(cts)), by = by_fun(!!!rlang::quos(...))) %>%
      dplyr::group_by(Taxa, !!!rlang::quos(...)) %>%
      dplyr::summarise(perc = (count/tot)*100) %>%
      dplyr::arrange(desc(perc))

    if(RA < 0 | RA > 100) stop("RA must be between 0 and 100")
    if(top_taxa > 0 & RA > 0) stop("Can not aggregate based on both 'top_taxa' and 'RA'")

    ## Aggregating taxa
    if(top_taxa > 0 | RA > 0){
      if(top_taxa > 0){
        arr$ord <- ifelse(arr$Taxa %in% unique(arr$Taxa)[1:top_taxa], "Top", "Other")
        arr$ord <- ifelse(arr$Taxa %in% specific_taxa, "Top", arr$ord)
      } else if(RA > 0){
        arr$ord <- ifelse(arr$perc >= RA, "Top", "Other")
        arr$ord <- ifelse(arr$Taxa %in% specific_taxa, "Top", arr$ord)
      }

      ar <- rbind(
        arr %>%
          dplyr::filter(ord=="Top") %>%
          dplyr::select(Taxa, !!!rlang::quos(...), perc) %>%
          as.data.frame,

        cbind(Taxa = "Other",
              arr %>% dplyr::filter(ord == "Other") %>%
                dplyr::group_by(!!!rlang::quos(...)) %>% dplyr::summarize(perc = sum(perc)) %>%
                as.data.frame
        )
      )

      ar$plot_grp <- ar %>%
        dplyr::select(!!!rlang::quos(...)) %>%
        interaction

      gg <- ar %>%
        Taxa_ord() %>%
        ggplot(aes(x = plot_grp, y=perc))

    } else{ ## No Aggregation

      if(length(cov_str(...)) == 1){
        names(arr)[2] <- "plot_grp"

        arr %<>%
          dplyr::filter(!is.na(plot_grp)) %>%
          dplyr::mutate(plot_grp = factor(plot_grp))

        # if(!is.null(sort_by)){
        #   arr$plot_grp <- factor(arr$plot_grp,
        #                          levels = unique(arr$plot_grp)[order(sort(unique()))])
        # }

      } else if(length(cov_str(...)) > 1){
        arr %<>%
          dplyr::mutate(plot_grp = interaction(!!!rlang::quos(...)))
      }

      gg <- arr %>%
        dplyr::ungroup() %>%
        Taxa_ord() %>%
        ggplot2::ggplot(aes(x = plot_grp, y=perc))
    }
    if(!missing(xaxis)) gg <- gg + ggplot2::scale_x_discrete(limits = xaxis)
  }

    gg <- gg +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, x = xlab, y = ylab)

  if(lines) gg <- gg + ggplot2::geom_col(aes(fill = Taxa), colour = "black")
  else gg <- ggplot2::geom_col(aes(fill = Taxa))

  ## Tilting x-axis labels if many levels are involved
  if(rotate & "plot_grp" %in% names(gg$data)){
    if(length(unique(gg$data$plot_grp)) > 10)
    gg <- gg + ggplot2::theme(axis.text.x = element_text(angle = 90))
  }
  gg
}
CharlieCarpenter/tidy.micro documentation built on Jan. 19, 2020, 6:28 p.m.