R/construct_otu_table.R

Defines functions construct_otu_table

Documented in construct_otu_table

#' construct_otu_table
#'
#' construct_otu_table can construct a OTU table with a phyloseq object.
#'
#' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample metadata and
#' phylogenetic tree.
#'
#' @param level The coloumn name of the level wanted to select. Default is "all". If "all" then
#' retain all taxonomy level, else ONLY retain the given taxonomy level, drop everything else. Level
#' name should be one of "all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
#'
#' @param relative_abundance Turn OTU table into relative abundance or not. Default is FALSE. If
#' TRUE, the raw count value will be converted to relative abundance and total sum is 1.
#'
#' @param hundred_percent Default is FALSE, if TRUE, turn relative abundance to 100 percent, total
#' sum is 100.
#'
#' @export
#'
#' @examples
#' construct_otu_table(demo_phyloseq_object, level = "Genus") %>% .[,1:5]

construct_otu_table <- function(
  phyloseq,
  level = "all",
  relative_abundance = FALSE,
  hundred_percent = FALSE
) {
  # Set options, prevent R turnning numeric value to factor
  options(stringsAsFactors = FALSE)
  # Check if input 'level' is correct
  if (!level %in% c("all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
    stop(paste0('Argument "level" should be one of "all", "Kingdom", "Phylum", "Class", "Order", ',
                '"Family", "Genus", "Species".'))
    }
  # Read in sequence table and taxonomy table from phyloseq
  if (!taxa_are_rows(phyloseq)) {
    otu <- otu_table(phyloseq) %>% as.data.frame() %>% t() %>%
      as.data.frame() %>% rownames_to_column(var = "OTU_ID")
  } else {
    otu <- otu_table(phyloseq) %>% as.data.frame() %>%
      as.data.frame() %>% rownames_to_column(var = "OTU_ID")
  }
  taxa <- tax_table(phyloseq) %>% as.data.frame() %>%
    rownames_to_column(var = "OTU_ID")
  ## Step 1: Clean sequence table and taxonomy table
  # If a sequence's taxonomy is all NA, remove it
  all_NA_taxa <- taxa %>% filter_all(all_vars(is.na(.)))
  taxa <- taxa %>% dplyr::filter(!OTU_ID %in% all_NA_taxa$OTU_ID)
  ## Step 2: Construct OTU table
  if (level == "all") {
    # If level == "all", collapse all taxonomy levels and separate by "; "
    level <- "Taxonomy"
    taxa <- taxa %>% tidyr::unite(Taxonomy, 2:ncol(taxa), sep = "; ")
  } else {
    # If level != "all", select the given taxonomy level
    taxa <- taxa %>% dplyr::select(OTU_ID, level) %>% dplyr::filter(., !is.na(.[[level]]))
    otu <- otu %>% dplyr::filter(OTU_ID %in% taxa$OTU_ID)
  }
  ## Step 3: Merge sequence table and taxonomy table
  otu <- left_join(otu, taxa)
  ## Step 4: Add abundance of those have the same taxonomy names and convert it to data frame
  otu <- otu %>%
    # replace group_by_() with group_by()
    # See https://stackoverflow.com/questions/38220963/how-to-pass-a-variable-name-in-group-by
    group_by(!!!rlang::syms(level)) %>%
    summarise_if(is.numeric, sum, na.rm = TRUE) %>%
    filter_if(is.numeric, any_vars(. != 0)) %>%
    column_to_rownames(var = level) %>%
    t() %>%
    as.data.frame()
  if(relative_abundance) {
    if(hundred_percent) {
      otu <- convert_to_percentage(otu, row_sum = TRUE, hundred_percent = T)
    } else {
      otu <- convert_to_percentage(otu, row_sum = TRUE)
    }
  }
  if(level == "Taxonomy") {
    # 转换成适合阅读和导出excel的格式:
    # colnames为taxa level + SampleID,没有rownames(taxa_are_rows = TRUE)
    otu <- otu %>% t() %>% as.data.frame() %>% tibble::rownames_to_column("Taxonomy") %>%
      tidyr::separate(
        Taxonomy, c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"), "; "
      )
  }
  return(otu)
}
yeguanhuav/visualization416S documentation built on March 22, 2022, 9:03 p.m.