R/replicates.R

Defines functions combine_replicates

Documented in combine_replicates

# replicates.R
# Combine replicates
# Copyright Jackson M. Tsuji, 2020 (Neufeld Lab)

#' Combine replicates
#'
#' @aliases dereplicate
#' @description: Combines replicates in a normalized metannotate table into means with standard deviations
#' @param metannotate_data_normalized_list List output of \code{\link{normalize}}
#' @return List of three:
#' - Tibble of metannotate data, with 'percent_abundance' and 'percent_abundance_sd'
#' - Tibble summarizing total normalized counts for genes
#' - Tibble summarizing total normalized counts for genes in long format, including standard deviations
#' @export
combine_replicates <- function(metannotate_data_normalized_list) {
  # # Example column names of the plotting table, if collapsed to family
  # [1] "Dataset"       "replicate"         "HMM.Family"                   "Closest.Homolog.Superkingdom"
  # [4] "Closest.Homolog.Phylum"       "Closest.Homolog.Class"        "Closest.Homolog.Order"
  # [7] "Closest.Homolog.Family"       "percent_abundance"

  # Check that the first input is actually a list
  if (class(metannotate_data_normalized_list)[1] != "list") {
    stop("Input object is not a list as expected. Are you sure that you have already run normalize()?")
  }

  # Extract list components
  metannotate_data <- metannotate_data_normalized_list$metannotate_data
  hit_totals <- tidyr::pivot_longer(metannotate_data_normalized_list$total_normalized_hits, -c('Dataset', 'replicate'),
                                    names_to = "HMM.Family", values_to = "percent_abundance")
  hit_totals$HMM.Family <- factor(hit_totals$HMM.Family, levels = unique(hit_totals$HMM.Family), ordered = TRUE)

  # Check metannotate data has been normalized
  # TODO - consider making a more exhaustive data check
  if ("percent_abundance" %in% colnames(metannotate_data) == FALSE) {
    stop(paste0("Provided metannotate_data table does not contain the expected 'percent_abundance' column. "),
         "Are you sure that you have already run normalize()?")
  }

  # Detect the taxonomy that the data has been collapsed to
  plotting_taxon_colname <- TAXONOMY_NAMING$metannotate_colnames[
    TAXONOMY_NAMING$metannotate_colnames %in% colnames(metannotate_data)] %>%
    tail(n = 1)
  plotting_taxon <- TAXONOMY_NAMING$taxonomy[match(plotting_taxon_colname,
                                                   TAXONOMY_NAMING$metannotate_colnames)]
  futile.logger::flog.debug(paste0("Plotting input dataframe has been collapsed to the '", plotting_taxon, "' level."))

  # Get the mean and standard deviation of replicates
  grouping_cols <- c("Dataset", "HMM.Family", TAXONOMY_NAMING$metannotate_colnames[
    TAXONOMY_NAMING$metannotate_colnames %in% colnames(metannotate_data)])
  metannotate_data <- dplyr::group_by_at(metannotate_data, grouping_cols) %>%
    dplyr::summarise(percent_abundance_mean = mean(percent_abundance), percent_abundance_sd = sd(percent_abundance)) %>%
    dplyr::rename(percent_abundance = percent_abundance_mean)

  # Also get means of the total hits
  hit_totals_long <- dplyr::group_by(hit_totals, Dataset, HMM.Family) %>%
    dplyr::summarise(percent_abundance_mean = mean(percent_abundance), percent_abundance_sd = sd(percent_abundance)) %>%
    dplyr::rename(percent_abundance = percent_abundance_mean) %>%
    dplyr::ungroup()

  hit_totals <- dplyr::select(hit_totals_long, -percent_abundance_sd) %>%
    tidyr::pivot_wider(names_from = HMM.Family, values_from = percent_abundance)

  # Output the list in the same format as for normalize()
  output_list <- list(metannotate_data, hit_totals, hit_totals_long)
  names(output_list) <- c("metannotate_data", "total_normalized_hits", "total_normalized_hits_with_sd")

  return(output_list)
}
MetAnnotate/metannoviz documentation built on Aug. 2, 2020, 3:12 p.m.