R/helper.R

#' Tabulate interaction between two factors in the sample_data of a phyloseq object
#'
#' @param physeq Phyloseq object
#' @param factor1 First factor
#' @param factor2 Second factor
#'
#' @return data frame containing table of two-factor interactions.
#' @export
#'
#' @examples
#' \dontrun{
#' tab <- tabulate_factors(physeq, "Diet", "Enterotype")
#' colnames(tab) <- paste0("ET-", colnames(tab))
#' knitr::kable(tab, caption = "Distribution of enterotypes in the different Diet groups")
#' }
tabulate_factors <- function(physeq, factor1, factor2) {

  sample_data <- sample_data(physeq)
  lev1 <- levels(sample_data[[factor1]])
  lev2 <- levels(sample_data[[factor2]])
  x <- with(sample_data, table(get(factor1), get(factor2)))
  x <- as.data.frame(matrix(as.vector(x), nrow=length(lev1), ncol=length(lev2)))
  row.names(x) <- lev1
  colnames(x) <- lev2
  return(x)
}



#' Get taxonomy annotation for a given id in a phyloseq object
#'
#' @param physeq Phyloseq object
#' @param id     Identifier of the taxon to get information
#'
#' @return The entire row of the \code{tax_table} object corresponding to this identifier.
#' @export
#'
#' @examples
#' \dontrun{
#' get_taxon_by_id(physeq, "u__712677")
#' }
get_taxon_by_id <- function(physeq, id) {
  return(tax_table(physeq)[which(row.names(t(otu_table(physeq))) == id),])
}



#' Get statistical significance level strings for P-values.
#'
#' @param plist Vector of P-values
#'
#' @return Vector of strings
#' @export
#'
#' @examples
#' get_significance_string(c(0.1, 0.01, 0.001, 0.001))
get_significance_string <- function(plist) {

  sig_list <- sapply(plist, function(p) {
    sig = " NS"
    if (!is.nan(p) & !is.na(p)) {
      if (p <= 0.10) {
        sig = "."
      }
      if (p <= 0.05)  {
        sig = "*"
      }
      if (p <= 0.01)  {
        sig = "**"
      }
      if (p <= 0.001) {
        sig = "***"
      }
    }
    sig
  })
  return(sig_list)
}

#' Get presentable name for a taxon
#'
#' @param x Row of a data frame from \code{taxa_table(physeq)}.
#'
#' @return String with an elegant name.
#' @export
#'
#' @examples
#' \dontrun{
#' get_pretty_taxon_name()
#' }
get_pretty_taxon_name <- function(x) {

  if (!is.na(x['Species'])) {
    name <- paste(x['Genus'], x['Species'])
    return(name)
  }

  name <- "Unknown";

  tax_levels <- c('Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus')
  for (i in c(1:length(tax_levels))) {
    level <- tax_levels[i]
    if (!is.na(x[level])) {
      if (i < 6) { # above genus
        name <- paste0(level, "=", x[level])
      }
      if (level == "Genus") {
        name <- x[level]
      }
      # If '-' or '_' found in name, or name starts with lower case, prepend previous level
      if (regexpr("[-_]", name, perl = TRUE) > 0 | regexpr("[a-z]", name, perl = TRUE) == -1) {
        #If previous level was prepended already or the name is genuine, then dont!
        if (regexpr(x[tax_levels[i-1]], x[level], fixed=TRUE) == -1 &
            regexpr("Candidatus_|Ruminiclostridium_|Clostridium_|Corynebacterium_|Ruminococcus_|Tyzzerella_", x[level], perl=TRUE) == -1) {
          name <- paste(x[tax_levels[i-1]], x[level])
        }
      }
    }
  }
  return(name)
}



#' Get the name of the custom palette for a given variable
#'
#' @param pal Custom palette as a vector (see example).
#' @param name Name of variable to look up in the custom palette vector.
#'
#' @return Name of ColorBrewer palette.
#' @export
#'
#' @examples
#' pal = list(Diet = "Set2", Sample_type = "Pastel1", Significance = "PuRd")
#' get_my_palette_name(pal, "Diet")
get_my_palette_name <- function(pal, name) {

  if (is.null(name)) {
    stop("get_my_palette_name() requires name of factor")
  }
  l_palette = NULL
  if (!is.null(pal)) {
    l_palette = pal[[name]]
  } else {
    l_palette = "Set2"
    warning(paste0("Cannot find palette for ", name, "; using ", l_palette))
  }
  return(l_palette)
}



#' Get palette color maps for a variable from a custom palette vector of ColorBrewer palette names.
#'
#' @param pal Custom palette as a vector (see example).
#' @param name Name of variable to look up in the custom palette vector.
#' @param levels List of values for the variable. Typically obtained by \code{levels(var)}.
#' @param offset Offset to use when using the corresponding ColorBrewer color vector.
#'
#' @importFrom RColorBrewer brewer.pal
#'
#' @return List of named color vectors.
#' @export
#'
#' @examples
#' pal = list(Diet = "Set2", Sample_type = "Pastel1", Significance = "PuRd")
#' get_my_palette_colors(pal, name = "Diet", levels = c("HFD", "HSD", "Control"))
get_my_palette_colors <- function(pal, name = NULL, levels = NULL, offset = 0) {

  l_palette <- get_my_palette_name(pal, name)

  if (is.null(l_palette)) {
    warning(paste("Cannot find palette for", name))
    return(NULL)
  }

  # If l_palette is already a named vector, then this could be the full colormap

  if (!is.null(names(l_palette))) {

    # Does it match the levels vector?

    if (identical(sort(names(l_palette)), sort(levels))) {

      # Perfectly matching with input entries
      # Just return it

      l_colors = l_palette
    }
  } else {

    # Let's get colormap from colorbrewer

    l_n = length(levels)
    l_colors = brewer.pal(offset + max(l_n, 3), l_palette)
    l_colors = l_colors[(offset+1):(offset+l_n)]
    names(l_colors) = levels
  }

  return(l_colors)
}
TBrach/MicrobiomeX documentation built on May 14, 2019, 2:28 p.m.