R/t4f.R
In themetagenomics: Exploring Thematic Structure and Predicted Functionality of 16s rRNA Amplicon Data

Documented in t4f

#' @importFrom Matrix t colSums rowSums
NULL

#' Predict taxonomic functional content via tax4fun
#'
#' Given a taxonomic abundance table prepared with the Silva reference database,
#' predicts the functional content using a KO precalculated mapping
#' table that maps the taxonomic abundance for a given tax_table to functional
#' abundance content across a set of functional genes.
#'
#' @param otu_table (required) Matrix or dataframe containing taxa abundances
#'   (counts, non-negative integers) across samples. Rows and columns must be
#'   uniquely named.
#' @param rows_are_taxa (required) Logical flag indicating whether otu_table
#'   rows correspond to taxa (TRUE) or samples (FALSE).
#' @param tax_table Matrix or dataframe containing Silva taxonimic information
#'   with row or column names corresponding to the otu_table. Silva species
#'   information is required.
#' @param reference_path Folder path of the silva-to-kegg mapping file
#' (t4f_silva_to_kegg.rds) and reference profiles (t4f_ref_profiles.rds). Must
#' not be renamed.
#' @param type Type of protein domain classification methods used to generate
#'   references (uproc or pauda). Defaults to uproc.
#' @param short Logical flag whether to use a short or long read
#'   references. Defaults to TRUE.
#' @param cn_normalize Logical flag for performing 16S rRNA copy number
#'   normalization. Defaults to FALSE.
#' @param sample_normalize Logical flag to normalize functional
#'   predictions by the total functional abundance in a sample. Defaults to FALSE.
#' @param scalar Value for scaling the topics over functions distrubution
#'   to predicted counts.
#' @param drop Logical flag to drop empty gene columns after prediction. Defaults to TRUE.
#' @param verbose Logical flag to print progress information. Defaults to FALSE.
#'
#' @return A list containing
#' \describe{
#' \item{fxn_table}{A matrix of gene counts across topics.}
#' \item{fxn_meta}{A list of functional metadata corresponding to fxn_table.}
#' \item{method_meta}{A matrix of method specific metadata (FTU).}
#' }
#'
#' @references
#' ABhauer, K. P., Wemheuer, B. Daniel, R., and Meinicke, P. (2015).
#' Bioinformatics, 1-3. 31(17).
#'
#' @seealso \code{\link{download_ref}} \code{\link{picrust}}
#'
#' @examples
#' \dontrun{
#' download_ref(destination='/references',reference='silva_ko')
#' predicted_functions <- t4f(otu_table=DAVID$OTU,rows_are_taxa=FALSE,
#'                            tax_table=DAVID$TAX,reference='/references',
#'                            type='uproc',short=TRUE,cn_normalize=TRUE,
#'                            sample_normalize=FALSE,scalar=NULL,drop=TRUE)
#'                            }
#'
#' @export

t4f <- function(otu_table,rows_are_taxa,tax_table,reference_path,type=c('uproc','pauda'),
                short=TRUE,cn_normalize=FALSE,sample_normalize=FALSE,scalar,
                drop=TRUE,verbose=FALSE){

  if (any(is.na(otu_table)))
    stop('Please remove NA values from abundance table.')

  type <- match.arg(type)

  ref_profile_path <- file.path(reference_path,'t4f_ref_profiles.rds',fsep=platform_sep())
  map_path <- file.path(reference_path,'t4f_silva_to_kegg.rds',fsep=platform_sep())

  if (rows_are_taxa == TRUE) otu_table <- t(otu_table)

  if (missing(tax_table)){
    taxa_ids <- colnames(otu_table)
  }else{
    tax_table <- tax_table[colnames(otu_table),]
    if (all(grepl('^[a-z]__',tax_table))) tax_table <- gsub('^[a-z]__','',tax_table)
    taxa_ids <- apply(tax_table,1,function(l){paste0(paste0(l[!is.na(l) & l != ''],collapse=';'),';')})
  }
  colnames(otu_table) <- taxa_ids

  if (short) size <- 'short' else size <- 'long'

  silva_to_kegg <- readRDS(map_path)
  copy_numbers <- readRDS(system.file('references/copy_numbers.rds',package='themetagenomics'))
  kegg_lookup <- readRDS(system.file('references/kegg_lookup_table.rds',package='themetagenomics'))
  ref_profile <- readRDS(ref_profile_path)[[type]][[size]]

  silva_ids <- rownames(silva_to_kegg)

  overlap_taxa <- intersect(silva_ids,taxa_ids)

  sample_sums <- rowSums(otu_table)
  otu_table <- otu_table[,overlap_taxa]
  sample_sums_mapped <- rowSums(otu_table)
  silva_to_kegg <- silva_to_kegg[overlap_taxa,]

  tax_to_kegg <- t(silva_to_kegg) %*% t(otu_table)
  if (cn_normalize) tax_to_kegg <- tax_to_kegg/copy_numbers
  if (sample_normalize){
    empty_samps <- colSums(tax_to_kegg) == 0
    if (sum(empty_samps) > 0){
      if (verbose) cat(sprintf('Empty mappings detected, dropping %s samples.\n',sum(empty_samps)))
      tax_to_kegg <- tax_to_kegg[,!empty_samps]
    }
    tax_to_kegg <- t(as.matrix(t(tax_to_kegg))/colSums(tax_to_kegg))
  }
  fxn_table <- t(ref_profile %*% tax_to_kegg)

  desc_tmp <- unique(kegg_lookup[,c(1,5)])
  desc_tmp <- desc_tmp[desc_tmp[,1] %in% colnames(fxn_table),]
  desc <- desc_tmp[,2]
  names(desc) <- desc_tmp[,1]

  if (drop){
    if (verbose) cat('Dropping post-prediction empty rows and columns.\n')
    fxn_table <- fxn_table[,names(desc)]
    fxn_table <- fxn_table[,colSums(fxn_table) > 0]
  }
  t4f_meta <- cbind(NULL,ftu=1-sample_sums_mapped/sample_sums)

  # scale to counts
  if (!missing(scalar)) {
    fxn_table <- round(fxn_table*scalar/max(fxn_table))
    if (drop) fxn_table <- fxn_table[,colSums(fxn_table) > 0]
  }

  pw_tmp <- kegg_lookup[kegg_lookup[,1] %in% colnames(fxn_table),-5]
  pw_kos <- unique(pw_tmp[,1])
  pw <- vector(mode='list',length=length(pw_kos))
  names(pw) <- pw_kos
  for (i in seq_len(nrow(pw_tmp))){
    ko <- pw_tmp[i,1]
    pw[[ko]] <- c(pw[[ko]],list(tolower(pw_tmp[i,2:4])))
  }

  fxn_meta <- list(KEGG_Description=desc,KEGG_Pathways=pw)

  return(list(fxn_table=as.matrix(fxn_table),fxn_meta=fxn_meta,method_meta=t4f_meta))
}

Any scripts or data that you put into this service are public.

themetagenomics documentation built on May 1, 2020, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

themetagenomics
Exploring Thematic Structure and Predicted Functionality of 16s rRNA Amplicon Data

R/t4f.R
In themetagenomics: Exploring Thematic Structure and Predicted Functionality of 16s rRNA Amplicon Data

Defines functions t4f

Documented in t4f

Try the themetagenomics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

themetagenomics Exploring Thematic Structure and Predicted Functionality of 16s rRNA Amplicon Data

R/t4f.R In themetagenomics: Exploring Thematic Structure and Predicted Functionality of 16s rRNA Amplicon Data

Defines functions t4f

Documented in t4f

Try the themetagenomics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

themetagenomics
Exploring Thematic Structure and Predicted Functionality of 16s rRNA Amplicon Data

R/t4f.R
In themetagenomics: Exploring Thematic Structure and Predicted Functionality of 16s rRNA Amplicon Data