#' Import taxpasta-specific BIOM results to
#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
#'
#' @name importTaxpasta
#'
#' @details
#' \code{importTaxpasta} imports data that is returned from Taxonomic Profile
#' Aggregation and Standardization (taxpasta) pipeline. See more information on
#' taxpasta from
#' \href{https://taxpasta.readthedocs.io/en/latest/}{taxpasta documentation}.
#'
#' @param file \code{Character scalar}. Defines the file path to a
#' BIOM file.
#' @param add.tree \code{Logical scalar}. Specifies whether to calculate
#' and add hierarchy tree using \code{\link{addHierarchyTree}}.
#' (Default: \code{TRUE})
#'
#' @param ... additional arguments
#' \itemize{
#' \item \code{set.ranks}: \code{Logical scalar}. Should column names of
#' taxonomy table be treated as taxonomy ranks? (Default: \code{FALSE})
#' }
#'
#' @return A \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} object.
#'
#' @examples
#'
#' \dontrun{
#' # File path to BIOM file
#' file_path <- system.file("extdata", "complete.biom", package = "mia")
#' # Import BIOM as TreeSE, and set ranks.
#' tse <- importTaxpasta(file_path, set.ranks = TRUE)
#' # Import BIOM as TreeSE without adding hierarchy tree
#' tse <- importTaxpasta(file_path, add.tree = FALSE)
#' }
#'
#' @seealso
#' \code{\link[=importBIOM]{importBIOM}}
#' \code{\link[=convertFromBIOM]{convertFromBIOM}}
#'
NULL
#'
#' @rdname importTaxpasta
#' @export
#'
#' @importFrom SummarizedExperiment rowData
importTaxpasta <- function(file, add.tree = TRUE, ...) {
# Check dependencies.
.require_package("rhdf5")
.require_package("biomformat")
# Validate the input.
if(!.is_non_empty_string(file) ){
stop("'filename' must be a single non-empty character value.", call. = FALSE)
}
if( !file.exists(file) ){
stop("'", file, "' not found.", call. = FALSE)
}
if (!.is_a_bool(add.tree)) {
stop("'add.tree' must be TRUE or FALSE.", call. = FALSE)
}
# We read our own HDF5 array to later be able to read observation group
# metadata, which [biomformat::read_biom()] currently doesn't do.
raw <- rhdf5::h5read(file, "/", read.attributes = TRUE)
biom <- .create_biom(raw)
# Convert BIOM to TreeSE
tse <- convertFromBIOM(biom)
# If we have taxonomic information, we add a hierarchy to the TreeSE.
if( !is.null(raw$observation$`group-metadata`$ranks) ){
# Get ranks from data
ranks <- .get_ranks(raw)
# Create rowData and rowTree
rowData(tse) <- .create_row_data(biom, ranks)
.set_ranks_based_on_rowdata(tse, ...)
if (add.tree) tse <- addHierarchyTree(tse)
# Agglomerate to all existing ranks
tse <- agglomerateByRanks(tse, update.tree = TRUE)
} else{
# Without taxonomic information, we return a simple TreeSE.
warning(
"The BIOM file does not contain taxonomy information;",
"unable to generate a taxonomic tree.", call. = FALSE)
}
return(tse)
}
#' Create BIOM object from raw HDF5 array
#'
#' \code{.create_biom} is an internal function used by
#' \code{\link[=importTaxpasta]{importTaxpasta}}, that more or less replicates
#' \code{\link[biomformat:read_hdf5_biom]{read_hdf5_biom}}.
#'
#' @param h5array A raw HDF5 array read from a BIOM file.
#'
#' @return A \code{\link[biomformat:biom]{biom}} object.
#'
#' @keywords internal
#' @noRd
#' @importFrom utils getFromNamespace
.create_biom <- function(h5array) {
# Get relevant functions from namespace. Using pkg:::function gives warnings
# in R CMD check. By doing like this, we can avoid them.
generate_matrix <- getFromNamespace("generate_matrix", "biomformat")
generate_metadata <- getFromNamespace("generate_metadata", "biomformat")
namedList <- getFromNamespace("namedList", "biomformat")
# Get abundance data, row data and column data along with shape
data <- generate_matrix(h5array)
rows <- generate_metadata(h5array$observation)
columns <- generate_metadata(h5array$sample)
shape <- c(length(data), length(data[[1]]))
# Get metadata on the data
id <- attr(h5array, "id")
vs <- attr(h5array, "format-version")
format <- sprintf("Biological Observation Matrix %s.%s", vs[1], vs[2])
format_url <- attr(h5array, "format-url")
type <- attr(h5array, "type")
# this bc biomformat does not allow empty 'type' field
type <- ifelse(type == "", "OTU table", type)
generated_by <- attr(h5array, "generated-by")
date <- attr(h5array, "creation-date")
matrix_type <- "dense"
matrix_element_type <- "int"
# Create BIOM object
result <- namedList(
id, format, format_url, type, generated_by, date, matrix_type,
matrix_element_type, rows, columns, shape, data
)
result <- biomformat::biom(result)
return(result)
}
#' Get taxonomic ranks from observation group metadata
#'
#' \code{.get_ranks} is an internal function used by
#' \code{\link[=importTaxpasta]{importTaxpasta}}, that reads the string of
#' semi-colon separated ranks from the group observation metadata.
#'
#' @param h5array A raw HDF5 array read from a BIOM file.
#'
#' @return A character vector of taxonomic ranks in order.
#'
#' @keywords internal
#' @noRd
.get_ranks <- function(h5array) {
ranks <- strsplit(h5array$observation$`group-metadata`$ranks,
";",
fixed = TRUE
)[[1]]
return(ranks)
}
#' Recreate observation metadata
#'
#' \code{.create_row_data} is an internal function used by
#' \code{\link[=importTaxpasta]{importTaxpasta}}, that returns a copy of the
#' BIOM object's observation metadata, where the headers of the taxonomy
#' columns have been replaced with the correct taxonomic rank names.
#'
#' @param biom A BIOM object.
#' @param ranks A character vector of taxonomic ranks in order.
#'
#' @return A \code{\link[base:data.frame]{data.frame}} with observation
#' metadata.
#'
#' @keywords internal
#' @noRd
.create_row_data <- function(biom, ranks) {
# Get taxonomy table
meta <- biomformat::observation_metadata(biom)
# Get column names of taxonomy table
column.names <- colnames(meta)
# Taxonomy ranks should start with "taxonomy", take all of them
indeces <- startsWith(column.names, "taxonomy")
# Check that they match with ranks got from the data
if( sum(indeces) != length(ranks) ){
stop(
"The number of generic taxonomy* columns differs",
"from the number of ranks.", call. = FALSE)
}
# And replace column names with the set of ranks
colnames(meta) <- replace(column.names, indeces, ranks)
return(meta)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.