#!/usr/bin/Rscript
### SIAMCAT - Statistical Inference of Associations between
### Microbial Communities And host phenoTypes R flavor EMBL
### Heidelberg 2012-2018 GNU GPL 3.0
#' Build siamcat-class objects from their components.
#' @title SIAMCAT constructor function
#' @name siamcat
#' @description Function to construct an object of class \link{siamcat-class}
#' @usage siamcat(..., feat=NULL, label=NULL, meta=NULL,
#' phyloseq=NULL, validate=TRUE, verbose=3)
#' @param ... additional arguments
#' @param feat feature information for SIAMCAT (see details)
#' @param label label information for SIAMCAT (see details)
#' @param meta (optional) metadata information for SIAMCAT (see details)
#' @param phyloseq (optional) a phyloseq object for the creation of an SIAMCAT
#' object (see details)
#' @param validate boolean, should the newly constructed SIAMCAT object be
#' validated? defaults to TRUE (\strong{we strongly recommend against
#' setting this parameter to FALSE})
#' @param verbose control output: \code{0} for no output at all, \code{1}
#' for only information about progress and success, \code{2} for normal
#' level of information and \code{3} for full debug information,
#' defaults to \code{1}
#' @return A new \link{siamcat-class} object
#' @export
#' @details This functions creates a SIAMCAT object (see \link{siamcat-class}).
#' In order to do so, the function needs \itemize{
#' \item \code{feat} the feature information for SIAMCAT, should be either a
#' matrix, a data.frame, or a \link[phyloseq]{otu_table-class}. The columns
#' should correspond to the different samples (e.g. patients) and the rows the
#' different features (e.g. taxa). Columns and rows should be named.
#' \item \code{meta} metadata information for the different samples in the
#' feature matrix. Metadata is optional for the SIAMCAT workflow. Should be
#' either a data.frame (with the rownames corresponding to the sample
#' names of the feature matrix) or an object of class
#' \link[phyloseq]{sample_data-class}
#' \item \code{phyloseq} Alternatively to supplying both feat and meta,
#' SIAMCAT can also work with a phyloseq object containing an otu_table and
#' other optional slots (like sample_data for meta-variables).}
#'
#' Notice: do supply \strong{either} the feature information as
#' matrix/data.frame/otu_table (and optionally metadata) \strong{or} a
#' phyloseq object, but not both.
#'
#' The label information for SIAMCAT can take several forms:\itemize{
#' \item metadata column: if there is metadata (either via meta or as
#' sample_data in the phyloseq object), the label object can be created
#' by taking the information in a specific metadata column. In order to
#' do so, \code{label} should be the name of the column, and \code{case}
#' should indicate which group(s) should be the positive group(s). A
#' typical example could look like that:
#'
#' \code{siamcat <- siamcat(feat=feat.matrix, meta=metadata,
#' label='DiseaseState', case='CRC')}
#'
#' for the construction of a label to predict CRC status (which is encoded
#' in the column \code{"DiseaseState"} of the metadata). For more control
#' (e.g. specific labels for plotting or specific control state), the
#' label can also be created outside of the \code{siamcat} function using
#' the \link{create.label} function.
#' \item named vector: the label can also be supplied as named vector which
#' encodes the label either as characters (e.g. "Healthy" and "Diseased"),
#' as factor, or numerically (e.g. -1 and 1). The vector must be named
#' with the names of samples (corresponding to the samples in features).
#' Also here, the information about the positive group(s) is needed via
#' the \code{case} parameter. Internally, the vector is given to the
#' \link{create.label} function.
#' \item label object: A label object can be created with the
#' \link{create.label} function or by reading a dedicated label file
#' with \link{read.label}.
#' }
#' @examples
#' # example with package data
#' data("feat_crc_zeller", package="SIAMCAT")
#' data("meta_crc_zeller", package="SIAMCAT")
#'
#' siamcat <- siamcat(feat=feat.crc.zeller,
#' meta=meta.crc.zeller,
#' label='Group',
#' case='CRC')
siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL,
validate=TRUE, verbose=3) {
if (is.null(phyloseq) && is.null(feat)){
msg <- paste0('SIAMCAT needs either a feature matrix or a phyloseq',
' object!!! Exiting...')
stop(msg)
}
# optional arguments
other.args <- list(...)
# keep case info if the user wants to create the label from metadata
if ('case' %in% names(other.args)){
case <- other.args$case
} else {
case <- NULL
}
if ('control' %in% names(other.args)){
control <- other.args$control
} else {
control <- NULL
}
# Remove names from arglist. Will replace them based on their class
names(other.args) <- NULL
if (!is.null(other.args)){
# ignore all but component data classes.
component_classes <- get.component.classes("both")
for (argNr in seq_along(other.args)) {
classOfArg <- class(other.args[[argNr]])[1]
if (classOfArg %in% names(component_classes)) {
names(other.args)[argNr] <- component_classes[classOfArg]
}
}
}
# if phyloseq object has been given
if (!is.null(phyloseq)){
if (!is.null(feat)){
msg <- paste0('Both features matrix and phyloseq object provided. ',
'Please provide only one of them!')
stop(msg)
}
if (!is(phyloseq,'phyloseq')){
stop('Please provide an object of class phyloseq for SIAMCAT!')
}
feat <- otu_table(phyloseq)
if (!is.null(sample_data(phyloseq, errorIfNULL=FALSE))) {
meta <- sample_data(phyloseq)
} else {
meta <- NULL
}
# get all other slots from the phyloseq object
for (x in setdiff(get.component.classes('phyloseq'),
c('otu_table', 'sam_data'))){
if (!is.null(access(phyloseq, x))){
other.args[[x]] <- access(phyloseq, x)
}
}
}
# validate features and metadata
feat <- validate.features(feat)
meta <- validate.metadata(meta)
# make Phyloseq object properly
if (any(vapply(names(other.args), is.component.class, "phyloseq",
FUN.VALUE = logical(1)))){
arglistphyloseq <- other.args[vapply(names(other.args),
is.component.class,
"phyloseq", FUN.VALUE = logical(1))]
arglistphyloseq$otu_table <- feat
arglistphyloseq$sam_data <- meta
} else {
arglistphyloseq <- list('otu_table'=feat, 'sam_data'=meta)
}
other.args$phyloseq <- do.call("new", c(list(Class = "phyloseq"),
arglistphyloseq))
# label object
temp <- validate.label(label, feat, meta, case, control, verbose)
label <- temp$label
if (!is.null(temp$meta)){
sample_data(other.args$phyloseq) <- temp$meta
}
other.args$label <- label
# any other slots
other.args <-
other.args[vapply(names(other.args),
is.component.class,
"siamcat",
FUN.VALUE = logical(1))]
sc <- do.call("new", c(list(Class = "siamcat"), other.args))
# validate
if (validate){
sc <- validate.data(sc, verbose=verbose)
} else {
if (verbose > 0){
msg <- paste0('### Not validating the SIAMCAT object!!!\n',
'\tPlease be advised that some functions may not',
' work correctly!')
warning(msg)
}
}
return(sc)
}
# source: https://github.com/joey711/phyloseq/blob/master/R/phyloseq-class.R
#' Show the component objects classes and slot names.
#' @keywords internal
#' @return list of component classes
get.component.classes <- function(class) {
# define classes vector the names of component.classes needs to be the slot
# names to match getSlots / splat
#slot names
component.classes.siamcat <-
c(
"phyloseq",
"label",
"filt_feat",
"associations",
"norm_feat",
"data_split",
"model_list",
"pred_matrix",
"eval_data"
)
#class names
names(component.classes.siamcat) <-
c(
"phyloseq",
"label",
"filt_feat",
"associations",
"norm_feat",
"data_split",
"model_list",
"pred_matrix",
"eval_data"
)
#slot names
component.classes.phyloseq <-
c("otu_table", "sam_data", "phy_tree",
"tax_table", "refseq")
#class names
names(component.classes.phyloseq) <-
c("otu_table",
"sample_data",
"phylo",
"taxonomyTable",
"XStringSet")
if (class == "siamcat") {
return(component.classes.siamcat)
} else if (class == "phyloseq") {
return(component.classes.phyloseq)
} else if (class == "both") {
return(c(
component.classes.siamcat,
component.classes.phyloseq
))
}
}
# Returns TRUE if x is a component class, FALSE otherwise.
#' @keywords internal
is.component.class <- function(x, class) {
x %in% get.component.classes(class)
}
# check and convert feature object to otu_table
#' @keywords internal
validate.features <- function(feat){
# check if NA and stop
if (is.null(feat)){
stop('SIAMCAT needs features!!! Exiting...')
}
# check class of feature input
if (is(feat,'otu_table')){
# can either be an otu_table (only check that taxa_are_rows == TRUE)
if (!taxa_are_rows(feat)){
feat <- otu_table(t(feat@.Data), taxa_are_rows=TRUE)
}
return(feat)
} else if (is(feat, 'matrix')){
# or a matrix (then check if it is numeric or not)
# and convert to otu_table
if (any(!is.numeric(feat))){
msg <- paste0('SIAMCAT expects numerical features!.\n',
'Please check your feature matrix! Exiting...')
stop(msg)
}
if (length(unique(rownames(feat))) != nrow(feat)){
stop("Features need unique identifiers!")
}
feat <- otu_table(feat, taxa_are_rows=TRUE)
return(feat)
} else if (is.data.frame(feat)){
if (is(feat, 'tbl')){
msg <- paste0("Tibbles are not supported. Features need to be a",
" numeric matrix or data.frame with rownames!")
stop(msg)
}
# or a dataframe (then do the same as above)
if (any(!is.numeric(unlist(feat)))){
msg <- paste0('SIAMCAT expects numerical features!.\n',
'Please check your feature data.frame! Exiting...')
stop(msg)
}
if (length(unique(rownames(feat))) != nrow(feat)){
stop("Features need unique identifiers!")
}
feat <- otu_table(feat, taxa_are_rows=TRUE)
return(feat)
}
}
# check label object
#' @keywords internal
validate.label <- function(label, feat, meta, case, control, verbose){
# if NA, return simple label object which contains only one class
if (is.null(label)){
msg <- paste0('No label information given! Generating SIAMCAT object ',
'with placeholder label!\n\tThis SIAMCAT object is not suitable for ',
'the complete workflow...')
warning(msg)
label <- list(label = rep(-1, ncol(feat)),
info=c('TEST'=-1), type="TEST")
names(label$label) <- colnames(feat)
} else if (is.list(label)){
label <- label
} else if (is.character(label) & length(label) == 1){
if(is.null(meta)) stop('Metadata needed to generate label! Exiting...')
temp <- create.label(meta=meta, label=label, case=case, control=control,
verbose=verbose, remove.meta.column=TRUE)
label <- temp$label
meta <- temp$meta
} else if (is.atomic(label)) {
label <- create.label(label=label, case=case, control=control,
verbose=verbose)
} else {
msg <- paste0('Cannot interpret the label object!\nPlease ',
'provide either a label object, a column in your metadata, or a',
'vector to distinguish cases and controls!.')
stop(msg)
}
return(list(label=label, meta=meta))
}
# check meta-data object
#' @keywords internal
validate.metadata <- function(meta){
if (is.null(meta)){
return(NULL)
}
if (is(meta, 'sample_data')){
return(meta)
}
if (is.data.frame(meta)){
if (is(meta, 'tbl')){
msg <- paste0("Tibbles are not supported. Metadata needs to be",
" a dataframe with rownames!")
stop(msg)
}
meta <- sample_data(meta)
return(meta)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.