#' @title Get Taxmap Object
#' @description A function that handles the conversion of objects to Taxmap (\strong{taxa::taxmap}) objects.
#' @param obj An object that contains the data being analyzed. Can be one of the following:
#' \describe{
#' \item{Phyloseq Object}{An object generated from the phyloseq package.}
#' \item{Taxmap Object}{An object generated from the metacoder or taxa package.}
#' \item{RData file}{An RData file generated from the base::save function. Can have an extension of .RData or .rda.}
#' }
#' @return The output generated is a \strong{taxa::taxmap} object.
#' @pretty_print TRUE
#' @details This function is used to convert data to metacoder/taxmap objects for \emph{microbiome} analysis.
#' This function is used at the beginning of every other function to support multiple types of input
#' for the obj parameter in those functions.
#' @examples
#' \dontrun{
#' if(interactive()){
#' library(MicrobiomeR)
#' phy_obj <- MicrobiomeR::phyloseq_silva_2
#' mc_obj <- create_taxmap(phy_obj)
#' }
#' }
#' @export
#' @family Data Importers
#' @rdname create_taxmap
#' @seealso
#' \code{\link[metacoder]{parse_phyloseq}}
#'
#' \code{\link[tools]{fileutils}}
#' @importFrom metacoder parse_phyloseq
#' @importFrom tools file_ext
#' @importFrom crayon red
create_taxmap <- function(obj) {
if (is.null(obj)) {
stop(crayon::red("Please use a metacoder/phyloseq object or an rdata file."))
} else {
if (inherits(obj, "phyloseq")) {
metacoder_object <- metacoder::parse_phyloseq(obj)
} else if (inherits(obj, "Taxmap")) {
metacoder_object <- obj
} else if (file.exists(obj)) {
if (tools::file_ext(obj) %in% c("RData", ".rda")) {
load(file = obj)
if (!"metacoder_object" %in% ls()) {
stop(crayon::red("Please provide a loadable .RData/.rda file that contains an object called \"metacoder_object\"."))
}
}
}
}
return(metacoder_object)
}
#' @title Get Treatment Matrix
#' @description A function that returns a matrix with used for comparing treatment data.
#' @param obj An object to be converted to a taxmap object with \code{\link[MicrobiomeR]{create_taxmap}}.
#' @return A matrix with each column representing a comparison to be made.
#' @pretty_print TRUE
#' @details Use this when you want to do pairwise comparisons.
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
#' @family Formatting
#' @rdname treatment_matrix
#' @importFrom utils combn
treatment_matrix <- function(obj) {
# Get treatment data
treat_1 <- as.character(obj$data$stats_tax_data$treatment_1)
treat_2 <- as.character(obj$data$stats_tax_data$treatment_2)
treatments <- unique(c(treat_1, treat_2))
combinations <- t(utils::combn(seq_along(treatments), 2))
combinations <- sapply(seq_len(nrow(combinations)),
function(index) {
set.seed(1)
treat_a <- treatments[combinations[index, 1]]
treat_b <- treatments[combinations[index, 2]]
c(treat_a, treat_b)
})
return(combinations)
}
#' @title Create an Output Directory
#' @description A function for generating a consistent file system for data files and visualizations. If none
#' of the parameters are set the full path generated by this function will be \emph{root_path}/output
#' @param start_path With the start_path set, the full path generated by this function will be
#' \emph{root_path}/\emph{start_path}/\emph{?experiment?}/\emph{?plot_type?}/\emph{end_path}. Default: NULL
#' @param experiment (optional) This will add a second level to the start_path file system. Default: NULL
#' @param plot_type (optional) This will add a third level to the start_path file system. With \strong{ONLY}
#' the plot_type set the full path generated by this function will be \emph{root_path}/\emph{plot_type}. Default: NULL
#' @param end_path (optional) This will add the final directory to the start_path file system Default: A formatted date-time string.
#' @param root_path The root of the new file system (start_path or not). Default: The working directory.
#' @param custom_path This is an absolute path that overrides everything else. Default: NULL
#' @param overwrite A logical denoting that overwriting is acceptable. Default: FALSE
#' @param mkdir A logical denoting weather or not the directory should be created or not. Default: TRUE
#' @return Creates a directory for output and returns the path as a string.
#' @pretty_print TRUE
#' @details This function is incredibly useful on it's own but also for various other plotting/saving functions within the package.
#' It helps keep data organized using a standard workflow.
#' @examples
#' \dontrun{
#' if(interactive()){
#' # Get the path to an output directory without creating
#' library(MicrobiomeR)
#' output_dir <- output_dir(start_path="output", experiment="microbiome-proj", mkdir=FALSE)
#' print(output_dir)
#'
#' # Create a folder for your plot types
#' output_dir <- output_dir(plot_type="scatter-plots")
#' print(output_dir)
#'
#' }
#' }
#' @export
#' @family Project Management
#' @rdname output_dir
#' @seealso
#' \code{\link[glue]{glue}}
#'
#' \code{\link[stringr]{case}}
#' @importFrom glue glue
#' @importFrom stringr str_to_lower
#' @importFrom crayon yellow blue red green
output_dir <- function(start_path=NULL, experiment=NULL, plot_type=NULL, end_path=NULL, root_path=NULL,
custom_path = NULL, overwrite=FALSE, mkdir=TRUE) {
# Create the relative path to the plots. By default the full_path will be <root_path>/output
# With ONLY the plot_type set the full_path will be <root_path>/<plot_type>
# With the start_path set the full path will be <root_path>/<start_path>/?experiment?/?plot_type?/<end_path>.
# Experiment and plot_type are optional. End_path is optional, but defaults to formatted data-time string.
if (is.null(custom_path)) {
# Get the root path (generally this already exists; like the working directory)
if (is.null(root_path)) {
full_path <- file.path(getwd())
} else {
full_path <- file.path(root_path)
}
# Add the output folder to use (generally a new folder, or pre-existing output folder)
if (is.character(start_path)) {
full_path <- file.path(full_path, start_path)
# Add a folder for the specific experiment you are conducting
if (is.character(experiment)) {
full_path <- file.path(full_path, experiment)
}
# Add a folder for the plot_type that's being generated
if (is.character(plot_type)) {
full_path <- file.path(full_path, plot_type)
}
# Add a folder for any extra
if (is.null(end_path)) {
full_path <- file.path(full_path, format(Sys.time(), "%Y-%m-%d_%s"))
} else if (is.character(end_path)) {
full_path <- file.path(full_path, end_path)
}
} else if (is.null(end_path)) {
# Add a folder for the plot_type that's being generated
if (is.character(plot_type)) {
full_path <- file.path(full_path, plot_type)
} else {
full_path <- file.path(full_path, "output")
}
}
} else {
full_path <- custom_path
}
# Directory creation.
if (mkdir == TRUE) {
answer_flag <- FALSE
if (dir.exists(full_path) && overwrite == FALSE) {
warning(glue::glue("The directory {full_path} already exists. And you don't want to overwrite the directory."))
} else if (dir.exists(full_path) && overwrite == TRUE) {
warning(glue::glue(crayon::yellow("You have chosen to overwrite the directory: {full_path}.")))
while (answer_flag == FALSE) {
answer <- readline(prompt = crayon::blue("Are you sure? (Y/N)"))
if (stringr::str_to_lower(answer) == "y") {
dir.create(full_path, recursive = TRUE)
answer_flag <- TRUE
} else if (stringr::str_to_lower(answer) == "n") {
warning(glue::glue(crayon::red("Please set overwrite to FALSE and change the path ({full_path}) with experiment and/or other_path.")))
stop(crayon::red("The files haven't been saved. You have chosen not to overwrite you files."))
} else {
message(crayon::yellow("Please enter Y for YES or N for NO. This is not case sensitive."))
answer_flag <- FALSE
}
}
} else if (!dir.exists(full_path)) {
message(glue::glue(crayon::green("Creating a new directory: {full_path}")))
dir.create(full_path, recursive = TRUE)
}
}
return(full_path)
}
#' @title Transposing Tidy Data
#' @description This function transposes tables containing numeric and categorical data using the
#' tidyr package.
#' @param .data A matrix/data_frame/tibble for transposing.
#' @param ids The column to transpose by. Default: The first column.
#' @param header_name A name for the numeric data that will be transposed.
#' @param preserved_categories A logical denoting weather categorical data should be conserved. A
#' value of FALSE will cause all categorical data except the \emph{ids} to be dropped. A value of
#' TRUE will cause the categorical data to preserved by \emph{tidyr::unite}ing these columns. Default: TRUE
#' @param separated_categories A vector containing ordered column names to use in a previously transposed
#' and categorically preserved table. Retransposing with this set should yield an exact replicate of
#' the original data. Default: NULL
#' @return A transposed data table as a tibble.
#' @pretty_print TRUE
#' @details Transposing can help with preforming operations on the rows of your tibbles.
#' @examples
#' \dontrun{
#' if(interactive()){
#' # This example uses data that are no longer available in the MicrobiomeR package,
#' # however, they can be easily generated with \code{\link{MicrobiomeR}{as_basic_format}}.
#' library(MicrobiomeR)
#' basic_silva <- as_MicrobiomeR_format(MicrobiomeR::raw_silva_2, "basic_format")
#' data <- basic_silva$data$taxa_abundance
#' trans_data <- data %>%
#' transposer(ids = "taxon_id", header_name = "samples")
#' retrans_data <- trans_data %>%
#' transposer(ids="samples", header_name="taxon_id")
#' }
#' }
#' @export
#' @family Data Manipulators
#' @rdname transposer
#' @seealso
#' \code{\link[tibble]{is_tibble}}
#'
#' \code{\link[dplyr]{select_all}}, \code{\link[dplyr]{select}}, \code{\link[dplyr]{reexports}}, \code{\link[dplyr:tidyeval]{sym}}, \code{\link[dplyr:reexports]{one_of}}
#'
#' \code{\link[tidyr]{gather}}, \code{\link[tidyr]{unite}}, \code{\link[tidyr]{spread}}, \code{\link[tidyr]{separate}}
#'
#' \code{\link[stringr]{str_detect}}, \code{\link[stringr]{str_count}}
#' @importFrom tibble is.tibble
#' @importFrom dplyr select_if select as_tibble sym one_of
#' @importFrom tidyr gather unite spread separate
#' @importFrom stringr str_detect str_count
#' @importFrom crayon red yellow silver
transposer <- function(.data, ids = NULL, header_name, preserved_categories = TRUE, separated_categories = NULL) {
# Verify format
if (!(is.matrix(.data) | is.data.frame(.data) | tibble::is.tibble(.data))) {
stop(crayon::red("Data not transposable."))
}
input <- .data
# Get ids if none are given, defaults to the first column
if (is.null(ids)) {
ids <- input[1] %>% colnames()
message(crayon::yellow(sprintf("There were no ids given. Defaulting to the first column: %s", ids)))
}
# Get numeric data (columns)
num_cols <- input %>% dplyr::select_if(is.numeric) %>% purrr::keep(!names(.) %in% ids) %>% colnames()
# Transform
if (preserved_categories == TRUE) { # All categorical data is preserved
message(crayon::yellow("Categorical data will be united as a string, which can be tidyr::separated after re-transposing."))
preserved_categories <- input %>% dplyr::select(-dplyr::one_of(c(num_cols))) %>% colnames()
trans_data <- input %>%
dplyr::as_tibble() %>%
tidyr::gather(key = !!dplyr::sym(header_name), "_data", -c(preserved_categories)) %>% # Gather columns other that aren't preserved
tidyr::unite("_categories", c(preserved_categories), sep = "<_>") %>% # Preserve the categorical data in 1 column
tidyr::spread("_categories", "_data") # Spread categorical data over the numerical data
} else if (preserved_categories == FALSE) { # Only the ids are preserved
trans_data <- input %>% dplyr::select(c(ids, num_cols)) %>%
dplyr::as_tibble() %>%
tidyr::gather(key = !!dplyr::sym(header_name), "_data", -c(ids)) %>%
tidyr::spread(!!dplyr::sym(ids), "_data")
}
# Look for previously transformed tibbles and separate any united colums
if (all(suppressWarnings({stringr::str_detect(trans_data[header_name], "\\<\\_\\>")}))) {
if (!is.null(separated_categories)) {
trans_data <- trans_data %>% tidyr::separate(col = header_name, into = separated_categories, sep = "<_>")
} else {
message(crayon::yellow("Separated categories have not been supplied. Columns will be named as \"category_#\"."))
n_cats <- stringr::str_count(trans_data[[header_name]][1], pattern = "<_>") + 1
trans_data <- trans_data %>% tidyr::separate(col = header_name, into = paste("category", seq(1:n_cats), sep = "_"), sep = "<_>")
}
message(crayon::silver("Re-Transposed Data."))
return(trans_data)
} else {
message(crayon::silver("Transposed Data."))
return(trans_data)
}
}
#' @title Transforming Tidy Data
#' @description This function transforms a table by rows or by columns.
#' @param .data A matrix/data_frame/tibble for transforming.
#' @param func A function, which can be anonymous, that will be used to transform the data. (e.g. proportions:
#' \emph{x/sum(x)}).
#' @param by This denotes how the data should be transformed (\strong{column/row}). Default: 'column'
#' @param ids The column to transpose by if \emph{by = 'row'}. Default: The first column.
#' @param header_name A name for the numeric data that will be transposed if \emph{by = 'row'}.
#' @param preserved_categories A logical denoting weather categorical data should be conserved if \emph{by = 'row'}. A
#' value of FALSE will cause all categorical data except the \emph{ids} to be dropped. A value of
#' TRUE will cause the categorical data to preserved by \emph{tidyr::unite}ing these columns. Default: TRUE
#' @param separated_categories A vector containing ordered column names to use in a previously transposed
#' and categorically preserved table if \emph{by = 'row'}. Retransposing with this set should yield an exact replicate of
#' the original data. Default: NULL
#' @param ... Additional arguments passed on to \emph{func}.
#' @return A tibble that has been transformed.
#' @details This function transforms the supplied data using the \emph{func} parameter, which is used
#' in the purrr package. The purr package allows the use of anonymous functions as described in the link below:
#'
#' \url{https://jennybc.github.io/purrr-tutorial/ls03_map-function-syntax.html#anonymous_function,_formula}
#' @pretty_print TRUE
#' @examples
#' \dontrun{
#' if(interactive()){
#' # This example uses data that are no longer available in the MicrobiomeR package,
#' # however, they can be easily generated with \code{\link{MicrobiomeR}{as_basic_format}}.
#' library(MicrobiomeR)
#' basic_silva <- as_MicrobiomeR_format(MicrobiomeR::raw_silva_2, "basic_format")
#' data <- basic_silva$data$taxa_abundance
#' # Get proportions using the anonymous functions
#' tax_props <- data %>% transformer(~./sum(.))
#' # Get proportions using explicit functions
#' alt_tax_props <- data %>% transformer(function(x)x/sum(x))
#' }
#' }
#' @export
#' @family Data Manipulators
#' @rdname transformer
#' @seealso
#' \code{\link[MicrobiomeR]{transposer}}
#' \code{\link[dplyr]{select_all}}, \code{\link[dplyr]{select}}
#' \code{\link[purrr]{modify}}
#' @importFrom dplyr select_if select
#' @importFrom purrr modify_at
#' @importFrom crayon red silver
transformer <- function(.data, func, by = "column", ids = NULL, header_name = NULL, preserved_categories = TRUE, separated_categories = NULL, ...) {
if (!(is.matrix(.data) | is.data.frame(.data) | tibble::is.tibble(.data))) {
stop(crayon::red("Data not transformable."))
}
input <- .data
if (by == "row") {
# Transpose table and unite all categorical data into one column for row based transformations
input <- input %>% transposer(ids = ids, header_name = header_name, preserved_categories = preserved_categories)
}
# Get numeric column names
num_cols <- input %>% dplyr::select_if(is.numeric) %>% purrr::keep(!names(.) %in% ids) %>% colnames()
# Get all other columsn as preserved columns
preserved_categories <- input %>% dplyr::select(-dplyr::one_of(num_cols)) %>% colnames()
# Transform data
trans_data <- purrr::modify_at(input, num_cols, func, ...)
if (by == "row") {
# Retranspose the table and separate the categorical data for row based transformations
trans_data <- trans_data %>% transposer(ids = header_name, header_name = ids, separated_categories = separated_categories, preserved_categories = FALSE)
}
message(crayon::silver("Transformed Data."))
return(trans_data)
}
#' @title Mock Excel "VlookUp" Function
#' @description A function that mimics excels vlookup, but for R's dataframe.
#' @param lookup_vector A vector of items to look up.
#' @param df The dataframe to search.
#' @param match_var The column name to search in the dataframe.
#' @param return_var The column of data to return when matched.
#' @return A vector that contains the items of interest.
#' @pretty_print TRUE
#' @details A function that works like the VLOOKUP function in excel. This function was
#' borrowed from \url{https://www.r-bloggers.com/an-r-vlookup-not-so-silly-idea/}.
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
#' @family Data Manipulators
#' @rdname vlookup
vlookup <- function(lookup_vector, df, match_var, return_var) {
# TODO: Update the way this returns data. Allow it to add data to a new column.
m <- match(lookup_vector, df[[match_var]])
df[[return_var]][m]
}
#' @title Create Publication Table
#'
#' @description Creates an image of a table from a dataframe.
#'
#' @param dataframe A dataframe object.
#' @return The text table as an object that can be saved to png/tiff/jpg/pdf.
#' @export
#' @examples
#' \dontrun{
#' if(interactive()){
#' # This example uses data that are no longer available in the MicrobiomeR package,
#' # however, they can be easily generated with \code{\link{MicrobiomeR}{as_basic_format}}.
#' library(MicrobiomeR)
#' data <- MicrobiomeR::basic_silva$data$taxa_abundance
#' create_pub_table(data)
#' }
#' }
#' @rdname create_pub_table
#'
#' @importFrom ggpubr ggtexttable ttheme colnames_style tbody_style
create_pub_table <- function(dataframe) {
ggpubr::ggtexttable(dataframe,
theme = ggpubr::ttheme(
colnames.style = ggpubr::colnames_style(color = "black", fill = "white", linewidth = 0, linecolor = "white", face = "bold"),
tbody.style = ggpubr::tbody_style(
color = "black", face = "plain", size = 12,
fill = "white", linewidth = 0, linecolor = "white"
)
)
)
}
####################### Private Package Variables #######################
pkg.private <- new.env(parent = emptyenv())
pkg.private$format_level_list <- list(unknown_format = -1, mixed_format = -1,phyloseq_format = 0,
raw_format = 1, basic_format = 2, analyzed_format = 3)
pkg.private$format_table_list <- list(
raw_format = c("otu_abundance", "otu_annotations"),
basic_format = c("otu_abundance", "otu_annotations",
"taxa_abundance", "otu_proportions", "taxa_proportions"),
analyzed_format = c("otu_abundance", "otu_annotations",
"taxa_abundance", "otu_proportions", "taxa_proportions",
"statistical_data", "stats_tax_data"),
phyloseq_format = c("otu_table", "tax_data", "sample_data", "phy_tree"),
all_formats = c("otu_abundance", "otu_annotations",
"taxa_abundance", "otu_proportions", "taxa_proportions",
"statistical_data", "stats_tax_data",
"otu_table", "tax_data", "sample_data", "phy_tree"),
expected_table_order = list(otu_abundance = "otu_abundance",
otu_annotations = "otu_annotations",
otu_proportions = "otu_proportions",
sample_data = "sample_data",
phy_tree = "phy_tree",
taxa_abundance = "taxa_abundance",
taxa_proportions = "taxa_proportions",
statistical_data = "statistical_data",
stats_tax_data = "stats_tax_data"),
otu_tables = c("otu_abundance", "otu_annotations", "otu_proportions"),
taxa_tables = c("taxa_abundance", "taxa_proportions",
"statistical_data", "stats_tax_data")
)
pkg.private$ranks <- list(c("Kingdom"),
c("Phylum"),
c("Class"),
c("Order"),
c("Family"),
c("Genus"),
c("Species"))
pkg.private$rank_index <- list(Kingdom = 1,
Phylum = 2,
Class = 3,
Order = 4,
Family = 5,
Genus = 6,
Species = 7)
pkg.private$mc_df_rank_list <- list(Kingdom = 4,
Phylum = 5,
Class = 6,
Order = 7,
Family = 8,
Genus = 9,
Species = 10)
pkg.private$input_files = list(
biom_files = list(
silva = system.file("extdata", "silva_OTU.biom", package = "MicrobiomeR"),
greengenes = system.file("extdata", "greengenes_OTU.biom", package = "MicrobiomeR")),
tree_files = list(
silva = system.file("extdata", "silva.tre", package = "MicrobiomeR"),
greengenes = system.file("extdata", "greengenes.tre", package = "MicrobiomeR")),
metadata = list(
two_groups = system.file("extdata", "nephele_metadata2.txt", package = "MicrobiomeR"),
three_groups = system.file("extdata", "nephele_metadata3.txt", package = "MicrobiomeR"))
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.