#' Clean data for the KOMODO2 workflow
#'
#' This script implements the second step of the LCFD workflow of KOMODO2.
#' It is responsible for dealing with data inconsistencies, including
#' missing values, outliers and undesired characters, as well as data
#' merging. It also preprocesses data to allow for more flexible inputs from
#' the user, such as automatically converting common annotation output to a
#' single standard format.
#'
#' The script expects enriched `KOMODO2`-type lists, which are generated by
#' [load_data()].
#'
#' @details
#' `defs` must carry both the `"list"` and `"KOMODO2"` classes. The fields
#' read directly by this function are:
#' \itemize{
#'   \item `type`: matched case-insensitively against `"significance"` and
#'     `"correlation"` to select the type-specific cleaning routine. Any
#'     other value raises an error.
#'   \item `ontology`: when equal to `"other"`, the dictionary conversion
#'     described below is performed.
#'   \item `dictionary`: required (non-`NULL`) when `ontology == "other"`.
#'     It is indexed as a two-dimensional object with at least two columns;
#'     duplicated rows are dropped and the result is converted to a named
#'     list whose names come from the first column and whose values come
#'     from the second.
#' }
#'
#' @param defs an enriched KOMODO2-type list object (see Details).
#'
#' @return updated \code{defs} list containing information from parsed
#' genome maps (e.g., for test and back genomes if `type == "significance"`)
#'
#' @importFrom assertthat assert_that
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Build an input list:
#' fpath1 <- system.file("extdata", "gene2GO", package="KOMODO2")
#' fpath2 <- system.file("extdata", "metadata/GO_metadata_Pan_proxy.txt", package="KOMODO2")
#' fpath3 <- system.file("extdata", "trees/tree_genome_IDs.nwk", package="KOMODO2")
#'
#' defs <- list(annotation_files_dir = fpath1,
#'              output_dir = "./results/GO_Pan_proxy/",
#'              dataset.info = fpath2,
#'              x.column = 2,
#'              ontology = "GO",
#'              dict.path = "",
#'              column = "GO",
#'              denominator.column = "",
#'              tree_path = fpath3,
#'              tree_type = "newick",
#'              linear_model_cutoff = 0.5,
#'              type = "correlation")
#'
#' defs <- load_data(defs, cores = 2)
#' defs <- clean_data(defs)
#' }
clean_data <- function(defs){
  # ================== Sanity checks ==================
  assert_that(all(c("list", "KOMODO2") %in% class(defs)))

  # Normalise the analysis type once and validate it before doing any work.
  # Without this, an unrecognised value would make switch() return NULL,
  # silently replacing `defs` and causing an unrelated error further down.
  analysis_type <- tolower(defs$type)
  if (length(analysis_type) != 1L || is.na(analysis_type)) {
    stop("'defs$type' must be a single, non-missing string ",
         "(\"significance\" or \"correlation\").",
         call. = FALSE)
  }

  # ================== Preprocessing ==================
  cat("\nPreliminary data cleaning:\n")
  defs <- switch(analysis_type,
                 significance = clean_data_significance(defs),
                 correlation  = clean_data_correlation(defs),
                 stop("Unknown 'type' in defs: \"", analysis_type, "\". ",
                      "Expected \"significance\" or \"correlation\".",
                      call. = FALSE))

  if (defs$ontology == "other") {
    assert_that(!is.null(defs$dictionary))

    # Convert the two-column dictionary to a named list
    # (names = first column, values = second column). A local variable is
    # used so that no scratch field is created on (or removed from) `defs`.
    dict_table <- unique(defs$dictionary)
    dict_list  <- as.list(dict_table[, 2])
    names(dict_list) <- dict_table[, 1]
    defs$dictionary  <- dict_list
  }

  defs
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.