#'@title Check and correct types of metadata table
#'@description TBD
#'@param df.meta a tibble of the metadata with `SampleID` column
#'@param meta.sub default NULL or a character vector containing a subset of variables
#'@param exceptions default NULL or a character vector containing variables with
#' more than 5 factor levels that should not be typecast as a character
#'@param analysis default NULL or a string triggering specific actions (e.g. 'metacardis')
#'@return a tibble with `meta.sub` columns and all variable types updated
#'@importFrom dplyr select
#'@importFrom purrr imap_dfc map_chr
#'@importFrom stringr str_split
#'@export
correct.metadata <- function(df.meta, meta.sub = NULL,
exceptions = NULL, analysis = NULL) {
if (is.null(meta.sub)) meta.vars <- colnames(df.meta)
else if (!('SampleID' %in% meta.sub)) meta.vars <- c('SampleID', meta.sub)
else meta.vars <- meta.sub #check validity later
out <- df.meta %>%
dplyr::select(dplyr::one_of(meta.vars)) %>%
purrr::imap_dfc(function(.x, .y) {
browser()
if ((!is.null(exceptions)) & (.y %in% exceptions)) as.factor(.x)
else if (is.factor(.x) || is.double(.x)) .x
else if (length(unique(.x)) == 1) as.character(.x)
else if (length(unique(.x)) == 2) {
# yes/no values with or without NAs
if ('yes' %in% tolower(.x)) {
if (any(is.na(.x))) factor(tolower(.x),
levels = c('yes','no', NA),
labels = c(1,0,NA))
else factor(tolower(.x),
levels = c('yes','no'),
labels = c(1,0))}
# binary values with NAs
else if (any(is.na(.x))) factor(.x,
levels = c(unique(.x), NA),
labels = c(seq(length(unique(.x))), NA))
# binary values without NAs
else factor(.x, levels = c(unique(.x)), labels = c(1,2))}
else if (length(unique(.x)) == 3) { #might be binary +NAs
if (NA %in% unique(.x)) {factor(tolower(.x),
levels = c('yes','no',NA), #assumption its yes/no+NA...
labels = c(1,0)) %>%
forcats::fct_explicit_na(na_level = NA)}
else factor(.x, levels = c(unique(.x)), labels = c(seq(length(unique(.x)))))}
else if (length(unique(.x)) < 6) {
if (any(is.na(.x))) forcats::fct_explicit_na(.x, na_level = NA)
else factor(.x,
levels = unique(.x),
labels = seq(length(unique(.x))))}
else as.character(.x)
})
if (analysis == 'metacardis') {
vars.update <- meta.vars %>%
stringr::str_split('_') %>%
purrr::map_chr(~ head(., n=1))
colnames(out) <- vars.update
}
return(out)
}
#'@title Get name and type of metadata variables available in data
#'@description TBD
#'@param df.stats a tibble with feature rows + nested data column
#'@return a character vector with names = variables and content = variable type
#'@importFrom magrittr use_series extract2
get.metadata.vars <- function(df.stats) {
df.stats %>%
magrittr::use_series(data) %>%
magrittr::extract2(1) %>%
ansimo::determine.meta.var.classes()
}
prune.metadata.vars <- function(meta.vars) {
vars <- list()
vars$keep <- meta.vars[which(meta.vars == 'factor')]
vars$discard <- meta.vars[which(meta.vars != 'factor')]
return(vars)
}
determine.meta.var.classes <- function(df.single.feat) {
df.single.feat %>%
dplyr::select(-c('SampleID','abundance')) %>%
purrr::map_chr(~ class(.))
}
extract.metadata.abundances <- function(df.stats, s.var, label) {
#browser()
df.stats %>%
dplyr::select(c('feature','data')) %>%
dplyr::mutate(data = purrr::map(data, ~ dplyr::select(., c('abundance', s.var, label))))
}
#'@title Represent a metadata table as a single string
#'@description TBD
#'@param df.meta a metadata tibble
#'@return a character vector of length 1 (a single string)
#'@importFrom purrr imap
#'@importFrom stringr str_replace_na str_flatten
characterize.metadata <- function(df.meta) {
data <- df.meta %>%
purrr::imap(function(.x, .y) {
vals <- .x %>%
stringr::str_replace_na(replacement = 'NA') %>%
stringr::str_flatten(collapse = '.')
paste0('_', .y, '_', vals)}) %>%
unlist(use.names = FALSE) %>%
stringr::str_flatten()
}
#'@title Reconstruct a metadata table from string representation
#'@description TBD
#'@param meta.str metadata represented as a string with '_' flanking
#' variable/column names and '.' separating individual values
#'@return a tibble
#'@importFrom purrr imap pluck map flatten
#'@importFrom stringr str_split
#'@importFrom magrittr extract set_names
#'@importFrom dplyr bind_cols mutate_if
reconstruct.metadata <- function(meta.str) {
meta.list <- meta.str %>%
stringr::str_split('[_]') %>%
purrr::pluck(1) %>%
magrittr::extract(-1) %>%
as.list()
cols <- meta.list[seq(1, length(meta.list), 2)]
raw.data <- meta.list[seq(2, length(meta.list), 2)]
raw.data %>%
magrittr::set_names(cols) %>%
purrr::map(~ stringr::str_split(., '[.]')) %>%
purrr::flatten() %>%
dplyr::bind_cols() %>%
dplyr::mutate_if(is.character, ~ as.factor(.))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.