#' Calculate an association network
#' @description
#' ## Description: The `anansi()` function
#' The main workspider function in the anansi package is called
#' `anansi`. It manages the individual functionalities of anansi, including
#' correlation analysis, correlation by group and differential associations.
#' @param web An `AnansiWeb` object, containing two tables with 'omics data
#' and a dictionary that links them. See `weaveWebFromTables()` for how to
#' weave a web.
#' @param metadata A vector or data.frame of categorical or continuous value
#' necessary for differential correlations. Typically a state or treatment.
#' If no argument provided, anansi will let you know and still to regular
#' correlations according to your dictionary.
#' @param groups A vector of the column names of categorical values in the
#' metadata object to control which groups should be assessed for simple
#' correlations.
#' If no argument provided, anansi will let you know and still to regular
#' correlations according to your dictionary.
#' @param formula A formula object. Used to assess differential associations.
#' @param adjust.method Method to adjust p-values for multiple comparisons.
#' `adjust.method = "BH"` is the default value. See `p.adjust()` in
#' the base R `stats` package.
#' @param verbose `Logical scalar`. Whether to print diagnostic information
#' (Default: `TRUE`).
#' while running. Useful for debugging errors on large datasets.
#' @param return.format `Character scalar`. Should be one of `"table"`,
#' `"list"`, or `"raw"`. Should the output of [anansi()]
#' respectively be a wide `data.frame` of results, a list containing the results
#' and input, or a list of raw output (used for testing purposes).
#' convenient use. (Default: `"table"`)
#' @param ... additional arguments (currently not used).
#' @return A list of lists containing correlation coefficients, p-values and
#' q-values for all operations.
#' @importFrom stats model.frame
#' @export
#' @examples
#' # Load example data:
#'
#' data(FMT_data)
#'
#' # Make sure that columns are features and rows are samples.
#'
#' t1 <- t(FMT_metab)
#' t2 <- t(FMT_KOs)
#'
#' # Run anansi pipeline.
#'
#' web <- weaveWeb(
#' formula = cpd ~ ko,
#' tableY = t1,
#' tableX = t2,
#' link = kegg_link()
#' )
#'
#' anansi_out <- anansi(
#' web = web,
#' formula = ~Legend,
#' groups = "Legend",
#' metadata = FMT_metadata,
#' adjust.method = "BH",
#' verbose = TRUE
#' )
#'
#'
#' library(tidyr)
#'
#' # Use tidyr to wrangle the correlation r-values to a single column
#' anansiLong <- anansi_out |>
#' pivot_longer(starts_with("All") | contains("FMT")) |>
#' separate_wider_delim(
#' name,
#' delim = "_", names = c("cor_group", "param")
#' ) |>
#' pivot_wider(names_from = param, values_from = value)
#'
#' # Only consider interactions where the entire model fits well enough.
#' library(ggplot2)
#' anansiLong <- anansiLong[anansiLong$full_p.values < 0.05, ]
#'
#'
#'
#' ggplot(
#' data = anansiLong,
#' aes(
#' x = r.values,
#' y = feature_X,
#' fill = cor_group,
#' alpha = disjointed_Legend_p.values < 0.05
#' )
#' ) +
#'
#' # Make a vertical dashed red line at x = 0
#' geom_vline(xintercept = 0, linetype = "dashed", colour = "red") +
#'
#' # Points show raw correlation coefficients
#' geom_point(shape = 21, size = 3) +
#'
#' # facet per compound
#' ggforce::facet_col(~feature_Y, space = "free", scales = "free_y") +
#' scale_fill_manual(values = c(
#' "Young yFMT" = "#2166ac",
#' "Aged oFMT" = "#b2182b",
#' "Aged yFMT" = "#ef8a62",
#' "All" = "gray"
#' )) +
#' theme_bw()
#'
#' # Using miaViz style function:
#'
#' p <- plotAnansi(anansi_out,
#' association.type = "disjointed",
#' model.var = "Legend",
#' fill_by = "group",
#' signif.threshold = 0.05,
#' x_lab = "Pearson's rho"
#' )
#' p <- p +
#' scale_fill_manual(values = c(
#' "Young yFMT" = "#2166ac",
#' "Aged oFMT" = "#b2182b",
#' "Aged yFMT" = "#ef8a62",
#' "All" = "gray"
#' )) +
#' theme_bw()
#'
#' p
#'
anansi <- function(
web,
formula,
groups = NULL,
metadata = NULL,
adjust.method = "BH",
verbose = TRUE,
return.format = "table",
...
) {
return.format <-
match.arg(return.format, choices = c("table", "list", "raw"))
# generate anansiYarn input object
input <- prepInput(
web = web,
formula = formula,
groups = groups,
metadata = metadata,
verbose = verbose
)
int.terms <- input$int.terms
groups <- input$groups
group.id <- input$group.id
errorterm <- input$error.term
sat_model <- input$lm.formula
metadata <- input$metadata
out.list <- vector(
"list",
length = 1 + length(group.id) + (2 * length(int.terms))
)
out.list[seq_along(group.id)] <- call_groupwise(
web,
groups,
metadata,
verbose
)
# Sort out metadata formatting for differential association testing
meta.frame <- model.frame(formula = sat_model, cbind(x = 1, metadata))
out.list[
length(group.id) +
seq_len(1L + (2L * length(int.terms)))
] <- anansiDiffCor(
web,
sat_model,
errorterm,
int.terms,
meta.frame,
verbose
)
if (return.format != "raw") {
results <- result.df(out.list, Matrix::as.matrix(web@dictionary))
results <- anansi.p.adjust(results, adjust.method)
attr(results, "group_terms") <-
named_group_list(group.id, groups, metadata)
attr(results, "model_terms") <- named_term_list(int.terms, metadata)
}
switch(
return.format,
"table" = return(results),
"list" = return(list(results, input = input)),
"raw" = return(out.list)
)
}
#' Assess formula, trim metadata and prepare output for anansi workflow
#' @description Initialize `anansiInput` component of output. Should not be
#' called by user.
#' @importFrom S4Vectors as.data.frame.DataFrame
#' @noRd
#'
prepInput <- function(web, formula, groups, metadata, verbose) {
# If no metadata argument try web slot. If list select one named "metadata".
if (is.null(metadata)) metadata <- metadata(web, simplify = FALSE)
if (!is.data.frame(metadata)) metadata <- metadata[["metadata"]]
stopifnot(
"No metadata argument provided or found in AnansiWeb" = prod(dim(
metadata
)) >
0
)
raw_terms <- terms.formula(formula, "Error", data = metadata)
indErr <- attr(raw_terms, "specials")$Error
groups <- check_groups(groups, raw_terms, indErr, metadata, verbose)
all_terms <- if (is.null(indErr)) {
labels(raw_terms)
} else {
labels(raw_terms)[-indErr]
}
sat_model <- make_saturated_model(formula, raw_terms, indErr, verbose)
error.term <- if (is.null(indErr)) {
NULL
} else {
deparse1(
attr(raw_terms, "variables")[[1L + indErr]][[2L]],
backtick = TRUE
)
}
input <- list(
web = web,
lm.formula = sat_model,
error.term = error.term,
int.terms = all_terms,
groups = groups[[1]],
group.id = groups[[2]],
metadata = `row.names<-.data.frame`(metadata, NULL)
)
return(input)
}
#' Check group argument.
#' @noRd
#' @importFrom stats as.formula update.formula
#'
check_groups <- function(groups, raw_terms, indErr, metadata, verbose) {
# If user input, check it
if (!is.null(groups)) {
missing_groups <- !groups %in% colnames(metadata)
stopifnot(
"Grouping variable(s) not recognised. Please check input." = !any(
missing_groups
)
)
group.id <- check_missing_combos(metadata[groups])
return(list(groups, group.id))
}
# If no input, look for categorical variables
ind_o1 <- attr(raw_terms, "order") == 1
if (!is.null(indErr)) {
ind_o1[indErr] <- FALSE
}
if (!any(ind_o1)) {
if (verbose) {
message("No grouping variable found for groupwise correlations.")
}
return(list(NULL, 1))
}
groups <- labels(raw_terms)[ind_o1]
sub_meta <- metadata[, groups, drop = FALSE]
sub_meta <- sub_meta[, vapply(sub_meta, is.categorical, NA), drop = FALSE]
if (NCOL(sub_meta) == 0L) {
if (verbose) {
message("No grouping variable found for groupwise correlations.")
}
return(list(NULL, 1L))
}
group.id <- check_missing_combos(sub_meta)
return(list(groups, group.id))
}
#' Prepare saturated model, deal with `Error` terms.
#' @noRd
#' @importFrom stats as.formula update.formula
#'
make_saturated_model <- function(formula, raw_terms, indErr, verbose) {
# Simple case; No random intercept; make regular saturated model
if (is.null(indErr)) {
sat_model <- update.formula(old = formula, ~ x * 1 * (.))
if (verbose) {
message(
paste0(
"Fitting least-squares for following model:\n",
paste0(as.character(sat_model), " ", collapse = "")
)
)
}
return(sat_model)
}
# Case with repeated measures:
stopifnot(
"Only one Error() term allowed; more detected." = length(indErr) < 2
)
errorterm <- attr(raw_terms, "variables")[[1L + indErr]]
sat_model <- update.formula(
old = formula,
new = as.formula(
paste(
"~",
deparse1(errorterm[[2L]], backtick = TRUE),
"+ x * 1 * (. -",
deparse1(errorterm, backtick = TRUE),
")"
),
env = environment(formula)
)
)
if (verbose) {
message(paste0(
"Fitting least-squares for following model:\n",
paste0(
as.character(update.formula(
old = formula,
new = as.formula(
paste(
"~ x * 1 * (. -",
deparse1(errorterm, backtick = TRUE),
")"
),
env = environment(formula)
)
)),
" ",
collapse = ""
),
"\nwith '",
deparse1(errorterm[[2L]], backtick = TRUE),
"' as random intercept."
))
}
return(sat_model)
}
#' Is character, factor or ordered factor
#' @description wrapper around
#' `is.character(x) || is.factor(x) || is.ordered(x)`
#' @param x an object to be evaluated as being categorical
#' @returns a boolean.
#'
is.categorical <- function(x) is.character(x) || is.factor(x) || is.ordered(x)
#' Return levels or 'numeric'
#' @noRd
#'
lvs_or_num <- function(x) {
if (is.categorical(x)) {
return(unique(as.character(x)))
} # else
return("numeric")
}
#' Provide name and levels for categories for group terms
#' @description convenience function to generate group terms output
#' @param g character vector of all 'All' followed by all unique terms
#' @param t character vector of term(s)
#' @param m data.frame of provided metadata
#' @returns a named list of character vectors, where names are terms and
#' containing vectors are unique levels or 'numeric' if not categorical. First
#' Entry is named 'All' and contains all levels.
#'
named_group_list <- function(g, t, m) c(list(All = g), lapply(m[t], lvs_or_num))
#' Provide name and levels for categories
#' @description convenience function to generate output
#' @param t character vector of term(s)
#' @param m data.frame of provided metadata
#' @returns a named list of character vectors, where names are terms and
#' containing vectors are unique levels or 'numeric' if not categorical.
#'
named_term_list <- function(t, m) {
order_one <- t %in% colnames(m)
f_order <- t[order_one]
h_order <- t[!order_one]
f_list <- lapply(m[, f_order, drop = FALSE], lvs_or_num)
h_list <- NULL
if (length(h_order) != 0) {
m_num <- !unlist(lapply(m, is.categorical))
m[m_num] <- "numeric"
h_terms <- strsplit(h_order, split = ":", fixed = TRUE)
h_list <- lapply(
h_terms,
function(x) unique(do.call(paste, c(m[x], sep = "_")))
)
names(h_list) <- h_order
}
return(c(f_list, h_list))
}
#' Check whether any combination of categories in group argument is missing
#'
#' @noRd
#'
check_missing_combos <- function(metadata) {
group.factor <- interaction(metadata, sep = "_")
empir.factor <- factor(group.factor)
if ("All" %in% levels(group.factor)) {
stop("'All' cannot be present in columns of 'group' argument.")
}
if (nlevels(group.factor) > nlevels(empir.factor)) {
missing_lv <- paste(
setdiff(
levels(group.factor),
levels(empir.factor)
),
collapse = ", "
)
warning(
"Missing combinations of categorical variables: ",
missing_lv,
"\n",
"NAs introduced; Estimated effects may be unbalanced.",
call. = FALSE
)
}
group.id <- c("All", levels(empir.factor))
return(group.id)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.