anansi: Annotation-Based Analysis of Specific Interactions

Documented in anansi

#' Calculate an association network
#' @description
#' ## Description: The `anansi()` function
#' The main workspider function in the anansi package is called
#' `anansi`. It manages the individual functionalities of anansi, including
#' correlation analysis, correlation by group and differential associations.
#' @param web An `AnansiWeb` object, containing two tables with 'omics data
#' and a dictionary that links them. See `weaveWebFromTables()` for how to
#' weave a web.
#' @param metadata A vector or data.frame of categorical or continuous value
#' necessary for differential correlations. Typically a state or treatment.
#' If no argument provided, anansi will let you know and still to regular
#' correlations according to your dictionary.
#' @param groups A vector of the column names of categorical values in the
#' metadata object to control which groups should be assessed for simple
#' correlations.
#' If no argument provided, anansi will let you know and still to regular
#' correlations according to your dictionary.
#' @param formula A formula object. Used to assess differential associations.
#' @param adjust.method Method to adjust p-values for multiple comparisons.
#' `adjust.method = "BH"` is the default value. See `p.adjust()` in
#' the base R `stats` package.
#' @param verbose `Logical scalar`. Whether to print diagnostic information
#'     (Default: `TRUE`).
#' while running. Useful for debugging errors on large datasets.
#' @param return.format `Character scalar`. Should be one of `"table"`,
#'     `"list"`, or `"raw"`. Should the output of [anansi()]
#' respectively be a wide `data.frame` of results, a list containing the results
#' and input, or a list of raw output (used for testing purposes).
#' convenient use. (Default: `"table"`)
#' @param ... additional arguments (currently not used).
#' @return A list of lists containing correlation coefficients, p-values and
#'     q-values for all operations.
#' @importFrom stats model.frame
#' @export
#' @examples
#' # Load example data:
#'
#' data(FMT_data)
#'
#' # Make sure that columns are features and rows are samples.
#'
#' t1 <- t(FMT_metab)
#' t2 <- t(FMT_KOs)
#'
#' # Run anansi pipeline.
#'
#' web <- weaveWeb(
#'     formula = cpd ~ ko,
#'     tableY = t1,
#'     tableX = t2,
#'     link = kegg_link()
#' )
#'
#' anansi_out <- anansi(
#'     web = web,
#'     formula = ~Legend,
#'     groups = "Legend",
#'     metadata = FMT_metadata,
#'     adjust.method = "BH",
#'     verbose = TRUE
#' )
#'
#'
#' library(tidyr)
#'
#' # Use tidyr to wrangle the correlation r-values to a single column
#' anansiLong <- anansi_out |>
#'     pivot_longer(starts_with("All") | contains("FMT")) |>
#'     separate_wider_delim(
#'         name,
#'         delim = "_", names = c("cor_group", "param")
#'     ) |>
#'     pivot_wider(names_from = param, values_from = value)
#'
#' # Only consider interactions where the entire model fits well enough.
#' library(ggplot2)
#' anansiLong <- anansiLong[anansiLong$full_p.values < 0.05, ]
#'
#'
#'
#' ggplot(
#'     data = anansiLong,
#'     aes(
#'         x = r.values,
#'         y = feature_X,
#'         fill = cor_group,
#'         alpha = disjointed_Legend_p.values < 0.05
#'     )
#' ) +
#'
#'     # Make a vertical dashed red line at x = 0
#'     geom_vline(xintercept = 0, linetype = "dashed", colour = "red") +
#'
#'     # Points show  raw correlation coefficients
#'     geom_point(shape = 21, size = 3) +
#'
#'     # facet per compound
#'     ggforce::facet_col(~feature_Y, space = "free", scales = "free_y") +
#'     scale_fill_manual(values = c(
#'         "Young yFMT" = "#2166ac",
#'         "Aged oFMT" = "#b2182b",
#'         "Aged yFMT" = "#ef8a62",
#'         "All" = "gray"
#'     )) +
#'     theme_bw()
#'
#' # Using miaViz style function:
#'
#' p <- plotAnansi(anansi_out,
#'     association.type = "disjointed",
#'     model.var = "Legend",
#'     fill_by = "group",
#'     signif.threshold = 0.05,
#'     x_lab = "Pearson's rho"
#' )
#' p <- p +
#'     scale_fill_manual(values = c(
#'         "Young yFMT" = "#2166ac",
#'         "Aged oFMT" = "#b2182b",
#'         "Aged yFMT" = "#ef8a62",
#'         "All" = "gray"
#'     )) +
#'     theme_bw()
#'
#' p
#'
anansi <- function(
    web,
    formula,
    groups = NULL,
    metadata = NULL,
    adjust.method = "BH",
    verbose = TRUE,
    return.format = "table",
    ...
) {
    return.format <-
        match.arg(return.format, choices = c("table", "list", "raw"))
    # generate anansiYarn input object
    input <- prepInput(
        web = web,
        formula = formula,
        groups = groups,
        metadata = metadata,
        verbose = verbose
    )
    int.terms <- input$int.terms
    groups <- input$groups
    group.id <- input$group.id
    errorterm <- input$error.term
    sat_model <- input$lm.formula
    metadata <- input$metadata

    out.list <- vector(
        "list",
        length = 1 + length(group.id) + (2 * length(int.terms))
    )
    out.list[seq_along(group.id)] <- call_groupwise(
        web,
        groups,
        metadata,
        verbose
    )
    # Sort out metadata formatting for differential association testing
    meta.frame <- model.frame(formula = sat_model, cbind(x = 1, metadata))

    out.list[
        length(group.id) +
            seq_len(1L + (2L * length(int.terms)))
    ] <- anansiDiffCor(
        web,
        sat_model,
        errorterm,
        int.terms,
        meta.frame,
        verbose
    )
    if (return.format != "raw") {
        results <- result.df(out.list, Matrix::as.matrix(web@dictionary))
        results <- anansi.p.adjust(results, adjust.method)
        attr(results, "group_terms") <-
            named_group_list(group.id, groups, metadata)
        attr(results, "model_terms") <- named_term_list(int.terms, metadata)
    }
    switch(
        return.format,
        "table" = return(results),
        "list" = return(list(results, input = input)),
        "raw" = return(out.list)
    )
}


#' Assess formula, trim metadata and prepare output for anansi workflow
#' @description Initialize `anansiInput` component of output. Should not be
#' called by user.
#' @importFrom S4Vectors as.data.frame.DataFrame
#' @noRd
#'
prepInput <- function(web, formula, groups, metadata, verbose) {
    # If no metadata argument try web slot. If list select one named "metadata".
    if (is.null(metadata)) metadata <- metadata(web, simplify = FALSE)
    if (!is.data.frame(metadata)) metadata <- metadata[["metadata"]]

    stopifnot(
        "No metadata argument provided or found in AnansiWeb" = prod(dim(
            metadata
        )) >
            0
    )
    raw_terms <- terms.formula(formula, "Error", data = metadata)
    indErr <- attr(raw_terms, "specials")$Error

    groups <- check_groups(groups, raw_terms, indErr, metadata, verbose)

    all_terms <- if (is.null(indErr)) {
        labels(raw_terms)
    } else {
        labels(raw_terms)[-indErr]
    }
    sat_model <- make_saturated_model(formula, raw_terms, indErr, verbose)
    error.term <- if (is.null(indErr)) {
        NULL
    } else {
        deparse1(
            attr(raw_terms, "variables")[[1L + indErr]][[2L]],
            backtick = TRUE
        )
    }
    input <- list(
        web = web,
        lm.formula = sat_model,
        error.term = error.term,
        int.terms = all_terms,
        groups = groups[[1]],
        group.id = groups[[2]],
        metadata = `row.names<-.data.frame`(metadata, NULL)
    )

    return(input)
}

#' Check group argument.
#' @noRd
#' @importFrom stats as.formula update.formula
#'
check_groups <- function(groups, raw_terms, indErr, metadata, verbose) {
    # If user input, check it
    if (!is.null(groups)) {
        missing_groups <- !groups %in% colnames(metadata)
        stopifnot(
            "Grouping variable(s) not recognised. Please check input." = !any(
                missing_groups
            )
        )
        group.id <- check_missing_combos(metadata[groups])
        return(list(groups, group.id))
    }

    # If no input, look for categorical variables
    ind_o1 <- attr(raw_terms, "order") == 1
    if (!is.null(indErr)) {
        ind_o1[indErr] <- FALSE
    }
    if (!any(ind_o1)) {
        if (verbose) {
            message("No grouping variable found for groupwise correlations.")
        }
        return(list(NULL, 1))
    }
    groups <- labels(raw_terms)[ind_o1]

    sub_meta <- metadata[, groups, drop = FALSE]
    sub_meta <- sub_meta[, vapply(sub_meta, is.categorical, NA), drop = FALSE]

    if (NCOL(sub_meta) == 0L) {
        if (verbose) {
            message("No grouping variable found for groupwise correlations.")
        }
        return(list(NULL, 1L))
    }
    group.id <- check_missing_combos(sub_meta)
    return(list(groups, group.id))
}

#' Prepare saturated model, deal with `Error` terms.
#' @noRd
#' @importFrom stats as.formula update.formula
#'
make_saturated_model <- function(formula, raw_terms, indErr, verbose) {
    # Simple case; No random intercept; make regular saturated model
    if (is.null(indErr)) {
        sat_model <- update.formula(old = formula, ~ x * 1 * (.))
        if (verbose) {
            message(
                paste0(
                    "Fitting least-squares for following model:\n",
                    paste0(as.character(sat_model), " ", collapse = "")
                )
            )
        }
        return(sat_model)
    }

    # Case with repeated measures:
    stopifnot(
        "Only one Error() term allowed; more detected." = length(indErr) < 2
    )
    errorterm <- attr(raw_terms, "variables")[[1L + indErr]]
    sat_model <- update.formula(
        old = formula,
        new = as.formula(
            paste(
                "~",
                deparse1(errorterm[[2L]], backtick = TRUE),
                "+ x * 1 * (. -",
                deparse1(errorterm, backtick = TRUE),
                ")"
            ),
            env = environment(formula)
        )
    )
    if (verbose) {
        message(paste0(
            "Fitting least-squares for following model:\n",
            paste0(
                as.character(update.formula(
                    old = formula,
                    new = as.formula(
                        paste(
                            "~ x * 1 * (. -",
                            deparse1(errorterm, backtick = TRUE),
                            ")"
                        ),
                        env = environment(formula)
                    )
                )),
                " ",
                collapse = ""
            ),
            "\nwith '",
            deparse1(errorterm[[2L]], backtick = TRUE),
            "' as random intercept."
        ))
    }
    return(sat_model)
}

#' Is character, factor or ordered factor
#' @description wrapper around
#' `is.character(x) || is.factor(x) || is.ordered(x)`
#' @param x an object to be evaluated as being categorical
#' @returns a boolean.
#'
is.categorical <- function(x) is.character(x) || is.factor(x) || is.ordered(x)

#' Return levels or 'numeric'
#' @noRd
#'
lvs_or_num <- function(x) {
    if (is.categorical(x)) {
        return(unique(as.character(x)))
    } # else
    return("numeric")
}

#' Provide name and levels for categories for group terms
#' @description convenience function to generate group terms output
#' @param g character vector of all 'All' followed by all unique terms
#' @param t character vector of term(s)
#' @param m data.frame of provided metadata
#' @returns a named list of character vectors, where names are terms and
#' containing vectors are unique levels or 'numeric' if not categorical. First
#' Entry is named 'All' and contains all levels.
#'
named_group_list <- function(g, t, m) c(list(All = g), lapply(m[t], lvs_or_num))

#' Provide name and levels for categories
#' @description convenience function to generate output
#' @param t character vector of term(s)
#' @param m data.frame of provided metadata
#' @returns a named list of character vectors, where names are terms and
#' containing vectors are unique levels or 'numeric' if not categorical.
#'
named_term_list <- function(t, m) {
    order_one <- t %in% colnames(m)
    f_order <- t[order_one]
    h_order <- t[!order_one]

    f_list <- lapply(m[, f_order, drop = FALSE], lvs_or_num)
    h_list <- NULL
    if (length(h_order) != 0) {
        m_num <- !unlist(lapply(m, is.categorical))
        m[m_num] <- "numeric"
        h_terms <- strsplit(h_order, split = ":", fixed = TRUE)
        h_list <- lapply(
            h_terms,
            function(x) unique(do.call(paste, c(m[x], sep = "_")))
        )
        names(h_list) <- h_order
    }

    return(c(f_list, h_list))
}

#' Check whether any combination of categories in group argument is missing
#'
#' @noRd
#'
check_missing_combos <- function(metadata) {
    group.factor <- interaction(metadata, sep = "_")
    empir.factor <- factor(group.factor)
    if ("All" %in% levels(group.factor)) {
        stop("'All' cannot be present in columns of 'group' argument.")
    }
    if (nlevels(group.factor) > nlevels(empir.factor)) {
        missing_lv <- paste(
            setdiff(
                levels(group.factor),
                levels(empir.factor)
            ),
            collapse = ", "
        )
        warning(
            "Missing combinations of categorical variables: ",
            missing_lv,
            "\n",
            "NAs introduced; Estimated effects may be unbalanced.",
            call. = FALSE
        )
    }

    group.id <- c("All", levels(empir.factor))
    return(group.id)
}