R/weaveWeb-methods.R

Defines functions web_missing_link trimByInput mapFromLink dictionaryMatrix subsetByPath stepSeq termSeq weaveKEGG weaveWeb.formula weaveWeb.default weaveWeb

Documented in weaveKEGG weaveWeb weaveWeb.default weaveWeb.formula

#' Weave an AnansiWeb
#' @name weaveWeb
#' @param formula `formula` of the form y ~ x, denoting desired output
#'     format; assigns y to rows and columns to x. Equivalent to using `x`
#'     and `y` arguments.
#' @param x,y `Character scalar`, names of feature types that should be
#'     linked. Should be found in the column names of `link`.
#' @param link One of the following:
#'     \itemize{
#'         \item `Character scalar` with value `"none"`.
#'         \item `data.frame` with two columns
#'         \item `list` with two such `data.frame`s.
#'         }
#' @param tableY,tableX A table containing features of interest. Rows should be
#'     samples and columns should be features. Y and X refer to the position of
#'     the features in a formula: Y ~ X.
#' @param metadata Optional `data.frame` of sample metadata, to be included with
#'     output. Can be accessed from `AnansiWeb` generated by `weaveWeb()` with
#'     `metadata(output)`.
#' @param ... further arguments.
#' @details
#' If the `link` argument is `"none"`, all features will be considered
#' linked. If one or more `data.frame`s, colnames should be as specified in
#' `x` and `y`.
#' @param verbose `Logical scalar`. Whether to print diagnostic information
#'     (Default: `TRUE`).
#' @seealso \itemize{
#'     \item [AnansiWeb]: For general constructor and methods.
#'     \item [kegg_link()]: For examples of input for link argument.
#'     \item [getWeb()]: For [MultiAssayExperiment::MultiAssayExperiment()]
#'     methods.
#' }
#'
#' @returns an `AnansiWeb` object, with sparse binary biadjacency matrix
#' with features from `y` as rows and features from `x` as columns in
#' `dictionary` slot.
#' @description
#' Generate a biadjacency matrix, linking the features between two tables.
#' Return an `AnansiWeb` object which contains all three.
#'
#' `weaveWeb()` is for general use and has flexible default settings.
#'
#' `weaveKEGG()` is a wrapper that sets `link` to `kegg_link()`.
#' All variants are special cases of `weaveWeb()`.
#' @examples
#' # Setup demo tables
#' ec2ko <- kegg_link()[["ec2ko"]]
#' ec2cpd <- kegg_link()[["ec2cpd"]]
#'
#' # Basic usage
#' weaveWeb(cpd ~ ko, link = kegg_link())
#' weaveWeb(x = "ko", y = "ec", link = ec2ko)
#' weaveWeb(ec ~ cpd, link = ec2cpd)
#'
#' # A wrapper is available for kegg ko, ec and cpd data
#' generic <- weaveWeb(cpd ~ ko, link = kegg_link())
#' kegg_wrapper <- weaveKEGG(cpd ~ ko)
#'
#' identical(generic, kegg_wrapper)
#'
#' # The following are equivalent to transposition:
#' a <- weaveWeb(ko ~ cpd, link = kegg_link()) |> dictionary()
#' b <- weaveWeb(cpd ~ ko, link = kegg_link()) |> dictionary()
#'
#' identical(a, Matrix::t(b))
#'
NULL

#' @rdname weaveWeb
#' @export
#' @order 0
#'
weaveWeb <- function(x, ...) UseMethod("weaveWeb")

#' @rdname weaveWeb
#' @importFrom Matrix Matrix
#' @order 1
#' @export
#'
weaveWeb.default <- function(
    x,
    y,
    link = NULL,
    tableX = NULL,
    tableY = NULL,
    metadata = NULL,
    verbose = TRUE,
    ...
) {
    terms <- c(y, x)
    stopifnot(
        "both 'x' and 'y' terms must be provided as character" = is(
            terms,
            "character"
        ) &&
            length(terms) == 2L
    )
    if (identical(link, "none")) {
        return(web_missing_link(tableX, tableY, terms))
    }

    # Ensure link is a MultiFactor
    link <- MultiFactor(link)
    # Determine required ids in order, only keep relevant elements of link.
    all_terms <- termSeq(x, y, link)
    link <- subsetByPath(link, all_terms)

    # Trim link levels and tables based on feature overlap
    if (!is.null(tableX)) {
        keep <- sort(intersect(colnames(tableX), levels(link)[[x]]))
        if (verbose && length(keep) < NCOL(tableX)) {
            message("Dropped features in tableX: ", length(keep), " remain. ")
        }
        tableX <- tableX[, keep]
        link <- trimByInput(link, tableX, x)
    }
    if (!is.null(tableY)) {
        keep <- sort(intersect(colnames(tableY), levels(link)[[y]]))
        if (verbose && length(keep) < NCOL(tableY)) {
            message("Dropped features in tableY: ", length(keep), " remain. ")
        }
        tableY <- tableY[, keep]
        link <- trimByInput(link, tableY, y)
    }
    # Construct dictionary
    d <- dictionaryMatrix(link, all_terms)
    dimnames(d) <- list(y = colnames(tableY), x = colnames(tableX))
    names(dimnames(d)) <- c(y, x)

    # Dummy tables if missing
    if (is.null(tableX) && is.null(tableY)) {
        dimnames(d) <- levels(link)[terms]
        tableY <- matrix(ncol = NROW(d), dimnames = list(NULL, rownames(d)))
        tableX <- matrix(ncol = NCOL(d), dimnames = list(NULL, colnames(d)))
    }
    #
    AnansiWeb(
        tableY = as.matrix(tableY)[, rownames(d), drop = FALSE],
        tableX = as.matrix(tableX)[, colnames(d), drop = FALSE],
        dictionary = d,
        metadata = metadata
    )
}

#' @rdname weaveWeb
#' @export
#' @order 2
#'
weaveWeb.formula <- function(
    formula,
    link = NULL,
    tableX = NULL,
    tableY = NULL,
    ...
) {
    if (missing(formula) || (length(formula) != 3L)) {
        stop("'formula' missing or incorrect")
    }

    terms <- all.vars(formula)
    if (is.null(link) || identical(link, "none")) {
        return(
            weaveWeb.default(
                x = terms[2],
                y = terms[1],
                link,
                tableX,
                tableY
            )
        )
    }

    link <- MultiFactor(link)

    if (sum(terms %in% colnames(link)) != 2L) {
        stop("Variables from 'formula' not found in 'link'.")
    }

    weaveWeb.default(
        x = terms[2],
        y = terms[1],
        link,
        tableX,
        tableY,
        ...
    )
}

#' @rdname weaveWeb
#' @export
#'
weaveKEGG <- function(x, ...) weaveWeb(x, link = kegg_link(), ...)


###############################################################################
###############################################################################

#' Find a path through different feature types.
#' @importFrom igraph shortest_paths
#' @inheritParams weaveWeb
#' @returns a Character vector of the ids to walk in order.
#' @noRd
#'
termSeq <- function(x, y, link) {
    stopifnot(
        "both 'x' and 'y' terms must be found as colnames in 'link'" = all(
            c(x, y) %in% colnames(link)
        )
    )
    g <- getGraph(link)
    sp <- igraph::shortest_paths(g, from = y, to = x, output = "vpath")
    names(unlist(sp, FALSE, FALSE)[[1]])
}

#' Find the order in which link data frames should be listed
#' @param term_list list of `Character vectors`, each with length of two.
#' @param d dictionary(link).
#' @returns a numeric vector with order in which row data frames should be
#'     traversed.
#' @noRd
#'
stepSeq <- function(term_list, d) {
    vapply(
        term_list,
        rowsWithCol,
        d = d,
        name = FALSE,
        FUN.VALUE = 0L,
        USE.NAMES = FALSE
    )
}


#' @noRd
#'
subsetByPath <- function(link, all_terms) {
    term_list <- lapply(
        seq_len(length(all_terms) - 1L),
        FUN = function(x) all_terms[c(x, x + 1L)]
    )
    steps <- stepSeq(term_list, dictionary(link))
    link@index <- link[steps]
    link@levels <- link@levels[all_terms]
    link@map <- mapMultiFactor(link[steps], mode = "counts")

    return(link)
}

#' Generate dictionary Matrix from link input
#' @inheritParams weaveWeb
#' @param all_terms `Character vector` of all path terms in sequence.
#'     `termSeq(x, y, link)`
#' @importMethodsFrom Matrix %&%
#' @noRd
#'
dictionaryMatrix <- function(link, all_terms) {
    term_list <- lapply(
        seq_len(length(all_terms) - 1L),
        FUN = function(x) all_terms[c(x, x + 1L)]
    )
    steps <- stepSeq(term_list, dictionary(link))

    lv_len <- vapply(levels(link), length, 0L, USE.NAMES = TRUE)

    # Handle simple case of one link df first, return sparse matrix.
    if (length(steps) == 1L) {
        return(mapFromLink(
            all_terms,
            df = link@index[[steps]],
            dims = lv_len[all_terms]
        ))
    }
    lv_list <- lapply(term_list, function(x) lv_len[x])
    # Otherwise, make a list of matrices to Reduce to final dictionary
    mat_list <- mapply(
        mapFromLink,
        terms = term_list,
        df = link@index[steps],
        dims = lv_list
    )
    Reduce(Matrix::`%&%`, mat_list)
}

#' @param terms id of cols. `c(y, x)`.
#' @param df element of a `MultiFactor` object
#' @param dims length-2 integer vector of matrix dimensions.
#' @importFrom Matrix sparseMatrix
#' @returns a sparse biadjacency Matrix
#' @noRd
#'
mapFromLink <- function(terms, df, dims) {
    sparseMatrix(i = df[[terms[1]]], j = df[[terms[2]]], dims = dims)
}

#' @description Called by weaveWeb to subset link to inly include the features
#'     found in the input table.
#' @returns a MultiFactor subsetted by relevant features
#' @param link a `MultiFactor` .
#' @param id `Character scalar`, naming the x term to be trimmed
#' @param tableID A table containing features of interest, `tableX` or `tableY`.
#' @noRd
#'
trimByInput <- function(link, tableID, id) {
    lv <- levels(link)[[id]]
    d <- dictionary(link)
    r <- rowsWithCol(d, id)
    stopifnot(
        "Feature names appeared in several index elements. " = length(r) == 1L
    )
    # Subset index by table columns
    xr <- link@index[[r]]
    x.names <- match(colnames(tableID), lv)
    xr <- xr[xr[, id] %in% x.names, ]
    xr.id <- xr[, id]

    # Subset levels
    link@levels[[id]] <- lv[sort(unique(xr.id))]
    # Reorder and replace indices
    xr[, id] <- match(xr.id, sort(unique(xr.id)))
    link@index[[r]] <- xr

    link@map <- mapMultiFactor(link@index, mode = "counts")

    return(link)
}


#' Make a full web; for all vs all association testing
#' @description
#' Make a fully TRUE biadjacency matrix with dimensions of the two input tables.
#' @param tableX,tableY `matrix` of features of table `X`.
#' @param terms `character vector` names of x & y terms
#' @returns
#' An `AnansiWeb` object with both tables and a fully `TRUE`
#' (non-sparse) matrix from the `Matrix` package.
#' @importFrom Matrix Matrix
#' @noRd
#'
web_missing_link <- function(tableX, tableY, terms, metadata = NULL) {
    d <- Matrix(
        data = TRUE,
        nrow = NCOL(tableY),
        ncol = NCOL(tableX),
        dimnames = list(sort(colnames(tableY)), sort(colnames(tableX)))
    )
    names(dimnames(d)) <- rev(terms)

    AnansiWeb(
        tableY = as.matrix(tableY)[, rownames(d)],
        tableX = as.matrix(tableX)[, colnames(d)],
        dictionary = d,
        metadata = metadata
    )
}
thomazbastiaanssen/anansi documentation built on June 9, 2025, 3:59 p.m.