R/randomAnansi.R

Defines functions randomLinkDF randomWebMetadata randomWebDic randomWebTab randomWebFull krebsDemoWeb randomMultiFactor randomWeb

Documented in krebsDemoWeb randomMultiFactor randomWeb

#' Generate a random AnansiWeb or MultiFactor
#' @name randomAnansi
#' @description
#' Randomly generate a valid `AnansiWeb` or `MultiFactor` object.
#' @param n_samples,n_reps `Numeric scalar` Number of samples and repeated
#'     measures of those samples to be generated. Ignored if `tableY` and
#'     `tableX` are provided. (defaults: 10 samples without repeats)
#' @param n_features_y,n_features_x `Numeric scalar` Number of features to be
#'     generated. Ignored if `tableY` and `tableX` are provided.
#' @param tableY,tableX A table containing features of interest. Rows should be
#'     samples and columns should be features. Y and X refer to the position of
#'     the features in a formula: Y ~ X.
#' @param dictionary A binary adjacency matrix of class `Matrix`, or
#' coercible to `Matrix`.
#' @returns a randomly generated object of the specified class.
#' @examples
#' # Make a random AnansiWeb object
#' randomWeb()
#' krebsDemoWeb()
#' randomMultiFactor()
#' @seealso [AnansiWeb()], [MultiFactor()]
#'
NULL

#' @rdname randomAnansi
#' @name randomWeb
#' @export
#'
randomWeb <- function(
    n_samples = 10,
    n_reps = 1L,
    n_features_x = 8,
    n_features_y = 12,
    sparseness = 0.5,
    tableY = NULL,
    tableX = NULL,
    dictionary = NULL
) {
    stopifnot(
        "'sparseness' must be a proportion [0-1]. " = sparseness <= 1 &&
            sparseness > 0
    )
    stopifnot(
        "At least one of 'tableY,tableX', 'dictionary' should be NULL." = any(c(
            is.null(tableY),
            is.null(tableX),
            is.null(dictionary)
        ))
    )
    stopifnot(
        "Both 'tableY,tableX' should be provided or NULL. " = is.null(
            tableY
        ) ==
            is.null(tableX)
    )

    density <- 1 - sparseness
    # All missing: return full random Web
    if (all(c(is.null(tableY), is.null(tableX), is.null(dictionary)))) {
        return(
            randomWebFull(
                n_samples,
                n_reps,
                n_features_x,
                n_features_y,
                density
            )
        )
    }
    # Dictionary missing: make random fitting dictionary, return filled Web
    if (is.null(dictionary)) {
        return(
            randomWebDic(tableY, tableX, density)
        )
    }
    # Tables missing: make random fitting tables, return filled Web
    return(randomWebTab(n_samples, n_reps, dictionary))
}

#' @rdname randomAnansi
#' @param n_types `Numeric scalar`, number of types of features to generate
#' @param n_features `Numeric scalar`, number of features per type
#' @param sparseness `Numeric scalar`, proportion: How rare are connections
#' @export
#'
randomMultiFactor <- function(n_types = 6, n_features = 100, sparseness = 0.5) {
    stopifnot(
        "'sparseness' must be a proportion [0-1]. " = sparseness <= 1 &&
            sparseness > 0
    )
    n_types <- max(min(n_types, 26), 2)
    ids <- letters[seq_len(n_types)]
    out_names <- paste0(ids[-n_types], "2", ids[-1L])
    id_list <- lapply(ids, function(x) {
        paste(
            x,
            formatC(seq_len(n_features), digits = 2, flag = "0"),
            sep = "_"
        )
    })

    out <- lapply(seq_len(n_types - 1), FUN = function(x) {
        randomLinkDF(
            l = id_list[-n_types][[x]],
            r = id_list[-1L][[x]],
            l_id = ids[-n_types][x],
            r_id = ids[-1L][x],
            p = (1 - sparseness)
        )
    })
    names(out) <- out_names
    asMultiFactor(out)
}

#' @rdname randomAnansi
#' @aliases krebsDemoWeb
#' @importFrom Matrix sparseMatrix
#' @importFrom stats pnorm
#' @export
#'
krebsDemoWeb <- function(n_samples = 100, n_reps = 4L) {
    # Load krebs edgelist
    kd <- local({
        data("krebs", package = "anansi", envir = environment())
        # explicitly assign from environment
        krebs <- get("krebs")
        # Define dictionary
        return(
            Matrix::sparseMatrix(
                i = as.integer(krebs$Enzyme),
                j = as.integer(krebs$Metabolite),
                dimnames = lapply(krebs, levels)
            )
        )
    })
    # Generate web with metadata
    w <- randomWeb(n_samples, n_reps, dictionary = kd)

    # Spike demo associations
    int_ab <- metadata(w)$group_ab == "a"
    int_pr <- pnorm(metadata(w)$score_a)

    # Positive association aconitase ~ citrate
    tableY(w)[, 1L] <- scale(tableY(w)[, 1L] * 0.25 + tableX(w)[, 1L] * 0.75)
    # Negative association aconitase ~ cis-aconitate
    tableX(w)[, 2L] <- scale(tableX(w)[, 2L] * 0.25 + tableY(w)[, 1L] * -0.75)
    # Disjointed association isocitrate dehydrogenase ~ isocitrate
    tableY(w)[, 2L] <- scale(tableY(w)[, 2L] * 0.25 + tableX(w)[, 3L] * 0.75)
    tableY(w)[int_ab, 2L] <- tableY(w)[int_ab, 2L] * -1L
    # Disjointed association ketoglutarate dehydrogenase ~ ketoglutarate
    tableY(w)[, 3L] <- scale(
        tableY(w)[, 3L] * 0.25 + tableX(w)[, 4L] * 0.75 * metadata(w)$score_a
    )
    # Emergent association succinyl-CoA synthetase ~ succinyl-CoA
    tableY(w)[, 4L] <- scale(
        tableY(w)[, 4L] *
            (0.25 + 0.50 * !int_ab) +
            tableX(w)[, 5L] * (0.25 + 0.50 * int_ab)
    )
    # Emergent association succinate dehydrogenase ~ succinate
    tableY(w)[, 5L] <- scale(
        tableY(w)[, 5L] * int_pr + tableX(w)[, 6L] * (1 - int_pr)
    )

    return(w)
}

#' Generate a random AnansiWeb, without any prior components
#' @description
#' called by randomWeb, not for user.
#' @importFrom Matrix rsparsematrix
#' @importFrom stats rnorm
#' @rdname randomAnansi
#' @noRd
#'
randomWebFull <- function(n_samp, n_reps, n_x, n_y, density) {
    rn <- paste0(
        "sample_",
        rep(seq_len(n_samp), each = n_reps),
        "_",
        seq_len(n_reps)
    )
    tableY <- matrix(
        data = rnorm(n_y * n_samp * n_reps),
        nrow = n_samp * n_reps,
        ncol = n_y,
        dimnames = list(
            sample_id = rn,
            y = paste0("y_", seq_len(n_y))
        )
    )
    tableX <- matrix(
        data = rnorm(n_x * n_samp * n_reps),
        nrow = n_samp * n_reps,
        ncol = n_x,
        dimnames = list(
            sample_id = rn,
            x = paste0("x_", seq_len(n_x))
        )
    )
    randomWebDic(tableY, tableX, density)
}

#' Generate a random AnansiWeb, only missing tables
#' @description
#' called by randomWeb, not for user.
#' @rdname randomAnansi
#' @noRd
#'
randomWebTab <- function(n_samp, n_reps, dictionary, metadata) {
    d <- dim(dictionary)
    rn <- paste0(
        "sample_",
        rep(seq_len(n_samp), each = n_reps),
        "_",
        seq_len(n_reps)
    )
    tableY <- matrix(
        data = rnorm(d[1] * n_samp * n_reps),
        nrow = n_samp * n_reps,
        ncol = d[1],
        dimnames = c(
            list(
                sample_id = rn
            ),
            dimnames(dictionary)[1]
        )
    )
    tableX <- matrix(
        data = rnorm(d[2] * n_samp * n_reps),
        nrow = n_samp * n_reps,
        ncol = d[2],
        dimnames = c(
            list(
                sample_id = rn
            ),
            dimnames(dictionary)[2]
        )
    )
    names(dimnames(tableX))[2] <- names(dimnames(dictionary))[2]
    metadata <- randomWebMetadata(tableY, n_samp, n_reps)
    # return AnansiWeb
    AnansiWeb(
        tableY = tableY,
        tableX = tableX,
        dictionary = dictionary,
        metadata = list(metadata = metadata)
    )
}

#' Generate a random AnansiWeb, only missing dictionary.
#' @description
#' called by randomWeb, not for user.
#' @importFrom Matrix rsparsematrix
#' @rdname randomAnansi
#' @noRd
#'
randomWebDic <- function(tableY, tableX, density, metadata) {
    dictionary <- rsparsematrix(
        nrow = NCOL(tableY),
        ncol = NCOL(tableX),
        density = density,
        rand.x = NULL,
        dimnames = list(
            y = colnames(tableY),
            x = colnames(tableX)
        )
    )
    names(dimnames(dictionary)) <- c(
        names(dimnames(tableY))[2L],
        names(dimnames(tableX))[2L]
    )
    metadata <- randomWebMetadata(tableY)
    # return AnansiWeb
    AnansiWeb(
        tableY = tableY,
        tableX = tableX,
        dictionary = dictionary,
        metadata = list(metadata = metadata)
    )
}

#' Generate random metadata for AnansiWeb
#' @description
#' called by randomWeb, not for user.
#' @param table a web table
#' @rdname randomAnansi
#' @noRd
#'
randomWebMetadata <- function(table, n_samples = NULL, n_reps = NULL) {
    if (is.null(n_samples)) {
        n_samples <- NROW(table)
        n_reps <- 1L
    }
    m <- data.frame(
        sample_id = paste0("sample_", rep(seq_len(n_samples), each = n_reps)),
        repeated = paste0("rep_", seq_len(n_reps)),
        group_ab = rep(
            sample(c("a", "b"), n_samples, replace = TRUE),
            each = n_reps
        ),
        subtype = rep(
            sample(c("x", "y", "z"), n_samples, replace = TRUE),
            each = n_reps
        ),
        score_a = rnorm(n_samples),
        score_b = rnorm(n_samples),
        score_c = rnorm(n_samples),
        row.names = row.names(table)
    )
    return(m)
}

#' Make a single df for a random MultiFactor
#' @rdname randomAnansi
#' @description called by `randomMultiFactor`, shouldn't be called by user.
#' @param l,r character vector of left, right features
#' @param l_id,r_id character scalar of left, right feature names
#' @param p proportion of connections to keep
#' @noRd
#'
randomLinkDF <- function(l, r, l_id, r_id, p) {
    len <- length(l) * length(r)
    ind <- sort(sample(seq_len(len), size = ceiling(p * len)))
    out <- expand.grid(l, r, KEEP.OUT.ATTRS = FALSE)[ind, ]
    names(out) <- c(l_id, r_id)
    out
}
thomazbastiaanssen/anansi documentation built on June 9, 2025, 3:59 p.m.