R/CoRecMotif-class.R

Defines functions CoRecMotif

Documented in CoRecMotif

#' CoRecMotif class
#'
#' A container for motifs generated by a CoRec experiment.
#'
#' @slot probe_set `character(1)`. The probe set name.
#' @slot pbm_condition `character(1)`. The PBM condition (e.g., cell type,
#'   treatment, and factor profiled).
#' @slot zscore_motif `matrix(numeric)`. The z-score motif. See
#'   [make_zscore_motif()] for a description of the expected format.
#' @slot array_id `character(1)`. The name of the array/experiment the z-score
#'   motif is from.
#' @slot motif_strength `numeric(1)`. Calculated automatically. The motif
#'   strength is the 93rd percentile of the z-scores in the z-score motif (or
#'   approximately the median of the top 15% of z-scores).
#' @slot rolling_ic `numeric(1)`. Calculated automatically. The rolling IC is
#'   the maximum value obtained when taking a moving average of the information
#'   content of the motif over a sliding window of length 5.
#' @slot seed_sequence `character(1)`. The sequence of the seed probe.
#' @slot beta `numeric(1)`. Calculated automatically. Beta is defined as \eqn{4
#'   - (0.5 * z)}, where `z` is the z-score of the seed probe of this probe set.
#'   If beta falls outside the range of 1 to 4 (inclusive), it is set to the
#'   nearest endpoint. It is used to convert the z-score motif to a PPM.
#' @slot motif [universalmotif][universalmotif::universalmotif-class].
#'   Calculated automatically. To convert the z-score motif into a PPM, each
#'   z-score is first multiplied by beta, and then the product is exponentiated.
#'   Each column (or position in the motif) is then normalized to sum to 1.
#' @slot match_motif [universalmotif][universalmotif::universalmotif-class]. The
#'   best match reference motif.
#' @slot match_pvalue `numeric(1)`. The adjusted p-value of the best match to a
#'   reference motif.
#' @slot match_cluster `character(1)`. The cluster of the best match reference
#'   motif.
#'
#' @export
#'
#' @name CoRecMotif-class
#' @rdname CoRecMotif-class
methods::setClass(
    # Name the class CoRecMotif
    "CoRecMotif",

    # Define the names and types of the slots the class should have
    slots = list(
        probe_set = "character",
        pbm_condition = "character",
        zscore_motif = "matrix",
        array_id = "character",
        motif_strength = "numeric",
        rolling_ic = "numeric",
        seed_sequence = "character",
        beta = "numeric",
        motif = "ANY",
        match_motif = "ANY",
        match_pvalue = "numeric",
        match_cluster = "character"
    ),

    # Provide a default example object
    prototype = list(
        probe_set = NA_character_,
        pbm_condition = NA_character_,
        zscore_motif = matrix(NA_real_),
        array_id = NA_character_,
        motif_strength = NA_real_,
        rolling_ic = NA_real_,
        seed_sequence = NA_character_,
        beta = NA_real_,
        motif = NA,
        match_motif = NA,
        match_pvalue = NA_real_,
        match_cluster = NA_character_
    )
)

#' Create a CoRecMotif object
#'
#' Create a CoRecMotif from a 4 x n matrix of z-scores.
#'
#' @param probe_set `character(1)`. The probe set name.
#' @param pbm_condition `character(1)`. The PBM condition (e.g., cell type,
#'   treatment, and factor profiled).
#' @param zscore_motif `matrix(numeric())`. The z-score motif. See
#'   [make_zscore_motif()] for a description of the expected format.
#' @param array_id `character(1)` or `NULL`. The name of the array/experiment
#'   the z-scores are from. If `NULL`, a random ID will be generated.
#'   (Default: NULL)
#' @param seed_sequence `character(1)` or `NULL`. The sequence of the seed
#'   probe. (Default: NULL)
#'
#' @return An object of class [CoRecMotif][CoRecMotif-class].
#'
#' @export
#'
#' @examples
#' print("FILL THIS IN")
CoRecMotif <-
    function(
        probe_set,
        pbm_condition,
        zscore_motif,
        array_id = NULL,
        seed_sequence = NULL
    ) {
    # Make sure all the arguments are the right type
    assertthat::assert_that(
        assertthat::is.string(probe_set),
        assertthat::is.string(pbm_condition),
        is.matrix(zscore_motif) && is.numeric(zscore_motif),
        assertthat::is.string(array_id) || is.null(array_id),
        assertthat::is.string(seed_sequence) || is.null(seed_sequence)
    )

    # Make sure zscore_motif is the right format
    zscore_motif <- check_valid_zscore_motif(zscore_motif)

    # If no array ID is given, generate a random ID
    if (is.null(array_id)) {
        array_id <- create_array_id()
    }

    # If seed_sequence is NULL, switch it to NA_character_
    if (is.null(seed_sequence)) {
        seed_sequence <- NA_character_
    }

    # Paste together the probe set, PBM condition, and array ID to make a name
    motif_name <- paste(probe_set, pbm_condition, array_id, sep = "_")

    # Calculate the motif strength
    motif_strength <- calculate_strength(zscore_motif)

    # Calculate beta
    beta <- calculate_beta(motif_strength)

    # Convert the z-score motif into a universalmotif
    motif <- zscore_to_universalmotif(zscore_motif, beta, motif_name)

    # Calculate the rolling IC
    rolling_ic <- calculate_rolling_ic(motif)

    # Create a new CoRecMotif
    methods::new(
        "CoRecMotif",
        probe_set = probe_set,
        pbm_condition = pbm_condition,
        zscore_motif = zscore_motif,
        array_id = array_id,
        beta = beta,
        motif_strength = motif_strength,
        rolling_ic = rolling_ic,
        seed_sequence = seed_sequence,
        motif = motif
    )
}

methods::setValidity("CoRecMotif", function(object) {
    # Check the types of slots that don't get fully type-checked automatically
    type_checks <-
        dplyr::case_when(
            !is.numeric(get_zscore_motif(object)) ~
                "@zscore_motif must be numeric",
            !methods::is(get_motif(object), "universalmotif") ~
                "@motif must be an object of class universalmotif",
            !methods::is(get_match_motif(object), "universalmotif") &&
                !is.na(get_match_motif(object)) ~
                "@match_motif must be an object of class universalmotif or NA"
        )

    # If any types are incorrect, return the corresponding message
    if (!is.na(type_checks)) {
        return(type_checks)
    }

    # Check the dimensions of all the slots
    dim_checks <-
        dplyr::case_when(
            length(get_probe_set(object)) != 1 ~
                "@probe_set must be a character vector of length 1",
            length(get_pbm_condition(object)) != 1 ~
                "@pbm_condition must be a character vector of length 1",
            nrow(get_zscore_motif(object)) != 4 ~
                "@zscore_motif must have four rows",
            length(get_array_id(object)) != 1 ~
                "@array_id must be a character vector of length 1",
            length(get_motif_strength(object)) != 1 ~
                "@motif_strength must be a numeric vector of length 1",
            length(get_rolling_ic(object)) != 1 ~
                "@rolling_ic must be a numeric vector of length 1",
            length(get_seed_sequence(object)) != 1 ~
                "@seed_sequence must be a character vector of length 1",
            length(get_beta(object)) != 1 ~
                "@beta must be a numeric vector of length 1",
            length(get_match_pvalue(object)) != 1 ~
                "@match_pvalue must be a numeric vector of length 1",
            length(get_match_cluster(object)) != 1 ~
                "@match_cluster must be a character vector of length 1"
        )

    # If any dimensions are incorrect, return the corresponding message
    if (!is.na(dim_checks)) {
        return(dim_checks)
    }

    # Figure out some expected values
    expected_rows <- c("A", "C", "G", "T")
    expected_cols <- as.character(1:ncol(get_zscore_motif(object)))
    expected_strength <- calculate_strength(get_zscore_motif(object))
    expected_rolling_ic <- calculate_rolling_ic(get_motif(object))
    expected_beta <- calculate_beta(get_motif_strength(object))
    expected_ppm <-
        zscore_to_universalmotif(
            get_zscore_motif(object), expected_beta, get_motif_name(object)
        )@motif

    # Check that the CoRecMotif is internally consistent
    consistency_checks <-
        dplyr::case_when(
            !identical(rownames(get_zscore_motif(object)), expected_rows) ~
                "@zscore_motif rows must be named 'A', 'C', 'G', and 'T'",
            !identical(colnames(get_zscore_motif(object)), expected_cols) ~
                "@zscore_motif columns must be named '1', '2', '3', etc.",
            get_motif_strength(object) != expected_strength ~
                "@motif_strength is inconsistent with @zscore_motif",
            get_rolling_ic(object) != expected_rolling_ic ~
                "@rolling_ic is inconsistent with @zscore_motif",
            get_beta(object) != expected_beta ~
                "@beta is inconsistent with @zscore_motif",
            !all.equal(get_ppm(object), expected_ppm) ~
                "@motif is inconsistent with @zscore_motif"
        )

    # If anything is inconsistent, return the corresponding message
    if (!is.na(consistency_checks)) {
        return(consistency_checks)
    } else {
        return(TRUE)
    }
})
Siggers-Lab/hTF_array documentation built on Feb. 7, 2024, 11:25 p.m.