R/entropy.R

Defines functions gene_het gene_hom normalise

Documented in gene_het gene_hom normalise

#' Convert Frequency Counts to Proportions
#'
#' Internal SCEnt helper that rescales frequency counts so that they sum to
#' one, by dividing every entry by the overall total of the input.
#'
#' @param dist A vector of frequency counts.
#'
#' @return An object of the same shape as the input in which each entry has
#' been divided by the sum of all entries.
normalise <- function(dist) {
  # Total of all counts; a zero total yields NaN entries, as 0 / 0 does in R
  total <- sum(dist)
  dist / total
}

#' Find the Homogeneity of a Gene Within a Population
#'
#' Calculates the entropy of a gene's expression distribution across cells by
#' calling entropy::entropy(). A vector is treated as a single gene; a matrix
#' is processed one row at a time.
#'
#' @param expr A vector or matrix of gene expressions. For the matrix, genes
#' should be represented as rows and cells as columns.
#' @param unit The units to be passed to the entropy function.
#' @param normalise A logical value representing whether the gene frequencies
#' should be normalised into a distribution.
#' @param transpose A logical value representing whether the matrix should be
#' transposed before any calculations are performed.
#'
#' @return A vector of the information contained in the distribution of each
#' gene. The higher this is, the more homogeneous the gene is within the cell
#' population.
#' @export
#'
#' @examples
#' # Creating Data
#' gene1 <- c(0, 0, 0, 0, 1, 2, 3)
#' gene2 <- c(5, 5, 3, 2, 0, 0, 0)
#' gene3 <- c(2, 0, 2, 1, 3, 0, 1)
#' gene4 <- c(3, 3, 3, 3, 3, 3, 3)
#' gene5 <- c(0, 0, 0, 0, 5, 0, 0)
#' gene_counts <- matrix(c(gene1, gene2, gene3, gene4, gene5), ncol = 5)
#' rownames(gene_counts) <- paste0("cell", 1:7)
#' colnames(gene_counts) <- paste0("gene", 1:5)
#'
#' # Calculating Homogeneity For Each Gene
#' gene_hom(gene1)
#' gene_hom(gene2)
#' gene_hom(gene3)
#' gene_hom(gene4)
#' gene_hom(gene5)
#'
#' # Calculating Homogeneity For a Matrix
#' # gene_counts stores cells as rows, so it is transposed to put genes in rows
#' gene_hom(gene_counts, transpose = TRUE)
gene_hom <- function(expr, unit = "log2", normalise = TRUE, transpose = FALSE) {
  # Checking if the expression is a vector
  if (is.vector(expr)) {
    # Normalising the data if needed
    if (normalise) {
      # The logical argument shares its name with the normalise() helper; R
      # skips non-function bindings when resolving a call, so this still
      # reaches the helper function
      expr <- normalise(expr)
    }
    # Calculating the entropy of the expression distribution
    entropy::entropy(expr, unit = unit)
    # Checking if the expression is a matrix
  } else if (is.matrix(expr)) {
    # Transposing matrix if needed
    if (transpose) {
      expr <- t(expr)
    }
    # Normalising the data if needed
    if (normalise) {
      # Dividing each row by its own total. Unlike t(apply(...)), this keeps
      # the matrix dimensions when there is only a single column, so each
      # gene is still processed as its own row below
      expr <- expr / rowSums(expr)
    }
    # Calculating the entropy of each expression distribution
    apply(expr, 1, function(x) {
      entropy::entropy(x, unit = unit)
    })
  } else {
    # Throwing an error if a vector or a matrix was not passed in
    stop("\n gene_hom() should be passed either a matrix or a vector")
  }
}

#' Find the Heterogeneity of a Gene Within a Population
#'
#' Calculates the KL divergence of a gene's expression distribution from the
#' uniform distribution over cells by calling entropy::KL.plugin(). A vector
#' is treated as a single gene; a matrix is processed one row at a time.
#'
#' @param expr A vector or matrix of gene expressions. For the matrix, genes
#' should be represented as rows and cells as columns.
#' @param unit The units to be passed to the entropy function.
#' @param normalise A logical value representing whether the gene frequencies
#' should be normalised into a distribution.
#' @param transpose A logical value representing whether the matrix should be
#' transposed before any calculations are performed.
#'
#' @return A vector of the information gained from the gene distribution
#' compared to the uniform distribution. The higher the value, the more
#' heterogeneous the gene is within the population.
#' @export
#'
#' @examples
#' # Creating Data
#' gene1 <- c(0, 0, 0, 0, 1, 2, 3)
#' gene2 <- c(5, 5, 3, 2, 0, 0, 0)
#' gene3 <- c(2, 0, 2, 1, 3, 0, 1)
#' gene4 <- c(3, 3, 3, 3, 3, 3, 3)
#' gene5 <- c(0, 0, 0, 0, 5, 0, 0)
#' gene_counts <- matrix(c(gene1, gene2, gene3, gene4, gene5), ncol = 5)
#' rownames(gene_counts) <- paste0("cell", 1:7)
#' colnames(gene_counts) <- paste0("gene", 1:5)
#'
#' # Calculating Heterogeneity For Each Gene
#' gene_het(gene1)
#' gene_het(gene2)
#' gene_het(gene3)
#' gene_het(gene4)
#' gene_het(gene5)
#'
#' # Calculating Heterogeneity For a Matrix
#' # gene_counts stores cells as rows, so it is transposed to put genes in rows
#' gene_het(gene_counts, transpose = TRUE)
gene_het <- function(expr, unit = "log2", normalise = TRUE, transpose = FALSE) {
  # Checking if the expression is a vector
  if (is.vector(expr)) {
    # Normalising the expression if needed
    if (normalise) {
      # The logical argument shares its name with the normalise() helper; R
      # skips non-function bindings when resolving a call, so this still
      # reaches the helper function
      expr <- normalise(expr)
    }
    # Generating the uniform distribution vector
    num_cell <- length(expr)
    unif_dist <- rep(1 / num_cell, num_cell)
    # Finding the KL Divergence of the expression distribution from the
    # uniform distribution
    entropy::KL.plugin(expr, unif_dist, unit = unit)
    # Checking if the data is a matrix
  } else if (is.matrix(expr)) {
    # Transposing the matrix if needed
    if (transpose) {
      expr <- t(expr)
    }
    # Normalising the data if needed
    if (normalise) {
      # Dividing each row by its own total. Unlike t(apply(...)), this keeps
      # the matrix dimensions when there is only a single column, so the cell
      # count and the per-gene rows used below stay correct
      expr <- expr / rowSums(expr)
    }
    # Generating the uniform distribution vector (one entry per cell/column)
    num_cell <- ncol(expr)
    unif_dist <- rep(1 / num_cell, num_cell)
    # Finding the KL Divergence of the expression distribution from the
    # uniform distribution for each expression distribution
    apply(expr, 1, function(x) {
      entropy::KL.plugin(x, unif_dist, unit = unit)
    })
  } else {
    # Throwing an error if the input is not a vector or a matrix
    stop("\n gene_het() should be passed either a matrix or a vector")
  }
}
hwarden162/SCEnt documentation built on Dec. 20, 2021, 5:52 p.m.