#' Calculate the Jensen-Shannon divergence.
#'
#' Given two probability distributions, calculate the Jensen-Shannon (JS)
#' divergence between them.
#'
#' @param p,q Numerical vectors representing the probability distributions.
#' @return A numeric for the JS divergence between the vectors.
#' @seealso \code{\link{generate2dJsDivergenceDataFrame}}
#' @export
calculateJsDivergence <- function(p, q) {
# Verify that p and q are probability distributions.
if (!isTRUE(all.equal(sum(p), 1)) ||
!isTRUE(all.equal(sum(q), 1)) ||
any(p < 0) || any(q < 0)) {
stop("p and q should be probability distributions")
}
# Calculate the JS divergence.
m <- 0.5 * (p + q)
0.5 * (sum(p * log(p / m)) + sum(q * log(q / m)))
}
#' Calculate the probability distribution of a matrix.
#'
#' Calculate the two-dimensional density of a matrix using kernel density
#' estimation (KDE), convert it into a vector, and normalize it to sum 1.
#'
#' @param mtx Numeric matrix for which to calculate the probability
#' distribution.
#' @param n Number of grid points for KDE.
#' @param lims The limits of the rectangle covered by the KDE.
#' @return A one-dimensional numeric vector for the probability density.
#' @importFrom MASS kde2d
.calculateMatrixKde <- function(mtx, n = 25, lims = NULL) {
if (is.null(lims)) {
stop("Kernel density estimation limits parameter is missing")
}
density <- kde2d(mtx[, 1], mtx[, 2], n = n, lims = lims)
density_vector <- as.vector(density$z)
density_vector / sum(density_vector)
}
#' Jensen-Shannon divergence between two-dimensional maps.
#'
#' Calculate the Jensen-Shannon (JS) divergence between two two-dimensional maps.
#' These maps may be generated by a dimensionality reduction (DR) method; however
#' this is not a requirement.
#'
#' @param x,y Two-dimensional numeric matrices to compare.
#' @inheritParams .calculateMatrixKde
#' @return A numeric for the JS divergence between the maps.
#' @seealso \code{\link{calculateJsDivergence}}
#' @export
calculate2dJsDivergence <- function(x, y, n = 2 ^ 8) {
# Use range over combined data as KDE limits.
lims <- c(range(c(x[, 1], y[, 1])), range(c(x[, 2], y[, 2])))
# Convert matrices to probability distributions.
x_prob <- .calculateMatrixKde(x, n, lims)
y_prob <- .calculateMatrixKde(y, n, lims)
calculateJsDivergence(x_prob, y_prob)
}
#' Two-dimensional maps for Jensen-Shannon divergence calculation.
#'
#' Create a list with keys pointing to filenames and values pointing to 2-D
#' matrices that were generated by extracting columns of interest from the files.
#' @inheritParams generate2dJsDivergenceDataFrame
#' @import flowCore
.generate2dMaps <- function(source_filepaths, two_d_map_column_names) {
two_d_maps <- list()
for(file_path in source_filepaths) {
flow_frame <- flowCore::read.FCS(file_path)
map <- flow_frame@exprs[, two_d_map_column_names]
filename <- basename(file_path)
two_d_maps[[filename]] <- map
}
two_d_maps
}
#' Jensen-Shannon divergence between multiple two-dimensional maps.
#'
#' Calculate the Jensen-Shannon (JS) divergence between two or more two-dimensional
#' maps and generate a data frame displaying the divergence value for each possible
#' pair of two-dimensional maps. The two-dimensional maps are generated from
#' the source files and columns of interest (two_d_map_column_names). The maps
#' may be generated by a dimensionality reduction (DR) method; however this is
#' not a requirement.
#'
#' @param source_filepaths A vector containing the file paths for the data of interest.
#' @param two_d_map_column_names A vector containing the column names of interest. Defaults to Cytobank's default tSNE column names.
#' @return A data frame that includes a numeric for the JS divergence between each
#' possible combination of generated 2-D maps.
#' @seealso \code{\link{calculateJsDivergence}}
#' @inheritParams .calculateMatrixKde
#' @export
generate2dJsDivergenceDataFrame <- function(source_filepaths,
two_d_map_column_names = c("tSNE1", "tSNE2"),
n = 2 ^ 8) {
if (length(two_d_map_column_names) != 2) {
stop("We must generate two-dimensional numeric matrices with two columns.")
}
two_d_maps <- .generate2dMaps(source_filepaths, two_d_map_column_names)
filenames <- names(two_d_maps)
filename_pairs <- combn(filenames, 2)
num_pairs <- length(filename_pairs) / 2
pair_divergences <- data.frame(matrix(ncol=3, nrow=0))
colnames(pair_divergences) <- c('file1', 'file2', 'divergence_value')
for (num in 1:num_pairs) {
current_pair = filename_pairs[, num]
filename_1 = current_pair[1]
filename_2 = current_pair[2]
df_1 = as.data.frame(two_d_maps[filename_1])
df_2 = as.data.frame(two_d_maps[filename_2])
divergence_value <- calculate2dJsDivergence(df_1, df_2, n)
new_row <- data.frame(file1 = filename_1,
file2 = filename_2,
divergence_value = divergence_value)
pair_divergences <- rbind(pair_divergences, new_row)
}
pair_divergences
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.