Nothing
utils::globalVariables(c("x", "y", "hue", "window", "vals", "type", "scaled"))
#' Computes Pearson's correlation between pairs of columns.
#' If one matrix is provided, the output is the pairwise correlations between its columns.
#' If two matrices are provided, the output is the pairwise correlations between their columns.
#'
#' @param matrix1 The first input matrix.
#' @param matrix2 The second input matrix (optional).
#'
#' @return Returns a correlation matrix.
#'
correlation <- function(matrix1, matrix2 = NULL){
# correlation with columns of one matrix
if (is.null(matrix2)){
output = (t(scale(matrix1)) %*% scale(matrix1)) / (nrow(matrix1)-1)
} else { # correlation with columns of two matrices
output = (t(scale(matrix1)) %*% scale(matrix2)) / (nrow(matrix1)-1)
}
# return output
return(output)
}
#' Calculates the number of elements common between columns of two matrices.
#' This function performs a simple dot product when binarize = FALSE.
#'
#' @param matrix1 The first input matrix.
#' @param matrix2 The second input matrix.
#' @param sparse If TRUE, coerces matrices to sparse format for efficiency.
#' @param binarize If TRUE, converts matrices to binary format to count the number of common elements.
#'
#' @return Returns a column by column matrix, where entries represent the number of common elements.
#'
get_cooccurrence_stats <- function(matrix1, matrix2, sparse = TRUE, binarize = TRUE){
# convert to sparse matrices
if (sparse){
matrix1 = Matrix::Matrix(data = Matrix::as.matrix(matrix1), sparse = TRUE)
matrix2 = Matrix::Matrix(data = Matrix::as.matrix(matrix2), sparse = TRUE)
}
# binarize for count-based calculation
if (binarize){
matrix1[matrix1 > 0] = 1
matrix2[matrix2 > 0] = 1
}
# number of shared elements
cooccur = t(matrix1) %*% matrix2
cells = t(matrix1) %*% matrix1
# return output
output = list(cooccur, cells); names(output) = c("cooccur", "cells")
return(output)
}
#' This function takes x and y coordinates and rotates them counterclockwise
#' by the specified number of degrees, and mean centers the points.
#'
#' @param x The x coordinates.
#' @param y The y coordinates.
#' @param n_degrees The degree of rotation in counterclockwise direction.
#' @param center If TRUE, mean centers the points.
#' @param scale If TRUE, standardizes the points.
#' @param flip_x Reflects the points across y = 0 line.
#' @param flip_y Reflects the points across x = 0 line.
#'
#' @return Returns a data frame containing the new x and y coordinates.
#'
#' @examples
#' data("locations")
#' locations = as.data.frame(as.matrix(locations))
#' ggplot2::ggplot(data = locations) + ggplot2::aes(x = pos_x, y = pos_y) +
#' ggplot2::geom_point(size = 0) + ggplot2::theme_classic()
#'
#' locations = rotate_coordinates(x = locations$pos_x, y = locations$pos_y, n_degrees = 45)
#' ggplot2::ggplot(data = locations) + ggplot2::aes(x = pos_x, y = pos_y) +
#' ggplot2::geom_point(size = 0) + ggplot2::theme_classic()
#'
#' @export
rotate_coordinates <- function(x, y, n_degrees = 0, center = FALSE, scale = FALSE, flip_x = FALSE, flip_y = FALSE) {
# convert degrees to radians
theta <- n_degrees * (pi / 180)
# apply rotation matrix to x and y coordinates
x_rotated <- (cos(theta) * x) - (sin(theta) * y)
y_rotated <- (sin(theta) * x) + (cos(theta) * y)
# center and/or scales the coordinates
x_rotated <- scale(x = x_rotated, center = center, scale = scale)
y_rotated <- scale(x = y_rotated, center = center, scale = scale)
# flip x or y coordinates
if (flip_x){
x_rotated <- -1 * x_rotated
}
if (flip_y){
y_rotated <- -1 * y_rotated
}
# return output
output <- data.frame(pos_x = x_rotated, pos_y = y_rotated)
return(output)
}
#' Computes cross-expression and co-expression p-values between all gene pairs.
#'
#' @param data A cells by genes expression matrix.
#' @param locations A cells by coordinates (x-y or higher dimensions) matrix.
#' @param neighbor The n-th nearest neighbor for calculating cross-expression.
#' @param alpha_cross The significance threshold for cross-expression.
#' @param alpha_co The significance threshold for co-expression.
#' @param output_matrix If TRUE, outputs the cross-expression p-value matrix.
#'
#' @return Returns a list containing gene pairs with co-expression and cross-expression p-values
#' before and after false discovery rate (FDR) correction.
#'
#' @examples
#' data("locations")
#' data("expression")
#' locations = as.matrix(locations)
#' expression = as.matrix(expression)
#' expression = expression[,1:5]
#' results = cross_expression(data = expression, locations = locations)
#'
#' @export
cross_expression <- function(data, locations, neighbor = 1, alpha_cross = 0.05, alpha_co = 0, output_matrix = FALSE){
# binarize data
data[data > 0] = 1
# find nth nearest neighbors and create neighbors x genes matrix
distances = RANN::nn2(locations, locations, k = neighbor + 1, searchtype = "standard", treetype = "kd")
distances = distances$nn.idx[, neighbor + 1]
data_temp = data[distances,]
# number of shared genes between cell-neighbor pairs (cross-expression) and within the same cells (co-expression)
hyper_matrices = get_cooccurrence_stats(data, data_temp, sparse = FALSE, binarize = FALSE)
cell_total = nrow(data)
# re-order matrices in ascending order by gene name
names_seq = colnames(data)
data = data[,order(colnames(data))]
data_temp = data_temp[,order(colnames(data_temp))]
# co-expression p-values
hyper_matrices$cells = hyper_matrices$cells[order(rownames(hyper_matrices$cells)), order(colnames(hyper_matrices$cells))]
m = diag(hyper_matrices$cells)
k = rep(m, each = nrow(hyper_matrices$cells))
m = rep(m, times = nrow(hyper_matrices$cells))
n = cell_total - m
q = as.vector(hyper_matrices$cells) - 1
names(q) = colnames(hyper_matrices$cells)
pval = stats::phyper(q = q, m = m, n = n, k = k, lower.tail = FALSE)
names(pval) = NULL
pval = stats::p.adjust(pval, method = "BH")
co_expression = matrix(pval, nrow = nrow(hyper_matrices$cells), byrow = TRUE, dimnames = dimnames(hyper_matrices$cells))
name_order = match(names_seq, colnames(co_expression))
co_expression = co_expression[name_order, name_order]
# cross-expression p-values
hyper_matrices$cooccur = hyper_matrices$cooccur[order(rownames(hyper_matrices$cooccur)), order(colnames(hyper_matrices$cooccur))]
q = t(data * (1 - data_temp)) %*% ((1 - data) * data_temp)
coexp = as.vector(hyper_matrices$cells)
k = diag(hyper_matrices$cells)
k = rep(k, each = nrow(hyper_matrices$cells))
k = k - coexp
neig = t(data_temp) %*% data_temp
m = diag(neig)
m = rep(m, times = nrow(hyper_matrices$cells))
m = m - as.vector(neig)
n = cell_total - m
q = t(q)
q = as.vector(q) - 1
names(q) = rep(rownames(hyper_matrices$cells), times = nrow(hyper_matrices$cells))
pval = stats::phyper(q = q, m = m, n = n, k = k, lower.tail = FALSE)
pvalx= pval # w/o FDR correction
pval = stats::p.adjust(pval, method = "BH")
cross_expression = matrix(pval, nrow = nrow(hyper_matrices$cells), byrow = TRUE, dimnames = dimnames(hyper_matrices$cells))
cross_expression = cross_expression[name_order, name_order]
cross_x = matrix(pvalx, nrow = nrow(hyper_matrices$cells), byrow = TRUE, dimnames = dimnames(hyper_matrices$cells))
cross_x = cross_x[name_order, name_order]
# cross-expression is significant if A-B or B-A direction is significant
cross_expression = Rfast::Pmin(cross_expression, t(cross_expression))
dimnames(cross_expression) = dimnames(co_expression)
cross_x = Rfast::Pmin(cross_x, t(cross_x))
dimnames(cross_x) = dimnames(co_expression)
# vectorized versions of co-expression and cross-expression p-values
co_expr = Rfast::upper_tri(co_expression)
cross_expr = Rfast::upper_tri(cross_expression)
cross_x = Rfast::upper_tri(cross_x)
# co-expressing gene pairs are considered to not cross-express
cross_expr[co_expr <= alpha_co] = 1
cross_x[co_expr <= alpha_co] = 1
# assemble output with gene pairs and corresponding p-values for co-expression and cross-expression
output = data.frame(which(upper.tri(cross_expression), arr.ind = TRUE))
output = data.frame(gene1 = colnames(cross_expression)[output$row],
gene2 = colnames(cross_expression)[output$col],
co_pvalue = co_expr,
cross_pvalue = cross_x,
cross_padj = cross_expr,
cross_sig = as.integer(cross_expr <= alpha_cross))
# assemble output as a gene-gene matrix of cross-expression p-values
if (output_matrix){
# output matrices with and w/o FDR correction
mat_out = vector(mode = "list", length = 3)
names(mat_out) = c("Cross_without_FDR","Cross_with_FDR","Co_with_FDR")
# cross without FDR
output_mat = matrix(0, nrow = nrow(co_expression), ncol = ncol(co_expression))
dimnames(output_mat) = dimnames(co_expression)
output_mat[upper.tri(output_mat)] = output$cross_pvalue
output_mat = output_mat + t(output_mat)
diag(output_mat) = 1
mat_out[[1]] = output_mat
# cross with FDR
output_mat = matrix(0, nrow = nrow(co_expression), ncol = ncol(co_expression))
dimnames(output_mat) = dimnames(co_expression)
output_mat[upper.tri(output_mat)] = output$cross_padj
output_mat = output_mat + t(output_mat)
diag(output_mat) = 1
mat_out[[2]] = output_mat
# cross with FDR
output_mat = matrix(0, nrow = nrow(co_expression), ncol = ncol(co_expression))
dimnames(output_mat) = dimnames(co_expression)
output_mat[upper.tri(output_mat)] = output$co_pvalue
output_mat = output_mat + t(output_mat)
diag(output_mat) = 1
mat_out[[3]] = output_mat
# return output
return(mat_out)
}
# return output
return(output)
}
#' Computes gene-gene correlations between cross-expressing cell-neighbor pairs.
#' Cell and neighbor masks are used to consider mutually exclusive expression per gene pair.
#'
#' @param data A cells by genes expression matrix.
#' @param locations A cells by coordinates (x-y or higher dimensions) matrix.
#' @param neighbor The n-th nearest neighbor for computing correlations.
#' @param output_matrix If TRUE, outputs the cross-expression correlation matrix.
#'
#' @return Returns a gene list with cross-expression correlation for each gene pair.
#'
#' @examples
#' data("locations")
#' data("expression")
#' locations = as.matrix(locations)
#' expression = as.matrix(expression)
#' expression = expression[,1:5]
#' results = cross_expression_correlation(data = expression, locations = locations)
#'
#' @export
cross_expression_correlation <- function(data, locations, neighbor = 1, output_matrix = FALSE){
# find nth nearest neighbors and create neighbors x genes matrix
neighbor = neighbor + 1
distances = RANN::nn2(locations, locations, k = neighbor, searchtype = "standard", treetype = "kd")
distances = distances$nn.idx[,neighbor]
data_temp = data[distances,]
# create masks for mutually exclusive gene expression
mask_data = data
mask_data[mask_data > 0] = 1
mask_data_temp = data_temp
mask_data_temp[mask_data_temp > 0] = 1
X = mask_data * (1 - mask_data_temp)
Y = mask_data_temp * (1 - mask_data)
# keep mutually exclusive gene pairs
X = X * data
Y = Y * data_temp
# gene pair correlations between cells and their neighbors
corr = correlation(X, Y)
corr = (corr + t(corr)) / 2
# return correlations if matrix form requested
if (output_matrix){
return(corr)
}
# return correlations as an edge list
ids = which(upper.tri(corr), arr.ind = TRUE)
corr = data.frame(gene1 = rownames(corr)[ids[,1]], gene2 = colnames(corr)[ids[,2]], correlation = Rfast::upper_tri(corr))
if (!output_matrix){
return(corr)
}
}
#' Smooths cells' gene expression by averaging its expression by the nearest neighbors.
#' Optionally computes genes by genes Pearson's correlation matrix between cells by genes
#' and neighbors by genes matrices.
#'
#' @param data A cells by genes expression matrix.
#' @param locations A cells by coordinates (x-y or higher dimensions) matrix.
#' @param neighbors_smoothed Numbers of neighbors used for smoothing (0 is the cell itself; 1 is the nearest neighbor).
#' @param corr If TRUE, computes genes by genes correlation matrix between regions.
#'
#' @return Returns a smoothed gene expression matrix. If corr = TRUE, additionally returns a gene-gene correlation matrix.
#'
#' @examples
#' data("locations")
#' data("expression")
#' locations = as.matrix(locations)
#' expression = as.matrix(expression)
#' expression = expression[,1:5]
#' results = smooth_cells(data = expression, locations = locations)
#'
#' @export
smooth_cells <- function(data, locations, neighbors_smoothed = 1, corr = TRUE){
# convolution kernel (0 is cell itself; 1 is first neighbor)
neighbors_smoothed = neighbors_smoothed + 2
# include nearest n cells (incl. cell itself) and exclude others
distances = RANN::nn2(locations, locations, k = neighbors_smoothed, searchtype = "standard", treetype = "kd")
distances = distances$nn.idx
neighbor_regions = distances[,ncol(distances)]
distances = distances[,ncol(distances) - 1]
cellbycell = Matrix::Matrix(0, nrow = nrow(locations), ncol = nrow(locations), doDiag = FALSE)
# insert 1's for nearest neighbors (incl. cell itself) but 0's otherwise
ids = cbind(rep(1:nrow(locations), neighbors_smoothed),
as.vector(distances))
cellbycell[ids] = 1
# smooth gene expression
data_smooth = cellbycell %*% data
data_smooth = data_smooth / neighbors_smoothed
# gene x gene correlation between smoothed cells x genes matrix and smoothed neighbors x genes matrix
# 'neighbors' are regions at n + 1 cells
if (corr){
corr = data_smooth[neighbor_regions,]
corr = correlation(matrix1 = data_smooth, matrix2 = corr)
# output smoothed expression matrix and correlation matrix
output = list(data_smooth, corr); names(output) = c("smooth_expression", "smooth_correlation")
# return output
return(output)
}
# return output
return(data_smooth)
}
#' Calculates bullseye statistics for ALL gene pairs.
#' Counts the number of cells with gene B in those with gene A
#' And in their neighbors
#' Neighbor scores are cumulative and normalized by window size.
#'
#' @param data A cells by genes expression matrix.
#' @param locations A cells by coordinates (x-y or higher dimensions) matrix.
#' @param window_sizes Vector of window sizes.
#' @param ratio_to_co If TRUE, results are normalized by co-expressing cells.
#' @param log_2 If TRUE, results are transformed by log2.
#' @param output_matrix If TRUE, results are provided in matrix form.
#'
#' @return Returns a dataframe with each row representing a gene pair and each column representing
#' the bullseye score for that pair in each window size.
#' output_matrix = TRUE presents a matrix for each window size.
#'
#' @examples
#' data("locations")
#' data("expression")
#' locations = as.matrix(locations)
#' expression = as.matrix(expression)
#' expression = expression[,1:5]
#' results = bullseye_scores(data = expression, locations = locations)
#'
#' @export
bullseye_scores <- function(data, locations, window_sizes = 1:5, ratio_to_co = FALSE, log_2 = FALSE, output_matrix = FALSE){
# binarize data
data[data > 0] = 1
# find neighbors of each cell per window
distances = RANN::nn2(locations, locations, k = max(window_sizes) + 1, searchtype = "standard", treetype = "kd")
distances = distances$nn.idx
# storage across window sizes
outcome = matrix(data = 0, ncol = length(c(0,window_sizes)), nrow = choose(ncol(data),2) * 2)
colnames(outcome) = c("Cell", stringr::str_c("Neig = ", window_sizes))
# compute scores
for (i in 2:ncol(outcome)){
hits = t(data) %*% data[distances[,i],]
outcome[,i] = c(hits[upper.tri(hits)], t(hits)[upper.tri(hits)])
}
# cumulative sums and normalize by window size
outcome = t(apply(outcome, 1, cumsum))
outcome = sweep(outcome, 2, c(1,window_sizes), FUN = "/")
# add co-expression
cells = t(data) %*% data[distances[,1],]
outcome[,1] = c(cells[upper.tri(cells)], t(cells)[upper.tri(cells)])
# if ratio_to_co = TRUE, transform results as ratio
if (ratio_to_co){
outcome = sweep(outcome + 1, 1, outcome[,"Cell"] + 1, FUN = "/")
}
# if log_2 = TRUE, transform results as log2
if (log_2){
if (ratio_to_co) {outcome = log2(outcome)}
if (!ratio_to_co){outcome = log2(outcome + 1)}
}
# append gene names
upper_indices = which(upper.tri(cells), arr.ind = TRUE)
row = rownames(cells)[upper_indices[, 1]]
col = rownames(cells)[upper_indices[, 2]]
row1 = c(row, col)
col1 = c(col, row)
outcome = data.frame(gene1 = row1, gene2 = col1, outcome)
colnames(outcome)[4:ncol(outcome)] = stringr::str_c("Neig = ", window_sizes)
# if output_matrix = TRUE, then return outcome as matrix
if (output_matrix){
# storage list
outcome_matrix = vector(mode = "list", length = length(c(1,window_sizes)))
names(outcome_matrix) = colnames(outcome)[3:ncol(outcome)]
# indices
reconstructed_data = matrix(data = 0, nrow = ncol(data), ncol = ncol(data))
upper_triangle_indices = which(upper.tri(reconstructed_data), arr.ind = TRUE)
lower_triangle_indices = which(upper.tri(t(reconstructed_data)), arr.ind = TRUE)
# create matrices
for (i in 3:ncol(outcome)){
reconstructed_data[upper_triangle_indices] = outcome[,i][1:length(upper_triangle_indices[,1])]
reconstructed_data = t(reconstructed_data)
reconstructed_data[lower_triangle_indices] = outcome[,i][(length(upper_triangle_indices[,1]) + 1):nrow(outcome)]
reconstructed_data = t(reconstructed_data)
rownames(reconstructed_data) = colnames(data); colnames(reconstructed_data) = colnames(data)
outcome_matrix[[i-2]] = reconstructed_data
}
# return outcome matrix
return(outcome_matrix)
}
# return outcome
return(outcome)
}
#' Outputs a circular bullseye plot for a gene pair.
#' The central circle is gene B in cells expressing gene A.
#' Rings indicate neighbors with gene B, where the first ring is the first neighbor.
#'
#' @param scores Bullseye score as a vector.
#'
#' @return Returns a circular bullseye plot.
#'
#' @examples
#' data("locations")
#' data("expression")
#' locations = as.matrix(locations)
#' expression = as.matrix(expression)
#' expression = expression[,1:5]
#' results = bullseye_scores(data = expression, locations = locations)
#' results = as.numeric(results[1, 3:ncol(results)]) # choose gene pair of interest (row index)
#' bullseye_plot(results)
#'
#' @export
bullseye_plot <- function(scores){
# initialization
vec_values = as.numeric(scores)
window_sizes = 1:length(vec_values)
radius_ratio = 1
# initial radius, ratio for subsequent circles, and no. of circles
initial_radius = 1/length(window_sizes)
n_circles = length(window_sizes)
# calculate the radii
radii = vector(mode = "numeric", length = length(window_sizes))
radii[1] = initial_radius
for (i in 2:n_circles) {radii[i] = radii[i-1] + initial_radius * radius_ratio^(i-1)}
# dataframe for coordinates and hues of each circle
df_list = list()
for (i in 1:length(radii)) {
theta = seq(0, 2 * pi, length.out = 100)
if (i > 1) {inner_radius = radii[i - 1]} else {inner_radius = 0}
outer_radius = radii[i]
df_circle = data.frame(x = c(cos(theta) * inner_radius, rev(cos(theta) * outer_radius)),
y = c(sin(theta) * inner_radius, rev(sin(theta) * outer_radius)),
id = rep(i, length(theta) * 2), vec_value = rep(vec_values[i], length(theta) * 2))
df_list[[i]] = df_circle
}
df = dplyr::bind_rows(df_list); colnames(df) = c("x","y","window","hue")
# create bullseye plot
p = ggplot2::ggplot(df, ggplot2::aes(x = x, y = y, fill = hue)) +
ggplot2::geom_polygon(ggplot2::aes(group = window)) + ggplot2::coord_fixed(ratio = 1) +
ggplot2::scale_fill_gradient(low = "lightblue", high = "#08306B") +
ggplot2::labs(x = "", y = "", fill = "") +
ggplot2::theme_classic() +
ggplot2::theme(axis.line = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
axis.ticks = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 10))
# return plot
return(p)
}
#' Determines whether the supplied genes show spatial enrichment in cross-expression.
#' Spatial enrichment can be interpreted as delineating anatomical boundaries.
#'
#' @param data A cells by genes expression matrix.
#' @param locations A cells by coordinates (x-y or higher dimensions) matrix.
#' @param gene1 Name of gene1.
#' @param gene2 Name of gene2.
#' @param neighbor The n-th nearest neighbor.
#' @param max_pairs Specify maximum number of cell pairs to consider. Lower number increases computational efficiency.
#'
#' @return Returns a p-value and distance distributions between cross-expressing cells and cross-expressing and random cells.
#'
#' @examples
#' data("locations")
#' data("expression")
#' locations = as.matrix(locations)
#' expression = as.matrix(expression)
#' expression = expression[,1:5]
#' results = spatial_enrichment(data = expression, locations = locations,
#' gene1 = "Calb1", gene2 = "Rasgrf2", max_pairs = 100)
#'
#' @export
spatial_enrichment <- function(data, locations, gene1, gene2, neighbor = 1, max_pairs = 20000){
# subset genes
data = data[,c(gene1,gene2)]
data[data > 0] = 1
# find nth nearest neighbors and create neighbors x genes matrix
neighbor = neighbor + 1
distances = RANN::nn2(locations, locations, k = neighbor, searchtype = "standard", treetype = "kd")
distances = distances$nn.idx[,neighbor]
data_temp = data[distances,]
# create masks for mutually exclusive gene expression
mask_data = data
mask_data_temp = data_temp
X = mask_data * (1 - mask_data_temp)
Y = (1 - mask_data) * mask_data_temp
# keep mutually exclusive gene pairs
X = X * data
Y = Y * data_temp
# average using locations of neighboring cells and append cross-expressing pairs
locations = (locations + locations[distances,]) / 2
locations = data.frame(locations, cross = as.numeric((X[,1] > 0 & Y[,2] > 0) | (X[,2] > 0 & Y[,1] > 0)))
# subset by cross-expressing and random cells
locations1 = locations[locations$cross == 1,]; locations1 = locations1[,c(1,2)]
locations1 = locations1[sample(1:nrow(locations1), size = min(c(max_pairs, nrow(locations1))), replace = FALSE),]
locations0 = locations[locations$cross == 0,]; locations0 = locations0[,c(1,2)]
locations0 = locations0[sample(1:nrow(locations0), size = nrow(locations1), replace = FALSE),]
# compute distances
dist1 = Rfast::Dist(locations1)
dist1 = Rfast::upper_tri(dist1)
dist0 = rbind(locations1, locations0)
dist0 = Rfast::Dist(dist0)
dist0 = dist0[1:nrow(locations1),(nrow(locations0)+1):ncol(dist0)]
dist0 = as.numeric(dist0)
# compute p-value and return p-value plus two distance distributions
pval = stats::wilcox.test(x = scale(dist1), y = scale(dist0), alternative = "less")
pval = pval$p.value
# make plot comparing distances
target = dist1
null = dist0
pp = data.frame(vals = c(target, null), type = rep(c("Cross-expressing", "Random"), times = c(length(target), length(null))))
pp$type = factor(pp$type, levels = c("Random","Cross-expressing"))
pp = ggplot2::ggplot(pp) + ggplot2::aes(x = vals, fill = type, y = ggplot2::after_stat(scaled)) +
ggplot2::geom_density(alpha = 0.8) +
ggplot2::labs(x = "Distance to cells", y = "Density", fill = "") + ggplot2::theme_classic() +
ggplot2::guides(fill = ggplot2::guide_legend(reverse = TRUE)) +
ggplot2::scale_fill_manual(values = c("Cross-expressing" = "lightblue", "Random" = "gray88")) +
ggplot2::theme(legend.position = "top",
axis.text = ggplot2::element_text(size = 10),
axis.title = ggplot2::element_text(size = 12),
legend.text = ggplot2::element_text(size = 12))
# return p-value, two distance distributions, and plot
pval = list(pvalue = pval, target = dist1, null = dist0, plot = pp)
return(pval)
}
#' Plots gene expression and cross-expression on tissue by coloring cells.
#'
#' @param data A cells by genes expression matrix.
#' @param locations A cells by coordinates (x-y or higher dimensions) matrix.
#' @param gene1 Name of gene 1.
#' @param gene2 Name of gene 2.
#' @param cross_expression If TRUE, only cross-expressing cell pairs are shown.
#' @param neighbor The nearest neighbor (for cross-expression).
#' @param point_size Point size on the scatter plot.
#' @param scale_bar Length of the scale bad in microns.
#'
#' @return Returns a plot with cells shown as points and color indicating the genes it expresses.
#'
#' @examples
#' data("locations")
#' data("expression")
#' locations = as.matrix(locations)
#' expression = as.matrix(expression)
#' expression = expression[,1:5]
#' tissue_expression_plot(data = expression, locations = locations, gene1 = "Calb1", gene2 = "Rasgrf2")
#'
#' @export
tissue_expression_plot <- function(data, locations, gene1, gene2, cross_expression = TRUE, neighbor = 1, point_size = 0, scale_bar = 0){
# convert locations to dataframe
locations = as.data.frame(locations)
# subset and binarize data
data = data[,c(gene1,gene2)]
gene_names = colnames(data)
colnames(data) = c("gene1","gene2")
data[data > 0] = 1
colnames(locations) = c("x","y")
# scale bar
x_start = as.numeric(stats::quantile(locations$x, probs = 0.95))
x_end = x_start + scale_bar
y_start = as.numeric(stats::quantile(locations$y, probs = 0))
y_end = y_start
# plot without cross-expression
if (!cross_expression){
# type of cell by gene expression
type = vector(mode = "character", length = nrow(data))
type[data[,1] == 1] = "Gene1"
type[data[,2] == 1] = "Gene2"
type[(data[,1] == 1) & (data[,2] == 1)] = "Both"
type[(data[,1] == 0) & (data[,2] == 0)] = "Neither"
# plot cells and color by type
locations = data.frame(locations, type)
locations$type = factor(locations$type, levels = c("Neither", "Both", "Gene2", "Gene1"))
locations = locations[order(locations$type), ]
p = ggplot2::ggplot(locations) + ggplot2::aes(x = x, y = y, color = type) +
ggplot2::geom_point(size = point_size) +
ggplot2::scale_color_manual(values = c("Neither" = "gray88", "Both" = "chartreuse3", "Gene2" = "deepskyblue4", "Gene1" = "brown3"),
labels = c("Neither" = "Neither", "Both" = "Both", "Gene2" = gene_names[2], "Gene1" = gene_names[1])) +
ggplot2::labs(x = "x coordinates", y = "y coordinates", color = "") +
ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 2), reverse = TRUE)) +
ggplot2::annotate("segment", x = x_start, xend = x_end, y = y_start, yend = y_end, linewidth = 1, color = "black") +
ggplot2::theme_classic() +
ggplot2::theme(axis.text = ggplot2::element_text(size = 10),
axis.title = ggplot2::element_text(size = 12),
legend.text = ggplot2::element_text(size = 10))
# return plot
return(p)
}
# genes' cell-neighbor pairs
neighbor = neighbor + 1
distances = RANN::nn2(locations, locations, k = neighbor, searchtype = "standard", treetype = "kd")
distances = distances$nn.idx[,neighbor]
df_neig = data[distances,]
cells = rownames(data)
neigs = rownames(data)[distances]
# cell-neighbor pairs to light up
pair1 = data[,1] > 0 & data[,2] == 0 & df_neig[,2] > 0 & df_neig[,1] == 0
pair2 = data[,2] > 0 & data[,1] == 0 & df_neig[,1] > 0 & df_neig[,2] == 0
# cells lit up with genes 1 or 2
gene1 = unique(c(cells[pair1], neigs[pair2]))
gene2 = unique(c(cells[pair2], neigs[pair1]))
# vector for lighting up cells with genes 1 or 2
light = vector(mode = "character", length = nrow(data))
light[rownames(data) %in% gene1] = "Gene1"
light[rownames(data) %in% gene2] = "Gene2"
light[light == ""] = "Neither"
# process for plotting
meta = data.frame(locations, light)
meta$light = factor(meta$light)
meta_neither = meta[meta$light == "Neither", ]
meta_others = meta[meta$light != "Neither", ]
meta_reordered = rbind(meta_neither, meta_others)
# plot cross-expression
p = ggplot2::ggplot(meta_reordered) + ggplot2::aes(x = x, y = y, color = light) +
ggplot2::geom_point(size = point_size) +
ggplot2::scale_color_manual(values = c("Neither" = "gray88", "Both" = "chartreuse3", "Gene2" = "deepskyblue4", "Gene1" = "brown3"),
labels = c("Neither" = "Neither", "Both" = "Both", "Gene2" = gene_names[2], "Gene1" = gene_names[1])) +
ggplot2::labs(x = "x coordinates", y = "y coordinates", color = "") +
ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 2))) +
ggplot2::annotate("segment", x = x_start, xend = x_end, y = y_start, yend = y_end, linewidth = 1, color = "black") +
ggplot2::theme_classic() +
ggplot2::theme(axis.text = ggplot2::element_text(size = 10),
axis.title = ggplot2::element_text(size = 12),
legend.text = ggplot2::element_text(size = 10))
# return plot
return(p)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.