#' Build and plot dendrogram from gene panel
#'
#' Build and plot a dendrogram using correlation-based average linkage hierarchical
#' clustering and only using a specified set of genes. The output is the expected
#' accuracy of mapping to each node in the tree, which gives an idea of the best-case
#' expected results for mFISH analysis.
#'
#' @param dend dendrogram for mapping. Ignored if medianDat is passed
#' @param refDat normalized data of the REFERENCE data set. Ignored if medianDat is passed
#' @param mapDat normalized data of the MAPPING data set. Default is to map the data onto itself.
#' @param medianDat representative value for each leaf and node. If not entered, it is calculated
#' @param requiredGenes minimum number of genes required to be expressed in a cluster (column
#' of medianDat) for the cluster to be included (default=2)
#' @param clusters cluster calls for each cell
#' @param mappedAsReference if TRUE, returns the fraction of cells mapped to a node which are
#' were orginally clustered from that node; if FALSE (default) returns the fraction of cells
#' clustered under a node which are mapped to the correct node.
#' @param genesToMap which genes to include in the correlation mapping
#' @param plotdendro should the dendrogram be plotted (default = TRUE)
#' @param returnDendro should the dendrogram be returned (default = TRUE)
#' @param mar margins (for use with par)
#' @param main,ylab add title and labels to plot (default is NULL)
#' @param use,... additional parameters for cor
#'
#' @return a list where the first entry is the resulting tree and the second entry is the
#' fraction of cells correctly mapping to each node using the inputted gene panel.
#'
#' @export
buildTreeFromGenePanel <- function(dend = NA,
refDat = NA,
mapDat = refDat,
medianDat = NA,
requiredGenes = 2,
clusters = NA,
mappedAsReference = FALSE,
genesToMap = rownames(mapDat),
plotdendro = TRUE,
returndendro = TRUE,
mar = c(12, 5, 5, 5),
main = NULL,
ylab = NULL,
use = "p",
...) {
library(dendextend)
# Calculate the median, if needed.
if (is.na(medianDat[1])) {
names(clusters) <- colnames(refDat)
medianDat <- do.call("cbind", tapply(
names(clusters), clusters, function(x) rowMedians(refDat[, x])
))
rownames(medianDat) <- rownames(refDat)
if (!is.na(dend)) medianDat <- leafToNodeMedians(dend, medianDat)
}
gns <- intersect(genesToMap, intersect(rownames(mapDat), rownames(medianDat)))
# Subset the data to relevant genes and clusters
medianDat <- medianDat[gns, ]
mapDat <- mapDat[gns, ]
medianDat <- medianDat[, colSums(medianDat > 0) >= requiredGenes]
kpDat <- (colSums(mapDat > 0) >= requiredGenes) & (is.element(clusters, colnames(medianDat)))
mapDat <- mapDat[, kpDat]
# Perform the correlation mapping
facsCor <- corTreeMapping(medianDat = medianDat, mapDat = mapDat, use = use, ...)
facsCl <- colnames(facsCor)[apply(facsCor, 1, which.max)]
# Build a new tree based on mapping
sCore <- function(x, use, ...) return(as.dist(1 - WGCNA::cor(x, use = use, ...)))
dend <- getDend(medianDat, sCore, use = use, ...)
# Which leaves have which nodes?
has_any_labels <- function(sub_dend, the_labels) any(labels(sub_dend) %in% the_labels)
node_labels <- NULL
for (lab in labels(dend)) node_labels <- cbind(
node_labels, noded_with_condition(dend, has_any_labels, the_labels = lab)
)
rownames(node_labels) <- get_nodes_attr(dend, "label")
colnames(node_labels) <- labels(dend)
# Swap the mapped and reference nodes if
# mappedAsReference=TRUE
clTmp <- as.character(clusters[kpDat])
if (mappedAsReference) {
temp <- clTmp
clTmp <- facsCl
facsCl <- temp
}
# Which clusters agree at the node level?
agreeNodes <- apply(cbind(facsCl, clTmp), 1, function(lab, node_labels) {
rowSums(node_labels[, lab]) == 2
}, node_labels)
colnames(agreeNodes) <- clTmp
# Which clusters are in each nodes?
isInNodes <- t(apply(node_labels, 1, function(node, cl, dend) {
is.element(cl, labels(dend)[node])
}, clTmp, dend))
colnames(isInNodes) <- clTmp
# For each node, plot the fraction of cells that
# match if desired?
fracAgree <- rowSums(agreeNodes) / rowSums(isInNodes)
if (plotdendro) {
par(mar = mar)
dend %>% set("nodes_cex", 0) %>% set("branches_col", "grey") %>% plot()
text(get_nodes_xy(dend)[, 1], get_nodes_xy(dend)[, 2], round(fracAgree * 100))
title(main = main, ylab = ylab)
}
# Return the results (if desired)
if (returndendro) return(list(dend, fracAgree))
}
#' Build a dendrogram from gene panel
#'
#' Build a dendrogram from an inputted data matrix.
#'
#' @param dat matrix of values (e.g., genes x clusters) for calculating the dendrogram
#' @param distFun function for calculating distance matrix (default is correlation-based)
#' @param ... additional variables for distFun
#'
#' @return dendrogram
#'
#' @export
getDend <- function(dat,
distFun = function(x) return(as.dist(1 - WGCNA::cor(x))),
...) {
distCor <- distFun(dat, ...)
distCor[is.na(distCor)] <- max(distCor, na.rm = TRUE) * 1.2
# Avoid crashing by setting NA values to hang off the side of the tree.
avgClust <- hclust(distCor, method = "average")
dend <- as.dendrogram(avgClust)
dend <- labelDend(dend)[[1]]
return(dend)
}
#' Label dendrogram nodes
#'
#' Add numeric node labels to a dendrogram.
#'
#' @param dend dendrogram object
#' @param distFun starting numeric node value (default=1)
#'
#' @return a list where the first item is the new dendrogram object and the
#' second item is the final numeric node value.
#'
#' @export
labelDend <- function(dend, n = 1) {
if (is.null(attr(dend, "label"))) {
attr(dend, "label") <- paste0("n", n)
n <- n + 1
}
if (length(dend) > 1) {
for (i in 1:length(dend)) {
tmp <- labelDend(dend[[i]], n)
dend[[i]] <- tmp[[1]]
n <- tmp[[2]]
}
}
return(list(dend, n))
}
#' Summarize matrix
#'
#' Groups columns in a matrix by a specified group vector and summarizes using a specificed function.
#' Optionally binarizes the matrix using a specified cutoff parameter. This is a wrapper for tapply.
#'
#' @param mat matrix where the columns (e.g., samples) are going to be grouped
#' @param group vector of length dim(mat)[2] corresponding to the groups
#' @param scale either 'none' (default),'row', or 'column'
#' @param scaleQuantile what quantile of value should be set as 1 (default=1)
#' @param binarize should the data be binarized? (default=FALSE)
#' @param binMin minimum ON value for the binarized matrix (ignored if binarize=FALSE)
#' @param summaryFunction function (or function name) to be used for summarization
#' @param ... additional parameters for summaryFunction
#'
#' @return matrix of summarized values
#'
#' @export
summarizeMatrix <- function(mat,
group,
scale = "none",
scaleQuantile = 1,
binarize = FALSE,
binMin = 0.5,
summaryFunction = median,
...) {
# Make sure the names match up
if (is.null(colnames(mat))) colnames(mat) <- names(group)
if (is.null(colnames(mat))) colnames(mat) <- 1:length(group)
names(group) <- colnames(mat)
# Calculate the summary
summaryFunction <- match.fun(summaryFunction)
runFunction <- function(x, ...) {
if (length(x) > 1) {
return(apply(mat[, x], 1, summaryFunction, ...))
}
return(mat[, x])
}
summarizedMat <- do.call("cbind", tapply(names(group), group, runFunction, ...))
if (is.factor(group)) summarizedMat <- summarizedMat[, levels(group)]
rownames(summarizedMat) <- rownames(mat)
# Scale the data if desired
if (substr(scale, 1, 1) == "r") {
for (i in 1:dim(summarizedMat)[1])
summarizedMat[i, ] <- summarizedMat[i, ] /
max(1e-06, quantile(summarizedMat[i, ], probs = scaleQuantile))
}
if (substr(scale, 1, 1) == "c") {
for (i in 1:dim(summarizedMat)[2])
summarizedMat[, i] <- summarizedMat[, i] /
max(1e-06, quantile(summarizedMat[, i], probs = scaleQuantile))
}
# Binarize the data if desired
if (binarize) {
summarizedMat <- summarizedMat > binMin
summarizedMat <- summarizedMat + 1 - 1
}
summarizedMat
}
#' Scale mFISH data and map to RNA-seq reference
#'
#' This function is a wrapper for several other functions which aim to scale mFISH data to
#' more closely match RNA-seq data and then map the mFISH data to the closest reference
#' classes. There are several parameters allowing flexability in filtering and analysis.
#'
#' @param mapDat normalized data of the MAPPING data set. Default is to map the data onto itself.
#' @param refSummaryDat normalized summary data of the REFERENCE data set (e.g., what to map against)
#' @param genesToMap which genes to include in the mapping (calculated in not entered)
#' @param mappingFunction which function to use for mapping (default is cellToClusterMapping_byCor)
#' The function must include at least two parameters with the first one being mapped data and the
#' second data the reference. Additional parameters are okay. Output must be a data frame where
#' the first value is a mapped class. Additional columns are okay and will be returned)
#' @param transform function for transformation of the data (default in none)
#' @param noiselevel scalar value at or below which all values are set to 0 (default is 0)
#' @param scaleFunction which function to use for scaling mapDat to refSummaryDat (default is setting
#' 90th quantile of mapDat to max of refSummaryDat and truncating higher mapDat values)
#' @param omitGenes genes to be included in the data frames but excluded from the mapping
#' @param metadata a data frame of possible metadata (additional columns are okay and ignored):
#' \describe{
#' \item{area}{a vector of cell areas for normalization}
#' \item{experiment}{a vector indicating if multiple experiments should be scaled separately}
#' \item{x,y}{x (e.g., parallel to layer) and y (e.g., across cortical layers) coordinates in tissue}
#' }
#' @param integerWeights if not NULL (default) a vector of integers corresponding to how many times
#' each gene should be counted as part of the correlation. This is equivalent to calculating
#' a weighted correlation, but only allows for integer weight values (for use with cor).
#' @param binarize should the data be binarized? (default=FALSE)
#' @param binMin minimum ON value for the binarized matrix (ignored if binarize=FALSE)
#' @param ... additional parameters for passthrough into other functions
#'
#' @return a list with the following entrees:
#' \describe{
#' \item{mapDat}{mapDat data matrix is passed through}
#' \item{scaleDat}{scaled mapDat data matrix}
#' \item{mappingResults}{Results of the mapping and associated confidence values (if any)}
#' \item{metadata=metadata}{metadata is passed through unchanged}
#' \item{scaledX/Y}{scaled x and y coordinates (or unscaled if scaling was not performed)}
#' }
#'
#' @export
fishScaleAndMap <- function(mapDat,
refSummaryDat,
genesToMap = NULL,
mappingFunction = cellToClusterMapping_byCor,
transform = function(x) x,
noiselevel = 0,
scaleFunction = quantileTruncate,
omitGenes = NULL,
metadata = data.frame(experiment = rep("all", dim(mapDat)[2])),
integerWeights = NULL,
binarize = FALSE,
binMin = 0.5,
...) {
# Setup
mappingFunction <- match.fun(mappingFunction)
scaleFunction <- match.fun(scaleFunction)
transform <- match.fun(transform)
if (is.null(genesToMap)) genesToMap <- colnames(mapDat)
genesToMap <- intersect(genesToMap, rownames(refSummaryDat))
params <- colnames(metadata)
refSummaryDat <- refSummaryDat[genesToMap, ]
mapDat <- mapDat[genesToMap, ]
# Transform the data to be mapped
scaleDat <- as.matrix(mapDat[genesToMap, ])
scaleDat[scaleDat <= noiselevel] <- 0 # Set values less than or equal to noiselevel to 0
if (is.element("area", params)) {
# Account for spot area in gene expression calculation
scaleDat <- t(t(scaleDat) / metadata$area) * mean(metadata$area)
}
scaleDat <- transform(scaleDat)
# Scale to the reference data
for (ex in unique(metadata$experiment)) {
isExp <- metadata$experiment == ex
for (g in genesToMap) scaleDat[g, isExp] <- scaleFunction(scaleDat[
g, isExp
], maxVal = max(refSummaryDat[g, ]), ...)
}
# Binarize, if desired
if (binarize) {
scaleDat <- scaleDat > binMin
scaleDat <- scaleDat + 1 - 1
}
# Omit genes and weight scaling, if desired
genesToMap2 <- genesToMap
if (!is.null(integerWeights)) genesToMap2 <- rep(genesToMap2, integerWeights)
genesToMap2 <- genesToMap2[!is.element(genesToMap2, omitGenes)]
# Map the map data to the reference data
scaleDat2 <- scaleDat[genesToMap2, ]
refSummaryDat2 <- refSummaryDat[genesToMap2, ]
mappingResults <- mappingFunction(refSummaryDat2, scaleDat2, ...)
# Scale x and y coordinates to (0,1) within experiment, if desired
for (ex in unique(metadata$experiment)) {
isExp <- metadata$experiment == ex
metadata$x[isExp] <- metadata$x[isExp] - min(metadata$x[isExp])
metadata$x[isExp] <- metadata$x[isExp] / max(metadata$x[isExp])
metadata$y[isExp] <- metadata$y[isExp] - min(metadata$y[isExp])
metadata$y[isExp] <- metadata$y[isExp] / max(metadata$y[isExp])
}
# Return the results
out <- list(
mapDat = mapDat, scaleDat = scaleDat,
mappingResults = mappingResults, metadata = metadata,
scaledX = metadata$x, scaledY = metadata$y
)
}
#' Filter (subset) fishScaleAndMap object
#'
#' Subsets all components in a fishScaleAndMap object
#'
#' @param datFish a fishScaleAndMap output list
#' @param subset a boolean or numeric vector of the elements to retain
#'
#' @return a fishScaleAndMap output subsetted to the requested elements
#'
#' @export
filterCells <- function(datFish,
subset) {
## Error checking
if ((length(subset) != length(datFish$scaledX)) & (!is.numeric(subset))) {
print("subset is incorrect format. Returning original entry.")
return(datFish)
}
if (is.numeric(subset)) subset <- intersect(subset, 1:length(datFish$scaledX))
## Subset all of the elements
datFish$mapDat <- datFish$mapDat[, subset]
datFish$scaleDat <- datFish$scaleDat[, subset]
datFish$metadata <- datFish$metadata[subset, ]
datFish$scaledX <- datFish$scaledX[subset]
datFish$scaledY <- datFish$scaledY[subset]
if (!is.null(datFish$mappingResults)) {
datFish$mappingResults <- datFish$mappingResults[subset, ]
}
return(datFish)
}
#' Merge two fishScaleAndMap objects
#'
#' Merges all components of two fishScaleAndMap objects to create a new
#' one. Note: only meta-data and mappingResults that is present in BOTH
#' objects will be returned.
#'
#' @param datFish1 a fishScaleAndMap output list
#' @param datFish2 a second fishScaleAndMap output list.
#'
#' @return a new fishScaleAndMap output list with the two original ones merged
#'
#' @export
mergeFish <- function(datFish1,
datFish2) {
datFish <- datFish1
datFish$mapDat <- cbind(datFish1$mapDat, datFish2$mapDat)
datFish$scaleDat <- cbind(datFish1$scaleDat, datFish2$scaleDat)
datFish$metadata <- rbind(datFish1$metadata, datFish2$metadata)
datFish$scaledX <- c(datFish1$scaledX, datFish2$scaledX)
datFish$scaledY <- c(datFish1$scaledY, datFish2$scaledY)
if ((!is.null(datFish1$mappingResults)) & (!is.null(datFish2$mappingResults))) {
datFish$mappingResults <- rbind(datFish1$mappingResults, datFish2$mappingResults)
}
return(datFish)
}
#' Rotate coordinates
#'
#' Rotates the scaledX and scaledY elements of a fishScaleAndMap output list so that the
#' axis of interest (e.g., cortical layer) is paralled with the x cooridate plan.
#' Rotation code is from https://stackoverflow.com/questions/15463462/rotate-graph-by-angle
#'
#' @param datFish a fishScaleAndMap output list
#' @param flatVector a TRUE/FALSE vector ordred in the same way as the elements (e.g., cells)
#' in datIn where all TRUE values correspond to cells who should have the same Y coordinate
#' (e.g., be in the same layer). Alternatively a numeric vector of cell indices to include
#' @param flipVector a numeric vector of values to ensure proper reflection on Y-axes (e.g.,
#' layer; default=NULL)
#' @param subset a boolean or numeric vector of the elements to retain
#'
#' @return a fishScaleAndMap output list with updated scaledX and scaleY coordinates
#'
#' @export
rotateXY <- function(datFish,
flatVector = NULL,
flipVector = NULL,
subset = NULL) {
## Error checking
datFishIn <- datFish
if ((length(flatVector) != length(datFish$scaledX)) & (!is.numeric(flatVector))) {
print("flatVector is incorrect format. Returning original entry.")
return(datFish)
}
if (!is.null(subset)) {
if ((length(subset) != length(datFish$scaledX)) & (!is.numeric(subset))) {
print("subset is incorrect format. Returning original entry.")
return(datFish)
}
}
if (((length(flipVector) != length(datFish$scaledX)) &
(!is.numeric(flipVector))) & (!is.null(flipVector))) {
print("flipVector is incorrect format. Returning original entry.")
return(datFish)
}
if (is.numeric(flatVector)) {
flatVector <- intersect(flatVector, 1:length(datFish$scaledX))
}
## Subset the data if needed
datFish <- datFishIn
if (!is.null(subset)) {
datFish <- filterCells(datFishIn, subset)
flatVector <- flatVector[subset]
flipVector <- flipVector[subset]
}
## Caculate best angle
v <- prcomp(cbind(datFish$scaledX, datFish$scaledY)[flatVector, ])$rotation
beta <- as.numeric(atan(-v[2, 1]/v[1, 1]))
## Rotate coordinates (internal function)
rotCor <- function(datFish,
beta) {
M <- cbind(datFish$scaledX, datFish$scaledY)
rotm <- matrix(c(cos(beta), sin(beta), -sin(beta), cos(beta)), ncol = 2) # rotation matrix
M2.1 <- t(t(M) - c(M[1, 1], M[1, 2])) # shift points, so that turning point is (0,0)
M2.2 <- t(rotm %*% (t(M2.1))) # rotate
M2.3 <- t(t(M2.2) + c(M[1, 1], M[1, 2])) # shift back
x <- M2.3[, 1]
y <- M2.3[, 2]
x <- x - min(x)
x <- x / max(x)
y <- y - min(y)
y <- y / max(y)
datFish$scaledX <- x
datFish$scaledY <- y
datFish
}
datFish2 <- rotCor(datFish, beta)
if (!is.null(flipVector)) {
if (sum(datFish2$scaledY * flipVector) < sum((1 - datFish2$scaledY) * flipVector)) {
datFish2 <- rotCor(datFish, beta)# + pi) # Pi should not be needed
}
}
datFish <- datFish2
## Unsubset and return the data
if (is.null(subset)) return(datFish)
datFishIn$scaledX[subset] <- datFish$scaledX
datFishIn$scaledY[subset] <- datFish$scaledY
datFishIn
}
#' Plot distributions
#'
#' Plot the distributions of cells across the tissue with overlaying color information. This is
#' a wrapper function for plot
#'
#' @param datIn a fishScaleAndMap output list
#' @param group a character vector (or factor) indicating how to split the data (e.g., cluster
#' call) or a metadata/mappingResults column name
#' @param groups a character vector of groups to show (default is levels of group)
#' @param colors a character vector (or factor) indicating how to color the plots (e.g., layer
#' or gene expression) or a metadata/mappingResults column name (default is all black)
#' @param colormap function to use for the colormap for the data (default gray.colors)
#' @param maxrow maximum number of plots to show in one row (default=12)
#' @param xlim,ylim for plot, but will be calculated if not entered
#' @param pch,cex for plot. Can be single values or vectors
#' @param main,xlab,ylab,... other parameters for plot (must be single values)
#' @param singlePlot should everything be plot on a single page (default=TRUE)
#'
#' @return Only returns if there is an error
#'
#' @export
plotDistributions <- function(datIn,
group,
groups = NULL,
colors = rep("black", dim(datIn$mapDat)[2]),
colormap = gray.colors,
maxrow = 12,
pch = 19,
cex = 1.5,
xlim = NULL, ylim = NULL,
main = "",
xlab = "", ylab = "",
singlePlot = TRUE,
...) {
colormap <- match.fun(colormap)
meta <- cbind(datIn$metadata, datIn$mappingResults)
if (length(group) == 1) {
if (is.element(group, colnames(meta))) {
group <- as.factor(meta[, group])
if (is.null(groups)) groups <- levels(group)
} else {
return(paste(group, "is not an available column name for division."))
}
}
if (length(colors) == 1) {
if (is.element(colors, colnames(meta))) {
colors <- as.numeric(as.factor(meta[, colors]))
colors <- colormap(length(unique(colors)))[colors]
} else {
return(paste(colors, "is not an available column name for coloring."))
}
} else {
colors <- as.numeric(as.factor(colors))
colors <- colormap(length(unique(colors)))[colors]
}
if (is.null(xlim)) xlim <- range(datIn$scaledX)
if (is.null(ylim)) ylim <- range(-datIn$scaledY)
# Make the plot!
if(singlePlot){
ncolv <- min(length(groups), maxrow)
nrowv <- ceiling(length(groups)/maxrow)
par(mfrow = c(nrowv, ncolv))
}
for (gp in groups) {
kp <- group == gp
pch2 <- pch
if (length(pch) > 1) pch2 <- pch[kp]
cex2 <- cex
if (length(cex) > 1) cex2 <- cex[kp]
plot(datIn$scaledX[kp], -datIn$scaledY[kp],
pch = pch2, col = colors[kp], xlim = xlim,
ylim = ylim, main = paste(main, gp), xlab = xlab,
ylab = ylab, cex = cex2, ...
)
}
}
#' Plot heatmap
#'
#' Plot the heatmap of cells ordering by a specified order. This is a wrapper for heatmap.2
#'
#' @param datIn a fishScaleAndMap output list
#' @param group a character vector (or factor) indicating how to order the heatmap (e.g., cluster
#' call) or a metadata/mappingResults column name
#' @param groups a character vector of groups to show (default is levels of group)
#' @param grouplab label for the grouping in the heatmap (default is 'Grouping' or the value for group)
#' @param useScaled plot the scaled (TRUE) or unscaled (FALSE; default) values
#' @param capValue values above capValue will be capped at capValue (default is none)
#' @param colormap set of values to use for the colormap for the data (default heat_colors)
#' @param Rowv,Colv,dendrogram,trace,margins,rowsep,colsep,key,... other parameters for heatmap.2
#' (some default values are different)
#'
#' @return Only returns if there is an error
#'
#' @export
plotHeatmap <- function(datIn,
group,
groups = NULL,
grouplab = "Grouping",
useScaled = FALSE,
capValue = Inf,
colormap = grey.colors(1000),
pch = 19,
xlim = NULL, ylim = NULL,
Rowv = FALSE,
Colv = FALSE,
dendrogram = "none",
trace = "none",
margins = c(6, 10),
rowsep = NULL,
sepwidth=c(0.4,0.4),
key = FALSE, ...) {
library(gplots)
if (useScaled) {
plotDat <- datIn$scaleDat
} else {
plotDat <- datIn$mapDat
}
plotDat <- pmin(plotDat, capValue)
meta <- cbind(datIn$metadata, datIn$mappingResults)
if (length(group) == 1) {
if (is.element(group, colnames(meta))) {
if (grouplab == "Grouping") grouplab <- group
group <- as.factor(meta[, group])
if (is.null(groups)) groups <- levels(group)
} else {
return(paste(group, "is not an available column name for division."))
}
}
# Update the cell order
groups <- c(groups, setdiff(levels(group), groups))
ord <- order(factor(group, levels = groups), -colSums(plotDat))
plotDat <- plotDat[, ord]
group <- group[ord]
# Append the cluster name to the plot data and find colseps
cn <- rep("", length(colnames(plotDat)))
colseps <- NULL
for (g in unique(as.character(group))){
wg <- which(as.character(group)==g)
cn[round(mean(wg))] <- g
colseps <- c(colseps,min(wg)-1)
}
colnames(plotDat) <- paste(cn,colnames(plotDat))
# Make the plot!
heatmap.2(plotDat,
Rowv = Rowv, Colv = Colv, dendrogram = dendrogram,
trace = trace, margins = margins, rowsep = rowsep,
colsep = colseps, key = key, col = colormap,
...
)
}
#' Return top mapped correlation-based cluster and confidence
#'
#' Primary function for doing correlation-based mapping to cluster medians and also reporting the
#' correlations and confidences. This is wrapper for getTopMatch and corTreeMapping.
#'
#' @param medianDat representative value for each leaf and node. If not entered, it is calculated
#' @param mapDat normalized data of the MAPPING data set. Default is to map the data onto itself.
#' @param refDat normalized data of the REFERENCE data set. Ignored if medianDat is passed
#' @param clusters cluster calls for each cell. Ignored if medianDat is passed
#' @param genesToMap which genes to include in the correlation mapping
#' @param use additional parameter for cor (use='p' as default)
#' @param method additional parameter for cor (method='p' as default)
#' @param returnCor should the correlation matrix be appended to the return?
#' @param ... not used
#'
#' @return data frame with the top match and associated correlation
#'
#' @export
cellToClusterMapping_byCor <- function(medianDat,
mapDat,
refDat = NA,
clusters = NA,
genesToMap = rownames(mapDat),
use = "p",
method = "p",
returnCor=FALSE,
...) {
corVar <- corTreeMapping(
medianDat = medianDat,
mapDat = mapDat, refDat = refDat, clusters = clusters,
genesToMap = genesToMap, use = use, method = method
)
corMatch <- getTopMatch(corVar)
colnames(corMatch) <- c("Class", "Correlation")
dex <- apply(corVar, 1, function(x) return(diff(sort(-x)[1:2])))
corMatch$DifferenceBetweenTopTwoCorrelations <- dex
if(returnCor)
corMatch <- cbind(corMatch,corVar)
corMatch
}
#' Quantile normalize, truncate, and scale
#'
#' Quantile normalize, truncate, and scale a numeric vector (e.g. mFISH data from one gene)
#'
#' @param x input data vector
#' @param qprob probs value to result from quantile (default=0.9)
#' @param maxVal max value for scaling (default=1)
#' @param truncate should data above the qprob threshold be truncated (default=yes)
#' @param ... not used
#'
#' @return scaled vector
#'
#' @export
quantileTruncate <- function(x,
qprob = 0.9,
maxVal = 1,
truncate = TRUE,
...) {
qs <- quantile(x[x > 0], probs = qprob, na.rm = TRUE)
if (is.na(qs)) return(x)
if (truncate) x[x > qs] <- qs
x * maxVal / qs
}
#' Plot TSNE
#'
#' Plot a TSNE of the data, with assigned colors and labels from provided variables. Note that this
#' function is a modification of code from Pabloc (https://www.r-bloggers.com/author/pabloc/) from
#' https://www.r-bloggers.com/playing-with-dimensions-from-clustering-pca-t-sne-to-carl-sagan/
#'
#' @param datIn a fishScaleAndMap output list
#' @param colorGroup a character vector (or factor) indicating how to color the Tsne (e.g., cluster
#' call) or a metadata/mappingResults column name (default=NULL)
#' @param labelGroup a character vector (or factor) indicating how to label the Tsne (e.g., cluster
#' call) or a metadata/mappingResults column name (default=NULL)
#' @param useScaled plot the scaled (TRUE) or unscaled (FALSE; default) values
#' @param capValue values above capValue will be capped at capValue (default is none)
#' @param perplexity,theta other parameters for Rtsne
#' @param main title of the plot
#' @param maxNchar what is the maximum number of characters to display in the plot for each entry?
#' @param seed for reproducibility
#'
#' @return Only returns if there is an error
#'
#' @export
plotTsne <- function(datIn,
colorGroup = "none",
labelGroup = "none",
useScaled = FALSE,
capValue = Inf,
perplexity = 10,
theta = 0.5,
main = "TSNE plot",
maxNchar = 1000,
seed = 10) {
library(Rtsne)
library(ggplot2)
# Get the data
if (useScaled) {
plotDat <- datIn$scaleDat
} else {
plotDat <- datIn$mapDat
}
plotDat <- pmin(plotDat, capValue)
plotDat <- t(plotDat)
# Color and label groups
meta <- cbind(datIn$metadata, datIn$mappingResults)
if (length(colorGroup) == 1) {
if (is.element(colorGroup, colnames(meta))) {
colorGroup <- as.factor(meta[, colorGroup])
} else {
colorGroup <- as.factor(rep("none", dim(meta)[1]))
}
}
if (length(labelGroup) == 1) {
if (is.element(labelGroup, colnames(meta))) {
labelGroup <- as.factor(meta[, labelGroup])
} else {
labelGroup <- as.factor(rep("*", dim(meta)[1]))
}
}
# Subset to maxNchar characters
levs <- substr(levels(as.factor(labelGroup)), 1, maxNchar)
labelGroup <- factor(as.character(substr(
labelGroup, 1, maxNchar
)), levels = as.character(unique(levs)))
# Get the tsne corrdinates
set.seed(seed)
tsne_model_1 <- Rtsne(as.matrix(plotDat),
check_duplicates = FALSE,
pca = TRUE, perplexity = perplexity, theta = theta, dims = 2
)
d_tsne_1 <- as.data.frame(tsne_model_1$Y)
# Make the plot!
plot_k <- ggplot(d_tsne_1, aes_string(
x = "V1", y = "V2",
color = colorGroup, label = labelGroup
)) +
geom_text() + xlab("TSNE 1") + ylab("TSNE 2") +
ggtitle(main) + theme(legend.title = element_blank())
scale_colour_discrete()
plot_k
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.