R/bdm_qMap.R
In bigMap: Big Data Mapping

Documented in bdm.qMap

# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# The bigMap Package for R.

# Copyright (c) 2018, Joan Garriga <jgarriga@ceab.csic.es>, Frederic Bartumeus <fbartu@ceab.csic.es> (Blanes Centre for Advanced Studies, CEAB-CSIC).

# bigMap is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

# bigMap is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses.
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#' ptSNE quantile-maps
#'
#' Shows the mapping of quantitative variables into the embedding space.
#'
#' @param bdm A \var{bdm} instance as generated by \code{bdm.init()}.
#'
#' @param data A \code{matrix/data.frame} to be mapped. By default, the input data \var{bdm$data} is mapped.
#'
#' @param labels A vector of class labels of length equal to \code{nrow(bdm$data)}. Label values are factorized as \code{as.numeric(as.factor(labels))}. If \code{!is.null(bdm$lbls)}, these labels are used by default.
#'
#' @param subset A numeric vector with the indexes of a subset of data. Data-points in the subset are heat-mapped and the rest are shown in light grey. By default all data-points are heat-mapped.
#'
#' @param qMap.levels The number of levels of the quantile-map (8 by default).
#'
#' @param qMap.cex The size of the data-points (as in \code{par()}).
#'
#' @param qMap.bg The background colour of the qMap plot. Default value is \code{ptsne.bg = #FFFFFF} (white).
#'
#' @param class.pltt If \code{!is.null(labels)} or \code{!is.null(bdm$lbls)}, a colour palette to show class labels with the qMap plots. By default (\code{qMap.pltt = NULL}) the default palette is used.
#'
#' @param layer The number of a layer (1 by default).
#'
#' @return None.
#'
#' @details This is not a heat-map but a quantile-map plot. This function splits the range of each variable into as many quantiles as specified by \var{levels} so that the color gradient will hardly ever correspond to a constant numeric gradient. Thus, the mapping will show more evenly distributed colors though at the expense of possibly exaggerating artifacts. For variables with very extrem distributions, it will be impossible to find as many quantiles as desired and the distribution of colors will not be so homogeneous.
#' @examples
#'
#' bdm.example()
#' bdm.qMap(exMap)
#' # --- show only components (1, 2, 4, 8) of the GMM
#' bdm.qMap(exMap, subset = which(exMap$lbls %in% c(1, 4, 8, 16)))

bdm.qMap <- function(bdm, data = NULL, labels = NULL, subset = NULL, qMap.levels = 8, qMap.cex = 0.3, qMap.bg = '#FFFFFF', class.pltt = NULL, layer = 1)
{
	# get labels
	if (!is.null(labels)) labels <- as.numeric(as.factor(labels))
	else labels <- bdm$lbls

	# get data
	if (is.null(data)) data <- as.matrix(bdm$data)

	# get var names
	if (is.null(colnames(data))) {
		colnames(data) <- paste('V', formatC(seq(ncol(data)), width = 2, flag = '0'), sep = '.')
	}

	# join labels & data
	data <- cbind(labels, data)

	# check number of vars
	if (ncol(data) > 16) {
		data <- data[, 1:16]
		cat('+++ WARNING: plotting first ', ncol(data) - !is.null(labels), ' columns !, \n', sep='')
	}

	# get mapping
	l <- c(1, 2) + (layer -1) *2
	Y <- bdm$ptsne$Y[ , l]

	# set graphic environment
	parbdm.set(oma = c(0.2, 1, 6, 1))
	# Att!! with this
	layout.mtx <- cbind(layout.get((ncol(data) +1)), layout.get((ncol(data) +1)))
	layout.mtx <- matrix(seq(length(layout.mtx)), dim(layout.mtx), byrow = T)
	layout(layout.mtx, widths = rep(2 /ncol(layout.mtx) *c(0.73, 0.27), ncol(layout.mtx)))

	# legend palette
	hmap.pltt <- c(pltt.heat(qMap.levels), '#DDDDDDFF')

	nulL <- lapply(seq(ncol(data)), function(j){
		if (j == 1 & !is.null(labels)) {
			X <- data[, j]
			if (is.null(class.pltt))
				pltt <- c(pltt.get(s = length(unique(X))), '#DDDDDDFF')
			else
				pltt <- class.pltt
		}
		else {
			# factor data
			X <- get.lvls(data[, j], qMap.levels)
			Q <- quantile(data[, j][!is.na(data[, j])], seq(0, 1, length.out = qMap.levels+1))
			pltt <- hmap.pltt
		}
		# plot q-maps
		par(mar = c(1.0, 1.0, 0.4, 0.4))
		if (!is.null(subset)) {
			# plot shadow
			plot(Y[-subset, ], xaxt = 'n', xlab = '', yaxt = 'n', ylab = '', xlim = range(Y[,1]), ylim = range(Y[,2]), col = pltt[length(pltt)], cex = qMap.cex, pch = 20, asp = 1)
			# plot subset q-map
			points(Y[subset, ], col = pltt[X[subset]], cex = qMap.cex, pch = 20, asp = 1)
		}
		else {
			# plot q-map
			plot(Y, xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', xlim = range(Y[, 1]), ylim = range(Y[, 2]), col = pltt[X], cex = qMap.cex, pch = 20, asp = 1)
		}
		# plot title
		text(0.8 *min(Y[, 1]), 0.9 *max(Y[, 2]), labels = colnames(data)[j], cex = 1.0)
		# plot legend
		par(mar = c(2, 0.1, 0.5, 0.1))
		plot(1, 1, xlab = '', ylab = '', xaxt = "n", yaxt = "n", bty = "n", type = "n")
		if (j == 1 & !is.null(labels)) {
			s <- length(unique(labels))
			lgnd.lbls <- formatC(seq(s), width = 3)
			legend('center', legend = lgnd.lbls[s:1], bty = 'n', pch = 15, cex = 0.6, pt.cex = 1.6, y.intersp = 0.7, col = pltt[s:1])
		} else {
			lgnd.lbls <- sapply(seq(qMap.levels), function(l) {
				if (l == 1 || Q[l] != Q[l-1]) {
					formatC(Q[l], format = 'e', digits = 2)
				} else {
					' '
				}
			})
			legend('center', legend = lgnd.lbls[qMap.levels:1], bty = 'n', pch = 15, cex = 0.6, pt.cex = 1.6, y.intersp = 0.7, col = pltt[qMap.levels:1])
		}
	})

	# fill layout
	if (length(ncol(data)) < max(layout.get(length(ncol(data))))) plot.null()
	# layout title
	title(bdm$dSet, outer = T, cex.main = 1.2)
	# reset graphic environment
	parbdm.def()
}

# ------------------------------------------------------------------------------
# +++ factor data (allow discrete variables and NAs)
# ------------------------------------------------------------------------------

get.lvls <- function(x, lvls)
{
	x.isna <- which(is.na(x))
	x.fctr <- as.factor(x)
	x.lvls <- length(unique(x.fctr))
	if ( x.lvls <= lvls)
	{
		x <- round(as.numeric(x.fctr) *lvls /x.lvls, 0)
		if (length(x.isna) > 0) x[x.isna] <- lvls +1
	}
	else
	{
		x.qntl <- quantile(x[!is.na(x)], seq(0, 1, length.out = lvls+1))
		x.lbls <- c(1, which(diff(x.qntl) != 0) +1)
		x.brks <- c(x.qntl[x.lbls], max(x)+1)
		if (length(x.isna) > 0) {
			x[-x.isna] <- cut(x[-x.isna], x.brks[-length(x.brks)], labels = x.lbls[-length(x.lbls)], include.lowest = T, right = T)
			x[x.isna] <- lvls +1
		} else {
			x <- cut(x, x.brks, labels = x.lbls, include.lowest = T, right = T)
			x <- as.numeric(as.character(x))
		}
	}
	return(x)
}