# R/bctsne.R In bcTSNE: Projected t-SNE for Batch Correction

#### Documented in bctsne

#' @name bctsne
#' @title Calculate BC t-SNE by orthogonal gradient descent
#' @param X numeric matrix, input matrix
#' @param Z numeric matrix, covariate matrix
#' @param k integer of length 1, reduced dimension (number of eigenvectors)
#' @param outDim integer of length 1, the output dimension
#' @param perplexity numeric of length 1, the t-SNE perplexity
#' @param maxIter integer of length 1, the maximum iterations for the BC t-SNE
#' algorithm
#' @details
#' \code{X} should be preprocessed (e.g. PCA, centered and scaled). \code{Z}
#' is the full model matrix, excluding the intercept.
#'
#' @examples
#' ## Create small simulated dataset, A, with embeded batch effects
#' set.seed(2731)
#' kRid <- 20
#' p    <- 100
#' n    <- 200
#'
#' W <- matrix(rnorm(p*kRid), kRid)
#' S <- matrix(rnorm(n*kRid), n)
#' z <- sample(1:3, rep = TRUE, size = n)
#' Z <- model.matrix( ~ -1 + as.factor(z))
#' l <- matrix(rnorm(kRid*NCOL(Z)), kRid)
#' A <- (S - Z %*% t(l) ) %*% W
#'
#' ## Scale A to give input, X
#' X <- scale(A)
#'
#' resUnadj <- Rtsne::Rtsne(X)                 ## Standard t-SNE
#' resAdj   <- bctsne(X = X, Z = Z, k = 10)    ## Batch-corrected t-SNE
#'
#' ## Plot results, no true effects were included in the simulated data, so
#' ## we expect all batches to overlap with bcTSNE; batch membership indicated
#' ## by color
#' plot(resUnadj$Y, col = z) #' plot(resAdj$Y, col = z)
#'
#' @return \code{list} wth the following items:
#' \describe{
#'   \item{\code{Xred}}{numeric matrix, the reduced dimension input to \code{bctsne}}
#'   \item{\code{Z}}{model matrix indicating batch membership}
#'   \item{\code{perplexity}}{perpelexity value used in computing t-SNE}
#'   \item{\code{Y}}{batch-corrected projection matrix}
#'   \item{\code{maxIter}}{maximum iterations used in training}
#' }
#'
#' @useDynLib bcTSNE
#' @importFrom RSpectra svds
#' @importFrom stats lm model.matrix
#' @importFrom utils type.convert
#' @importFrom Rtsne Rtsne
#' @importFrom graphics plot
#' @export

bctsne <- function(X, Z, k = 50, outDim = 2, perplexity = 30, maxIter = 1000) {

## Data checks
stopifnot(is.numeric(X) & is.numeric(Z))
stopifnot(is.numeric(outDim) & is.numeric(perplexity))
maxIter <- type.convert(maxIter)
stopifnot(is.integer(maxIter) && length(maxIter) == 1)
if (length(outDim) > 1) {
warning("length(outDim) > 1; only using the first element.")
outDim <- outDim[1]
}
if (length(perplexity) > 1) {
warning("length(perplexity) > 1; only using the first element.")
perplexity <- perplexity[1]
}

## Get dimensions
N <- NROW(X)
K <- NCOL(Z)

## More data checks
if (N != NROW(Z)) stop("'X' and 'Z' do not have the same number of rows.")
if (K > k) stop("ncol(Z) > ncol(X) -- problem is underdetermined.")
if (k > 100) warning("k > 100; consider using fewer eigenvectors.")
if (outDim == 0) stop("'outDim' must be >= 1.")
if (outDim > 3) warning("'outDim' is greater than 3; consider values 1:3.")

SVD <- svds(A = X, k = k)
LM <- lm(SVD$u %*% diag(SVD$d) ~ -1 + Z)
S <- SVD$u %*% diag(SVD$d) - Z %*% LM\$coef
D <- NCOL(S)

J <- as.integer(outDim)
X <- as.vector(S)
Z <- as.vector(Z)
Y <- rep(0.0, N*J)
res <- .C("bctsne", X, N, D, Z, K, J, log(perplexity), Y, maxIter)
list(Xred = matrix(res[[1]], N, D),
Z = matrix(res[[4]], N, K),
perplexity = perplexity,
Y = matrix(res[[8]], N, J),
maxIter = maxIter)

}


## Try the bcTSNE package in your browser

Any scripts or data that you put into this service are public.

bcTSNE documentation built on April 28, 2020, 5:05 p.m.