# Nothing
#' t-distributed Stochastic Neighbor Embedding
#'
#' \eqn{t}-distributed Stochastic Neighbor Embedding (t-SNE) is a variant of Stochastic Neighbor Embedding (SNE)
#' that mimics patterns of probability distributions over pairs of high-dimensional objects on low-dimensional
#' target embedding space by minimizing Kullback-Leibler divergence. While conventional SNE uses gaussian
#' distributions to measure similarity, t-SNE, as its name suggests, exploits a heavy-tailed Student t-distribution.
#'
#' @param X an \eqn{(n\times p)} matrix or data frame whose rows are observations and columns represent independent variables.
#' @param ndim an integer-valued target dimension.
#' @param perplexity desired level of perplexity; ranging [5,50].
#' @param eta learning parameter.
#' @param maxiter maximum number of iterations.
#' @param jitter level of white noise added at the beginning.
#' @param jitterdecay decay parameter in (0,1). The closer to 0, the faster artificial noise decays.
#' @param momentum level of acceleration in learning.
#' @param pca whether to use PCA as preliminary step; \code{TRUE} for using it, \code{FALSE} otherwise.
#' @param pcascale a logical; \code{FALSE} for using Covariance, \code{TRUE} for using Correlation matrix. See also \code{\link{do.pca}} for more details.
#' @param symmetric a logical; \code{FALSE} to solve it naively, and \code{TRUE} to adopt symmetrization scheme.
#' @param BHuse a logical; \code{TRUE} to use Barnes-Hut approximation. See \code{\link[Rtsne]{Rtsne}} for more details.
#' @param BHtheta speed-accuracy tradeoff. If set as 0.0, it reduces to exact t-SNE.
#'
#' @return a named \code{Rdimtools} S3 object containing
#' \describe{
#' \item{Y}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.}
#' \item{algorithm}{name of the algorithm.}
#' }
#'
#' @examples
#' \donttest{
#' ## load iris data
#' data(iris)
#' set.seed(100)
#' subid = sample(1:150,50)
#' X = as.matrix(iris[subid,1:4])
#' lab = as.factor(iris[subid,5])
#'
#' ## compare different perplexity
#' out1 <- do.tsne(X, ndim=2, perplexity=5)
#' out2 <- do.tsne(X, ndim=2, perplexity=10)
#' out3 <- do.tsne(X, ndim=2, perplexity=15)
#'
#' ## Visualize three different projections
#' opar <- par(no.readonly=TRUE)
#' par(mfrow=c(1,3))
#' plot(out1$Y, pch=19, col=lab, main="tSNE::perplexity=5")
#' plot(out2$Y, pch=19, col=lab, main="tSNE::perplexity=10")
#' plot(out3$Y, pch=19, col=lab, main="tSNE::perplexity=15")
#' par(opar)
#' }
#'
#' @seealso \code{\link{do.sne}}
#' @references
#' \insertRef{vandermaaten_visualizing_2008}{Rdimtools}
#'
#' @author Kisung You
#' @rdname nonlinear_TSNE
#' @concept nonlinear_methods
#' @export
do.tsne <- function(X,ndim=2,perplexity=30,eta=0.05,maxiter=2000,
                    jitter=0.3,jitterdecay=0.99,momentum=0.5,
                    pca=TRUE,pcascale=FALSE,symmetric=FALSE,
                    BHuse=TRUE, BHtheta=0.25){
  # Overview
  #   1. validate every argument up front (before any expensive computation),
  #   2. optionally reduce X by PCA,
  #   3. embed the (possibly reduced) data, either with the package's own
  #      routines (BHuse=FALSE) or with 'hidden_tsne' taken from the 'maotai'
  #      namespace (BHuse=TRUE),
  #   4. wrap the embedding in an 'Rdimtools' S3 object.
  # NOTE(review): 'symmetric' is accepted for interface compatibility but is
  #               never read anywhere in this function.

  # Local predicates shared by the checks below.
  #   is_finite_number : a single numeric value that is neither NA/NaN nor +-Inf.
  #   is_flag          : a single logical value that is not NA.
  is_finite_number <- function(v) {
    is.numeric(v) && (length(v) == 1L) && !is.na(v) && !is.infinite(v)
  }
  is_flag <- function(v) {
    is.logical(v) && (length(v) == 1L) && !is.na(v)
  }

  # 1. typecheck is always first step to perform.
  aux.typecheck(X)

  # 1-1. (integer) ndim
  #      NA is rejected by the predicate so the range comparison cannot yield NA
  #      and abort with R's generic "missing value" error.
  if (!is_finite_number(ndim) || (ndim < 1) || (ndim > ncol(X))) {
    stop("* do.tsne : 'ndim' is an integer in [1,#(covariates)].")
  }
  ndim <- as.integer(ndim)

  # 1-2. perplexity : must be positive; values outside [5,50] only trigger a note.
  if (!is_finite_number(perplexity) || (perplexity <= 0)) {
    stop("* do.tsne : perplexity should be a positive real number.")
  }
  if ((perplexity < 5) || (perplexity > 50)) {
    message("* do.tsne : a desired perplexity value is in [5,50].")
  }

  # 2. Input Parameters
  # 2-1. (double) eta = 0.05; learning parameter
  if (!is_finite_number(eta) || (eta <= 0)) {
    stop("* do.tsne : learning rate 'eta' should be a positive real number.")
  }
  # 2-2. (integer) maxiter = 2000; maximum number of iterations
  if (!is_finite_number(maxiter) || (maxiter < 2)) {
    stop("* do.tsne : maxiter should be suited for the number of iterations.")
  }
  # 2-3. (double) jitter = 0.3; random errors (zero is accepted)
  if (!is_finite_number(jitter) || (jitter < 0)) {
    stop("* do.tsne : 'jitter' should be a positive real number.")
  }
  # 2-4. (double) jitterdecay = 0.99; decaying factor of jitter, strictly inside (0,1)
  if (!is_finite_number(jitterdecay) || (jitterdecay <= 0) || (jitterdecay >= 1)) {
    stop("* do.tsne : 'jitterdecay' is a real number between (0,1).")
  }
  # 2-5. (double) momentum = 0.5
  if (!is_finite_number(momentum) || (momentum <= 0)) {
    stop("* do.tsne : 'momentum' should be a positive real number.")
  }
  # 2-6. (bool) pca, pcascale : NA is refused because both drive an 'if' / are
  #      forwarded as a switch and would otherwise fail later with an unrelated error.
  if (!is_flag(pca)) {
    stop("* do.tsne : 'pca' is a logical variable.")
  }
  if (!is_flag(pcascale)) {
    stop("* do.tsne : pcascale is either TRUE or FALSE.")
  }
  # 2-7. (bool) BHuse : coerced first so that 0/1 keep working, then checked;
  #      anything that coerces to NA is rejected here instead of at 'if (!BHflag)'.
  BHflag <- as.logical(BHuse)
  if (!is_flag(BHflag)) {
    stop("* do.tsne : 'BarnesHut' is a logical variable.")
  }
  # 2-8. (double) BHtheta : non-negative; forwarded as 'theta' to the Barnes-Hut routine.
  BHtheta <- as.double(BHtheta)
  if (!is_finite_number(BHtheta) || (BHtheta < 0)) {
    stop("* do.tsne : BHtheta is invalid. It should be >= 0.")
  }

  # 3. Optional PCA preprocessing
  #    'tpX' holds the data that is actually embedded, stored with observations
  #    as columns. The PCA target dimension is halfway between ncol(X) and ndim.
  if (pca) {
    pcadim <- ceiling((ncol(X) + ndim) / 2)
    pcaout <- do.pca(X, ndim = pcadim, cor = pcascale)
    if (ncol(pcaout$Y) <= ndim) {
      # PCA left no more columns than the target dimension: fall back to raw data.
      message("* do.tsne : PCA scaling has gone too far.")
      message("* do.tsne : Pass non-scaled data to t-SNE algortihm.")
      tpX <- t(X)
    } else {
      tpX <- t(pcaout$Y)
    }
  } else {
    tpX <- t(X)
  }

  # 4. Run Main Algorithm
  if (!BHflag) {
    # package routines: similarity matrix from perplexity, then the embedding,
    # which is transposed back so that rows are observations.
    Perp <- aux_perplexity(tpX, perplexity)
    P    <- as.matrix(Perp$P)
    Y    <- t(as.matrix(method_tsne(P, ndim, eta, maxiter, jitter, jitterdecay, momentum)))
  } else {
    # Distances are computed on the preprocessed data so that 'pca'/'pcascale'
    # take effect in this branch as well (previously the raw X was used here and
    # the PCA result was silently discarded).
    pX   <- t(tpX)
    dX   <- stats::dist(pX)
    dfun <- utils::getFromNamespace("hidden_tsne", "maotai")
    out  <- dfun(dX, ndim = ndim, theta = BHtheta, perplexity = perplexity,
                 pca = FALSE, max_iter = maxiter, momentum = momentum, eta = eta)
    Y    <- out$embed
  }

  # 5. result : refuse embeddings that contain Inf or NA.
  if (any(is.infinite(Y)) || any(is.na(Y))) {
    stop("* do.tsne : t-SNE not successful; having either Inf or NA values.")
  }
  structure(list(Y = Y, algorithm = "nonlinear:TSNE"), class = "Rdimtools")
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.