R/tsne_fit.r

####################################################################
# Functions for using t-SNE.
####################################################################



# Internal function to validate control list
check_ctrl <- function(control_list){
	stopifnot(is.list(control_list))
	stopifnot(c('momentum_values', 'momentum_iter') %in% names(control_list))
	stopifnot(max(control_list$momentum_iter) >= control_list$max_iter)
	stopifnot(min(control_list$momentum_iter) > 1)

	stopifnot(is.numeric(control_list$early_exageration_iter))
	stopifnot(length(control_list$early_exageration_iter) == 1)
	stopifnot(length(control_list$early_exageration_factor) == 1)
	stopifnot(control_list$early_exageration_factor >= 1)

}



#' Dimension reduction with t-SNE
#'
#' t-Distributed Stochastic Neighbor Embedding (t-SNE).
#'
#' \code{tsne_dist} assumes that dist_ is a distance matrix interpretable as a
#' nearest neighbor distribution. If you only have a distance matrix, you can use
#' the \code{\link{dist_to_sne}} function to create a SNE dissimilarity matrix.
#'
#' Function \code{tsne} provides a convenient interface to use t-SNE using raw data.
#' Under the hood it calls the \code{\link{dist_sne}} and \code{\link{tsne_dist}} functions.
#'
#' @param xdata A matrix or data frame.
#' @param perplexity The perplexity. Usually a number between 5 and 50. Default is 25.
#' @param dist_ A matrix or \code{dist} object woth pairwise dissimilarities. All elements must be positive and sum to 1.
#' @param dimensions The number of dimensions of the t-SNE embedding. Default is 2, which is usually adequate.
#' @param rng_seed Provide a seed for generating intiial values for the coordiantes. The final t-SNE result is highly sensitive for starting values, so to get reproducible results you should provide a seed.
#' @param verbose The amount of output during the t-SNE estimation. Can be 0 (no output), 1 (some output, including a progress bar) and 2 (detailed output, mostly usefull for debugging purposes)
#' @param control A list of control parameters. See 'Details'.
#'
#'
#'
#' @section Control:
#' The control argument is a list that can supply any of the following components:
#'
#' \tabular{ll}{
#' \code{max_iter}: \tab The number of iterations. Default is 1000. \cr
#' \code{step_size}: \tab The gradient descent step size. Default is 100, but it can sometimes be usefull to set this to a lower value.\cr
#' \code{momentum_values}: \tab A vector of values that determines the momentum. The default is c(0.5, 0.8), following the original t-SNE paper.\cr
#' \code{momentum_iter}: \tab A vector of values that determines the iterations the momentum_values should be used. Default is c(250, 1000), following the original t-SNE paper. \cr
#' \code{early_exageration_iter}: \tab Numerical. The number of iterations the early exageration trick should be used. Default 50. \cr
#' \code{early_exageration_factor}: \tab Numerical. The exageration factor used in the early exacegration phase. Default is 4. \cr
#' }
#'
#'
#' @return
#'
#' A list of class 'tsne' with the following elements:
#'
#'\tabular{ll}{
#' \code{par}: \tab A matrix with the t-SNE embedding. \cr
#' \code{trace}: \tab A vector of the Kullback-Leibler divergences.
#' If you used early exaggeration (default) you will see a large jump where the exageration ends. \cr
#'}
#'
#' @seealso
#'
#' The \code{\link{tsne_phyloseq}} function provides a nice interface for using t-SNE with microbiota data.
#'
#' @section References:
#'\itemize{
#' \item L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research 9(Nov):2579-2605, 2008.
#' \item L.J.P. van der Maaten. The t-SNE FAQ \url{https://lvdmaaten.github.io/tsne/}
#'}
#'
#' @examples
#' xx <- make_swiss_roll(150)
#' tsne_res <- tsne(xx[,1:3])
#' plot(tsne_res$par, col=xx[,4], pch=16)
#'
#' @export
tsne_dist <- function(dist_, dimensions=2, verbose=0, rng_seed=NULL, control=list()){

	# This already assumes that you have calculated a distance matrix from the data.

	if ('dist' %in% class(dist_)){
		# Distance matrix. Convert to regular matrix.
		pp_mat <- as.matrix(dist_)
	} else if ('matrix' %in% class(dist_)){
		pp_mat <- dist_
	}

	stopifnot(all(pp_mat >= 0))
	# make sure the dist_is a sne matrix.
	ok_pp <- sum(pp_mat) < 1.0001 & sum(pp_mat) > 0.999
	if (!ok_pp){
		stop('Input dist_ not a proper t-SNE dissimilarity matrix. sum(as.matrix(dist_)) should equal 1.')
	}

	stopifnot(dimensions < ncol(pp_mat))

	default_control <- list(max_iter=1000,
													step_size=100,
													momentum_values = c(0.5, 0.8),
													momentum_iter = c(250, 1000),
													early_exageration_iter=50,
													early_exageration_factor=4)
	control <- utils::modifyList(default_control, control)
	check_ctrl(control)


	if (verbose > 0){
		cat('Gradient descent options:\n')
		cat(sprintf('Step size = %f\n', control$step_size))
	}

	nobs <- ncol(pp_mat)
	do_early_exag <- control$early_exageration_iter > 0

	# if (control$early_exageration){
	# 	early_exag_factor <- 4
	# 	early_exag_iter <- 50
	# }

	if (verbose > 0){
		cat(sprintf('Momentum: %.2f (iter <= %d)\n', control$momentum_values, control$momentum_iter))
	}

	pp_mat_raw <- pp_mat

	KL_trace <- numeric(control$max_iter)

	# Initial parameter estimates.
	par_old <- rep(0, dimensions*nobs) # for Gradient descent momentum

	if (!is.null(rng_seed)){
		set.seed(rng_seed)
	}

	par_init <- stats::rnorm(dimensions*nobs, mean = 0, sd=0.001) # Current parameters.

	# Reset random seed.
	set.seed(seed=NULL)

	par_cur <- par_init

	cur_momentum <- 1

	if (verbose == 1){
		tpb <- utils::txtProgressBar(min=1, max=control$max_iter, initial=1)
	}

	for (kk in 1:control$max_iter){

		if (verbose == 1){
			utils::setTxtProgressBar(tpb, kk)
		}

		# Early exaggeration
		if (do_early_exag){
			if (kk <= control$early_exageration_iter){
				pp_mat <- pp_mat_raw * control$early_exageration_factor
			} else if (kk == control$early_exageration_iter+1){
				pp_mat <- pp_mat_raw
			}
		} else {
			pp_mat <- pp_mat_raw
		}


		# Momentum
		if (kk <= control$momentum_iter[cur_momentum]){
			momentum <- control$momentum_values[cur_momentum]
			if (kk == control$momentum_iter[cur_momentum]){
				# Update the momentum.
				cur_momentum <- cur_momentum + 1
			}
		}

		par_cur_mat <- matrix(par_cur, ncol=dimensions)

		## # calculate t SNE dissimilarities for yy
		QQ_num <- matrix(0, ncol=nobs, nrow=nobs)  		# Raw t dissimilarities
		QQ <- matrix(0, ncol=nobs, nrow=nobs) 				# Normalized t-dissimilarities
		for (ii in 1:nobs){
			numerators <- 1 / (1 + (colSums((par_cur_mat[ii,] - t(par_cur_mat))^2)))
			numerators[ii] <- 0 # set dissimialrities for i = j to 0.
			QQ_num[ii,] <- numerators
			QQ[ii,] <- numerators / sum(numerators)
		}

		# Symmetrize
		QQ_sym <- symmetrize_sne_matrix(QQ)
		diag(QQ_sym) <- 1

		# KL divergence
		KL_trace[kk] <- sum(pp_mat * log(pp_mat / QQ_sym), na.rm=TRUE)

		if (verbose >= 2){
			cat(sprintf('[%d / %d] Function value: %f\n', kk, control$max_iter, KL_trace[kk]))
		}

		# Calculate the gradient. Addapted from the tsne_p.m matlab script.
		LL <- (pp_mat - QQ_sym)* QQ_num
		gradient_ <- 4 * ((diag(colSums(LL)) - LL) %*% par_cur_mat)

		# Update parameters
		par_new <- par_cur - (control$step_size * gradient_) + (momentum * (par_cur - par_old))

		par_old <- par_cur
		par_cur <- par_new
	}

	out <- list(par=par_cur, trace=KL_trace)
	class(out) <- 'tsne'

	return(out)
}




#' @rdname tsne_dist
#' @export
tsne <- function(xdata, perplexity=25, dimensions=2, verbose=0, rng_seed=NULL, control=list()){

	if ('data.frame' %in% class(xdata)){
		all_numeric <- all(sapply(xdata, is.numeric))
		if (!all_numeric) {
			stop('Not all variables in xdata is numeric.')
		}
	} else if ('matrix' %in% class(xdata)){
		matrix_numeric <- is.numeric(xdata)
		if (!matrix_numeric) {
			stop('xdata is not numeric.')
		}
	}

	dist_ <- dist_sne(xdata, perplexity = perplexity)
	res <- tsne_dist(dist_=dist_, dimensions=dimensions,
													 control=control, rng_seed=rng_seed,
													 verbose=verbose)

	return(res)
}


#' Swiss roll data set
#'
#' Simulate swiss roll data. The swiss roll data set is used to test various dimension reduction algorithms.

#'
#' @param nn The number of data points to be generated.
#'
#'
#' @return A data frame with four variables. The three first are the swiss
#' roll data points, the fourth is vector fo colors useful for plotting.
#'
#' @section Reference:
#' Dinoj Surendran, Swiss Roll Dataset
#' \url{http://people.cs.uchicago.edu/~dinoj/manifold/swissroll.html}
#'
#' @examples
#' xx <- make_swiss_roll(150)
#' pairs(xx[,1:3], col=xx[,4], pch=16)
#'
#' @export
make_swiss_roll <- function(nn){

	means1 <- c(7.5, 7.5, 12.5, 12.5)
	means2 <- c(7.5, 12.5, 7.5, 12.5)

	xx1 <- stats::rnorm(nn, mean=means1)
	xx2 <- stats::rnorm(nn, mean=means2)

	xx <- data.frame(x1=xx1 * cos(xx1), x2=xx2, x3=xx1 * sin(xx1))
	# make colors according to x1.
	mycol <- grDevices::colorRamp(c('red', 'cadetblue', 'black'))
	xx1_tmp <- xx[,1] + abs(min(xx[,1]))
	xx1_tmp <- xx1_tmp / max(xx1_tmp)
	xcolor <- mycol(xx1_tmp)

	xx$plot_colors <- grDevices::rgb(xcolor[,1]/255, xcolor[,2]/255, xcolor[,3]/255)

	return(xx)
}
opisthokonta/tsnemicrobiota documentation built on June 1, 2019, 9:44 a.m.