library(Rcpp)
#' Trains a Rectified Factor Network (RFN)
#'
#' Trains an RFN as described by Clevert et al., 2014
#' @param X The data matrix
#' @param n_hidden Number of latent variables to estimate
#' @param n_iter Number of iterations to run the algorithm
#' @param etaW Learning rate of the W parameter
#' @param etaP Learning rate of the Psi parameter (It's probably save to set this to the same value as etaW)
#' @param minP Minimal value for Psi. Should be in 1e-5 - 1e-1
#' @param batch_size If > 2, this will activate mini-batch learning instead of full batch learning
#' @param dropout_rate Dropout rate for the latent variables
#' @param input_noise_rate Noise/dropout rate for input variables
#' @param l2_weightdecay L2 penalty for weight decay
#' @param l1_weightdecay L1 penalty for weight decay
#' @param h_threshold Threshhold for rectifying/leaky activations
#' @param momentum Momentum term for learning
#' @param noise_type Type of input noise. One of "dropout", "saltpepper", "gaussian"
#' @param activation Activation function for hidden/latent variables. One of "linear", "relu", "leaky", "sigmoid", "tanh"
#' @param apply_scaling Scale the data
#' @param apply_newton_update Whether to use a Newton update (default) or a gradient descent step
#' @param seed Seed for the random number generator
#' @param use_gpu Use the gpu (default cpu). Works only for sparse input.
#' @param gpu_id If use_gpu is true, use gpu with this id (default -1 selects one available)
#' @param verbose True to print messages during the run. Default is True.
#' @return Returns a list of matrices \code{W}, \code{P}, \code{H},
#' \code{Wout}, whereas \code{W \%*\% H} is the noise-free reconstruction
#' of the data \code{X} and \code{diag(P)} is the covariance matrix
#' of the additive zero-mean noise. \code{Wout} is the matrix that
#' maps input vectors to their latent representation, usually by
#' \code{pmax(t(Wout) \%*\% X, 0)}.
#' @export
#' @useDynLib RFN
#' @importFrom Rcpp evalCpp
train_rfn <- function(X, n_hidden, n_iter, etaW, etaP, minP, batch_size=-1,
dropout_rate=0.0, input_noise_rate=0.0, l2_weightdecay=0.0,
l1_weightdecay=0.0, h_threshold=0.0, momentum=0.0, noise_type="saltpepper",
activation="relu", apply_scaling=1, apply_newton_update=1, seed=-1, use_gpu=0, gpu_id=-1, verbose=T)
{
if (is.data.frame(X))
X <- data.matrix(X)
if (!(is(X, "dgCMatrix") || is.numeric(X)) || length(dim(X)) != 2 || sum(is.na(X)) != 0)
stop("X must be a numeric matrix without NAs")
if (!is.numeric(c(n_hidden, n_iter, etaW, etaP, minP, batch_size, dropout_rate, input_noise_rate,
l2_weightdecay, l1_weightdecay, h_threshold, momentum, apply_scaling, apply_newton_update,
seed)))
stop("at least one of the numeric params is not numeric")
noise_type <- switch(noise_type, dropout=1, saltpepper=2,gaussian=3)
if (is.null(noise_type))
stop("noise_type must be one of \"dropout\", \"saltpepper\", \"gaussian\"")
activation <- switch(activation, linear=0, relu=1, leaky=2, sigmoid=3, tanh=4)
if (is.null(activation))
stop("activation must be one of \"linear\", \"relu\", \"leaky\", \"sigmoid\", \"tanh\"")
if (seed > -1) {
set.seed(seed);
}
n <- ncol(X)
m <- nrow(X)
W <- matrix(0.01*rnorm(n_hidden * m), ncol=n_hidden)
P <- rep(0.1, m)
if (!use_gpu) {
gpu_id = -2
}
if (is(X, "dgCMatrix")) {
require(Matrix)
if (noise_type == 3)
stop("cannot use Gaussian noise on sparse input matrix")
res1 <- .Call('R_train_rfn_sparse', X@x, X@p, X@i, W, P, as.integer(n), as.integer(m), as.integer(n_hidden),
as.integer(n_iter), as.integer(batch_size), etaW, etaP, minP, h_threshold, dropout_rate,
input_noise_rate, l2_weightdecay, l1_weightdecay, momentum, as.integer(noise_type),
as.integer(activation), as.integer(apply_scaling), as.integer(apply_newton_update),
as.integer(seed), as.integer(gpu_id), as.logical(verbose), PACKAGE = 'RFN')
Wout <- .Call('R_calculate_W_sparse', X@x, X@p, X@i, res1$W, res1$P, as.integer(n), as.integer(m),
as.integer(n_hidden), as.integer(activation), as.integer(apply_scaling), h_threshold,
as.integer(gpu_id), PACKAGE = 'RFN')
} else {
res1 <- .Call('R_train_rfn', X, W, P, as.integer(n), as.integer(m), as.integer(n_hidden),
as.integer(n_iter), as.integer(batch_size), etaW, etaP, minP, h_threshold, dropout_rate,
input_noise_rate, l2_weightdecay, l1_weightdecay, momentum, as.integer(noise_type),
as.integer(activation), as.integer(apply_scaling), as.integer(apply_newton_update),
as.integer(seed), as.integer(gpu_id), as.logical(verbose), PACKAGE = 'RFN')
Wout <- .Call('R_calculate_W', X, res1$W, res1$P, as.integer(n), as.integer(m),
as.integer(n_hidden), as.integer(activation), as.integer(apply_scaling), h_threshold,
as.integer(gpu_id), PACKAGE = 'RFN')
}
H <- t(Wout) %*% X
H <- pmax(as.matrix(H), h_threshold)
return(list(W=res1$W, P=res1$P, H=H, Wout=Wout, T=res1$T))
}
#' Displays a picture, such as an MNIST digit or CIFAR image
#' Assumes the image is stored in a rowwise-fashion in the vector x
#' @param x the data to depict, a grayscale channel in \code{showSinglePicture}, or in
#' \code{showSingleColorPicture} it is assumed to be a vector where the first
#' third of the elements describe the red color channel, the next ones
#' the green channel and the last third the blue channel; in \code{showPictures} its interpretation
#' is controlled by the parameter \code{isColor}.
#' @param sidelength the height of the resulting picure in pixels; on \code{NULL}, a square picture
#' is assumed
#' @param col as passed to function \code{image}, see \code{?image}
#' @param ... as passed to function \code{image}, see \code{?image}
#' @return \code{NULL}
#' @rdname showPictures
#' @export
showSinglePicture <- function(x, sidelength=NULL, col=gray(256:1/256), ...) {
if (is.null(sidelength))
sidelength <- floor(sqrt(length(x)) + 0.5)
p <- par(mar = c(0, 0, 0, 0))
# useRaster=TRUE looks much better in Latex
image(t(matrix(x, nrow = sidelength, byrow = TRUE)), col = col,
ylim = c(1, 0), yaxt='n', xaxt='n', ann=FALSE, useRaster=TRUE, ...)
par(p)
invisible(NULL)
}
#' Displays a color picture.
#'
#' Assumes the image is stored in a rowwise-fashion.
#' @param normalize logical, whether \code{x} should be normalized
#' @rdname showPictures
#' @export
showSingleColorPicture <- function(x, sidelength=NULL, normalize=TRUE, ...) {
if (is.null(sidelength))
sidelength <- as.integer(floor(sqrt(length(x)/3) + 0.5))
if (normalize) {
norm01 <- function(x) (x - min(x)) / (max(x) - min(x)+1e-10)
x <- norm01(x)
# conversion function from pylearn2
#norm01 <- function(y) (0.5*(y / max(abs(y)))+0.5)
#r = norm01(x[1:1024])
#b = norm01(x[1025:2048])
#g = norm01(x[2049:3072])
}
ss <- sidelength*sidelength
r = x[1:ss]
g = x[(ss+1):(2*ss)]
b = x[(2*ss+1):(3*ss)]
colvec <- rgb(r, g, b)
colors <- unique(colvec)
colmat <- array(match(colvec, colors), dim=c(sidelength, sidelength))
p <- par(mar = c(0, 0, 0, 0))
image(x = 0:sidelength, y=0:sidelength, z = colmat, col=colors,
ylim = c(sidelength, 0), yaxt='n', xaxt='n', ann=FALSE, useRaster=TRUE)
par(p)
invisible(NULL)
}
#' Shows multiple colored or grayscale pictures
#' @param data contains the pixel data, one picture in each column
#' @param nrow,ncol define the arrangement of the pictures
#' @param idx defines the enumeration of the pictures, i.e. the order in which the columns of
#' \code{data} should be processed; on \code{NULL}, 1,2,3... is assumed
#' @param isColor logical, indicates whether \code{data} is to be interpreted as single channel
#' or 3-way-channel (RGB)
#' @rdname showPictures
#' @export
showPictures <- function(data, nrow, ncol, idx=NULL, isColor=FALSE, ...) {
if (is.null(idx)) {
m <- min(ncol(data), nrow*ncol)
idx <- 1:m
}
# small hack: if we have a sidelength of 28, we probably display
# MNIST digits, which are easier to read with inverted colors
col = gray(0:255/255)
if (floor(sqrt(length(data[, 1])) + 0.5) == 28)
col = gray(255:0/255)
par(mfrow=c(nrow, ncol))
for (i in idx) {
if (isColor)
showSingleColorPicture(data[, i], ...)
else
showSinglePicture(data[, i], col=col, ...)
}
par(mfrow=c(1, 1))
invisible(NULL)
}
#' for testing MKL
#' @param A a matrix
#' @rdname somatcopy
#' @export
somatcopy <- function(A)
{
.Call('somatcopy', nrow(A), ncol(A), A)
}
#' for testing MKL
#' @param A a sparse matrix
#' @param B a matrix
#' @rdname somatcopy
#' @export
gemm <- function(A, B)
{
require(Matrix)
tA <- Matrix::t(A)
C <- .Call('gemm', 'r', 'n', 't', nrow(A), ncol(A), ncol(B), tA@x, tA@i, tA@p, t(B))
attributes(C)$dim <- rev(attributes(C)$dim)
C <- t(C)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.