Nothing
## ----pkgmaker_preamble, echo=FALSE, results='asis'----------------------------
library(NMF)
latex_preamble()
if(!requireNamespace("Biobase")) BiocManager::install("Biobase")
## ----bibliofile, echo=FALSE, results='asis'-----------------------------------
latex_bibliography('NMF')
## ----options, echo=FALSE------------------------------------------------------
set.seed(123456)
library(knitr)
# Helper functions:
hook_try <- function(before, options, envir){
.try_defined <- FALSE
# remove hacked version of try
if( !before ){
if( .try_defined && exists('try', envir = envir, inherits = FALSE) ){
remove(list = 'try', envir = envir)
}
.try_defined <<- FALSE
return(invisible())
}
if( !is.null(options$try) ){
# signal
do.signal <- isFALSE(options$try)
if( isManualVignette() && isTRUE(options$try) ){
do.signal <- TRUE
}
# define hacked version of try()
.try <- try_message(do.signal)
assign('try', .try, envir)
.try_defined <<- TRUE
}
}
chunkOutputHook <- function(name, hook, type = c('output', 'source', 'chunk')){
type <- match.arg(type)
function(){
if( !requireNamespace('knitr', quietly = TRUE) )
stop("Package 'knitr' is required to setup knit hook '", name, "'")
.hook_bkp <- NULL
function(before, options, envir){
# do nothing if the option is not ON
if( is.null(options[[name]]) ) return()
# set/unset hook
if( before ){
# store current hook function
if( is.null(.hook_bkp) ) .hook_bkp <<- knitr::knit_hooks$get(type)
# define hook wrapper
hook_wrapper <- function(x, options){
res <- .hook_bkp(x, options)
hook(res, options)
}
args <- list()
args[[type]] <- hook_wrapper
do.call(knitr::knit_hooks$set, args)
}else{
args <- list()
args[[type]] <- .hook_bkp
do.call(knitr::knit_hooks$set, args)
.hook_bkp <<- NULL
}
}
}
}
hook_backspace <- chunkOutputHook('backspace',
function(x, options){
if( !isTRUE(options$backspace) ) x
str_bs(x)
}
)
str_bs <- function(x){
# remove leading backspaces
x <- gsub("^\b+", "", x)
# remove backspaces at beginning of line
x <- gsub("\n\b+", '\n', x)
while( length(grep('\b', x, fixed = TRUE)) )
x <- gsub('[^\n\b][\b]', '', x)
x
}
isManualVignette <- function(){
isTRUE(getOption('R_RUNNING_MANUAL_VIGNETTE'))
}
try_message <- function(signal = FALSE){
function(expr){
tryCatch(expr, error = function(e){
if( signal ) message(e)
else message('Error: ', conditionMessage(e))
invisible()
})
}
}
knit_hooks$set(try = hook_try, backspace = hook_backspace())
## ----load_library, echo=FALSE, include=FALSE----------------------------------
# Load
library(NMF)
# limit number of cores used
nmf.options(cores = 2)
## ----load_library_fake, eval=FALSE--------------------------------------------
# # Install
# install.packages('NMF')
# # Load
# library(NMF)
## ----updateObject, eval=FALSE-------------------------------------------------
# # eg., load from some RData file
# load('object.RData')
# # update class definition
# object <- nmfObject(object)
## ----features, echo=FALSE-----------------------------------------------------
nalgo <- length(nmfAlgorithm())
nseed <- length(nmfSeed())
## ----nmfAlgorithm-------------------------------------------------------------
# list all available algorithms
nmfAlgorithm()
# retrieve a specific algorithm: 'brunet'
nmfAlgorithm('brunet')
# partial match is also fine
identical(nmfAlgorithm('br'), nmfAlgorithm('brunet'))
## ----nmfSeed------------------------------------------------------------------
# list all available seeding methods
nmfSeed()
# retrieve a specific method: 'nndsvd'
nmfSeed('nndsvd')
# partial match is also fine
identical(nmfSeed('nn'), nmfSeed('nndsvd'))
## ----show_Rversions-----------------------------------------------------------
nmfAlgorithm(all=TRUE)
# to get all the algorithms that have a secondary R version
nmfAlgorithm(version='R')
## ----perftable_setup, cache=TRUE----------------------------------------------
# retrieve all the methods that have a secondary R version
meth <- nmfAlgorithm(version='R')
meth <- c(names(meth), meth)
meth
if(requireNamespace("Biobase", quietly=TRUE)){
# load the Golub data
data(esGolub)
# compute NMF for each method
res <- nmf(esGolub, 3, meth, seed=123456)
# extract only the elapsed time
t <- sapply(res, runtime)[3,]
}
## ----perftable, echo=FALSE, results='asis'------------------------------------
# speed-up
m <- length(res)/2
su <- cbind( C=t[1:m], R=t[-(1:m)], Speed.up=t[-(1:m)]/t[1:m])
library(xtable)
xtable(su, caption='Performance speed up achieved by the optimized C++ implementation for some of the NMF algorithms.', label='tab:perf')
## ----citations, eval=FALSE----------------------------------------------------
# # plain text
# citation('NMF')
#
# # or to get the bibtex entries
# #toBibtex(citation('NMF'))
## ----esGolub------------------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
data(esGolub)
esGolub
esGolub <- esGolub[1:200,]
# remove the uneeded variable 'Sample' from the phenotypic data
esGolub$Sample <- NULL
}
## ----algo_default, cache=TRUE-------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# default NMF algorithm
res <- nmf(esGolub, 3)
}
## ----single_show--------------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
res
}
## ----single_show_model--------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
fit(res)
}
## ----single_show_estimate-----------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
V.hat <- fitted(res)
dim(V.hat)
}
## ----singlerun_summary--------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
summary(res)
# More quality measures are computed, if the target matrix is provided:
summary(res, target=esGolub)
}
## ----singlerun_summary_factor-------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
summary(res, class=esGolub$Cell)
}
## ----get_matrices-------------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# get matrix W
w <- basis(res)
dim(w)
# get matrix H
h <- coef(res)
dim(h)
}
## ----subset-------------------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# keep only the first 10 features
res.subset <- res[1:10,]
class(res.subset)
dim(res.subset)
# keep only the first 10 samples
dim(res[,1:10])
# subset both features and samples:
dim(res[1:20,1:10])
}
## ----single_extract-----------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# only compute the scores
s <- featureScore(res)
summary(s)
# compute the scores and characterize each metagene
s <- extractFeatures(res)
str(s)
}
## ----algo_list----------------------------------------------------------------
nmfAlgorithm()
## ----algo_lee, cache=TRUE-----------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# using Lee and Seung's algorithm
res <- nmf(esGolub, 3, 'lee')
algorithm(res)
}
## ----algo_ns, cache=TRUE------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# using the Nonsmooth NMF algorithm with parameter theta=0.7
res <- nmf(esGolub, 3, 'ns', theta=0.7)
algorithm(res)
fit(res)
}
## ----algo_pe, cache=TRUE------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# using the PE-NMF algorithm with parameters alpha=0.01, beta=1
res <- nmf(esGolub, 3, 'pe', alpha=0.01, beta=1)
res
}
## ----seed_list----------------------------------------------------------------
nmfSeed()
## ----seed, cache=TRUE---------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
res <- nmf(esGolub, 3, seed='nndsvd')
res
}
## ----seed_numeric, cache=TRUE-------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# single run and single numeric seed
res <- nmf(esGolub, 3, seed=123456)
showRNG(res)
# multiple runs and single numeric seed
res <- nmf(esGolub, 3, seed=123456, nrun=2)
showRNG(res)
# single run with a 6-length seed
res <- nmf(esGolub, 3, seed=rep(123456, 6))
showRNG(res)
}
## ----seed_WH------------------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# initialize a "constant" factorization based on the target dimension
init <- nmfModel(3, esGolub, W=0.5, H=0.3)
head(basis(init))
# fit using this NMF model as a seed
res <- nmf(esGolub, 3, seed=init)
}
## ----algo_multirun, cache=TRUE------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
res.multirun <- nmf(esGolub, 3, nrun=5)
res.multirun
}
## ----multirun_keep, cache=TRUE------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# explicitly setting the option keep.all to TRUE
res <- nmf(esGolub, 3, nrun=5, .options=list(keep.all=TRUE))
res
}
## ----multirun_keep_alt, eval=FALSE--------------------------------------------
# if(requireNamespace("Biobase", quietly=TRUE)){
# # or using letter code 'k' in argument .options
# nmf(esGolub, 3, nrun=5, .options='k')
# }
## ----parallel_multicore_alt, eval=FALSE---------------------------------------
# if(requireNamespace("Biobase", quietly=TRUE)){
# # the default call will try to run in parallel using all the cores
# # => will be in parallel if all the requirements are satisfied
# nmf(esGolub, 3, nrun=5, .opt='v')
#
# # request a certain number of cores to use => no error if not possible
# nmf(esGolub, 3, nrun=5, .opt='vp8')
#
# # force parallel computation: use option 'P'
# nmf(esGolub, 3, nrun=5, .opt='vP')
#
# # require an improbable number of cores => error
# nmf(esGolub, 3, nrun=5, .opt='vP200')
# }
## ----mpi, eval=FALSE----------------------------------------------------------
# # file: mpi.R
#
# if(requireNamespace("Biobase", quietly=TRUE)){
# ## 0. Create and register an MPI cluster
# library(doMPI)
# cl <- startMPIcluster()
# registerDoMPI(cl)
# library(NMF)
#
# # run on all workers using the current parallel backend
# data(esGolub)
# res <- nmf(esGolub, 3, 'brunet', nrun=n, .opt='p', .pbackend=NULL)
#
# # save result
# save(res, file='result.RData')
#
# ## 4. Shutdown the cluster and quit MPI
# closeCluster(cl)
# mpi.quit()
# }
## ----force_seq, cache=TRUE, backspace = TRUE----------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# parallel execution on 2 cores (if possible)
res1 <- nmf(esGolub, 3, nrun=5, .opt='vp2', seed=123)
# or use the doParallel with single core
res2 <- nmf(esGolub, 3, nrun=5, .opt='vp1', seed=123)
# force sequential computation by sapply: use option '-p' or .pbackend=NA
res3 <- nmf(esGolub, 3, nrun=5, .opt='v-p', seed=123)
res4 <- nmf(esGolub, 3, nrun=5, .opt='v', .pbackend=NA, seed=123)
# or use the SEQ backend of foreach: .pbackend='seq'
res5 <- nmf(esGolub, 3, nrun=5, .opt='v', .pbackend='seq', seed=123)
# all results are all identical
nmf.equal(list(res1, res2, res3, res4, res5))
}
## ----estimate_rank, cache=TRUE------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# perform 10 runs for each value of r in range 2:6
estim.r <- nmf(esGolub, 2:6, nrun=10, seed=123456)
}
## ----estimate_rank_plot, fig.width=10, fig.height=6---------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
plot(estim.r)
}
## ----estimate_rank_hm_include, fig.width=14, fig.height=7, fig.keep='last'----
if(requireNamespace("Biobase", quietly=TRUE)){
consensusmap(estim.r, annCol=esGolub, labCol=NA, labRow=NA)
}
## ----estimate_rank_random, cache=TRUE, fig.width=10, fig.height=6, fig.keep='last'----
if(requireNamespace("Biobase", quietly=TRUE)){
# shuffle original data
V.random <- randomize(esGolub)
# estimate quality measures from the shuffled data (use default NMF algorithm)
estim.r.random <- nmf(V.random, 2:6, nrun=10, seed=123456)
# plot measures on same graph
plot(estim.r, estim.r.random)
}
## ----multimethod, cache=TRUE--------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# fit a model for several different methods
res.multi.method <- nmf(esGolub, 3, list('brunet', 'lee', 'ns'), seed=123456, .options='t')
}
## ----compare------------------------------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
compare(res.multi.method)
# If prior knowledge of classes is available
compare(res.multi.method, class=esGolub$Cell)
}
## ----errorplot_compute, cache=TRUE--------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
# run nmf with .option='t'
res <- nmf(esGolub, 3, .options='t')
# or with .options=list(track=TRUE)
}
## ----errorplot, out.width="0.5\\textwidth", fig.show='hold'-------------------
if(requireNamespace("Biobase", quietly=TRUE)){
plot(res)
plot(res.multi.method)
}
## ----heatmap_coef_basis_inc, fig.width=14, fig.height=7, fig.keep='last'------
if(requireNamespace("Biobase", quietly=TRUE)){
layout(cbind(1,2))
# basis components
basismap(res, subsetRow=TRUE)
# mixture coefficients
coefmap(res)
}
## ----heatmap_consensus_inc, out.width="0.49\\textwidth", crop=TRUE, echo=1:2----
if(requireNamespace("Biobase", quietly=TRUE)){
# The cell type is used to label rows and columns
consensusmap(res.multirun, annCol=esGolub, tracks=NA)
plot(1:10)
f2 <- fig_path("2.pdf")
}
## ----hack_consensus, include=FALSE--------------------------------------------
if(requireNamespace("Biobase", quietly=TRUE)){
file.copy('consensus.pdf', f2, overwrite=TRUE)
}
## ----custom_algo_sig----------------------------------------------------------
my.algorithm <- function(x, seed, param.1, param.2){
# do something with starting point
# ...
# return updated starting point
return(seed)
}
## ----custom_algo--------------------------------------------------------------
my.algorithm <- function(x, seed, scale.factor=1){
# do something with starting point
# ...
# for example:
# 1. compute principal components
pca <- prcomp(t(x), retx=TRUE)
# 2. use the absolute values of the first PCs for the metagenes
# Note: the factorization rank is stored in object 'start'
factorization.rank <- nbasis(seed)
basis(seed) <- abs(pca$rotation[,1:factorization.rank])
# use the rotated matrix to get the mixture coefficient
# use a scaling factor (just to illustrate the use of extra parameters)
coef(seed) <- t(abs(pca$x[,1:factorization.rank])) / scale.factor
# return updated data
return(seed)
}
## ----define_V-----------------------------------------------------------------
n <- 50; r <- 3; p <- 20
V <-syntheticNMF(n, r, p)
## ----custom_algo_run----------------------------------------------------------
nmf(V, 3, my.algorithm, scale.factor=10)
## ----custom_algo_run_obj------------------------------------------------------
# based on Kullback-Leibler divergence
nmf(V, 3, my.algorithm, scale.factor=10, objective='KL')
# based on custom distance metric
nmf(V, 3, my.algorithm, scale.factor=10
, objective=function(model, target, ...){
( sum( (target-fitted(model))^4 ) )^{1/4}
}
)
## ----custom_algo_run_mixed, error = TRUE, try = TRUE--------------------------
# put some negative input data
V.neg <- V; V.neg[1,] <- -1;
# this generates an error
try( nmf(V.neg, 3, my.algorithm, scale.factor=10) )
# this runs my.algorithm without error
nmf(V.neg, 3, my.algorithm, mixed=TRUE, scale.factor=10)
## ----nmf_models---------------------------------------------------------------
nmfModel()
## ----custom_algo_NMFoffset----------------------------------------------------
my.algorithm.offset <- function(x, seed, scale.factor=1){
# do something with starting point
# ...
# for example:
# 1. compute principal components
pca <- prcomp(t(x), retx=TRUE)
# retrieve the model being estimated
data.model <- fit(seed)
# 2. use the absolute values of the first PCs for the metagenes
# Note: the factorization rank is stored in object 'start'
factorization.rank <- nbasis(data.model)
basis(data.model) <- abs(pca$rotation[,1:factorization.rank])
# use the rotated matrix to get the mixture coefficient
# use a scaling factor (just to illustrate the use of extra parameters)
coef(data.model) <- t(abs(pca$x[,1:factorization.rank])) / scale.factor
# 3. Compute the offset as the mean expression
data.model@offset <- rowMeans(x)
# return updated data
fit(seed) <- data.model
seed
}
## ----custom_algo_NMFOffset_run------------------------------------------------
# run custom algorithm with NMF model with offset
nmf(V, 3, my.algorithm.offset, model='NMFOffset', scale.factor=10)
## ----custom_seed--------------------------------------------------------------
# start: object of class NMF
# target: the target matrix
my.seeding.method <- function(model, target){
# use only the largest columns for W
w.cols <- apply(target, 2, function(x) sqrt(sum(x^2)))
basis(model) <- target[,order(w.cols)[1:nbasis(model)]]
# initialize H randomly
coef(model) <- matrix(runif(nbasis(model)*ncol(target))
, nbasis(model), ncol(target))
# return updated object
return(model)
}
## ----custom_seed_run----------------------------------------------------------
nmf(V, 3, 'snmf/r', seed=my.seeding.method)
## ----options_algo, eval=1:6---------------------------------------------------
#show default algorithm and seeding method
nmf.options('default.algorithm', 'default.seed')
# retrieve a single option
nmf.getOption('default.seed')
# All options
nmf.options()
## ----print_options------------------------------------------------------------
nmf.printOptions()
## ----sessionInfo, echo=FALSE, results='asis'----------------------------------
toLatex(sessionInfo())
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.