#' Extract mutational signatures from trinucleotide context.
#' @description Decompose a matrix of 96 substitution classes into \code{n} signatures.
#' @details This function decomposes a non-negative matrix into \code{n} signatures by calling \code{NMF::nmf} with its default algorithm and a fixed seed (123456). The resulting basis matrix is scaled so that every signature sums to 1, and the coefficient matrix is scaled so that the contributions within every sample sum to 1.
#' @param mat Output of \code{\link{trinucleotideMatrix}}: a list whose \code{nmf_matrix} element is the input matrix of dimension nx96 (one row per sample).
#' @param n Number of signatures to decompose the matrix into. Must be supplied as a single positive number; the default \code{NULL} is rejected with an error.
#' @param plotBestFitRes Default FALSE. Currently not used by this function; kept so that existing calls continue to work.
#' @param parallel Default 4. Number of cores to use. If \code{NULL}, no parallel option is passed to \code{NMF::nmf}.
#' @param pConstant A small positive value to add to the matrix. Use it ONLY if the function throws a \code{non-conformable arrays} error. Default NULL (nothing is added).
#' @return a list with three elements: \code{signatures} (decomposed signatures, each column scaled to sum to 1), \code{contributions} (signature contributions in each sample, each column scaled to sum to 1) and \code{nmfObj} (the object returned by \code{NMF::nmf}).
#' @examples
#' \dontrun{
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml <- read.maf(maf = laml.maf)
#' laml.tnm <- trinucleotideMatrix(maf = laml, ref_genome = 'BSgenome.Hsapiens.UCSC.hg19', prefix = 'chr',
#' add = TRUE, useSyn = TRUE)
#' library("NMF")
#' laml.sign <- extractSignatures(mat = laml.tnm, plotBestFitRes = FALSE, n = 2, pConstant = 0.01)
#' }
#' @seealso \code{\link{trinucleotideMatrix}} \code{\link{plotSignatures}} \code{\link{compareSignatures}}
#' @export
extractSignatures = function(mat, n = NULL, plotBestFitRes = FALSE, parallel = 4, pConstant = NULL){

  start_time = proc.time()

  # Validate everything up front so that bad input fails with a readable
  # message before the (potentially slow) factorization is started.
  if(!is.list(mat) || is.null(mat$nmf_matrix)){
    stop("mat must be a list with an 'nmf_matrix' element, as returned by trinucleotideMatrix()")
  }
  # is.numeric(NULL) is FALSE, so the NULL default is rejected here as well.
  if(!is.numeric(n) || length(n) != 1 || is.na(n) || n < 1){
    stop("n must be a single positive number giving the factorization rank")
  }
  # pConstant is optional; only check it when the caller supplied one.
  if(!is.null(pConstant)){
    if(!is.numeric(pConstant) || length(pConstant) != 1 || is.na(pConstant) || pConstant <= 0){
      stop("pConstant must be > 0")
    }
  }

  # Transpose so that substitution classes are rows and samples are columns.
  mat = t(mat$nmf_matrix)

  # Report substitution classes without a single mutation across all samples.
  # This happens when sample size is low or in cancers with low mutation rate.
  # (Adding a small value to such rows was considered but may not be appropriate.)
  zeroMutClass = names(which(rowSums(mat) == 0))
  if(length(zeroMutClass) > 0){
    message('-Found zero mutations for conversions:')
    for(temp in zeroMutClass){
      message(paste0(" ", temp))
    }
  }

  # To avoid error due to non-conformable arrays
  if(!is.null(pConstant)){
    mat = mat + pConstant
  }

  # Available methods for nmf decompositions are 'brunet', 'lee', 'ls-nmf', 'nsNMF', 'offset'.
  # But based on the 21 breast cancer signatures data, default brunet seems to be working close to the results.
  # Sticking with default for now.
  message(paste0('-Running NMF for factorization rank: ', n))
  if(!is.null(parallel)){
    conv.mat.nmf = NMF::nmf(x = mat, rank = n, .opt = paste0('P', parallel), seed = 123456)
  }else{
    conv.mat.nmf = NMF::nmf(x = mat, rank = n, seed = 123456)
  }

  # Signatures: scale every column of the basis matrix to sum to 1.
  w = NMF::basis(conv.mat.nmf)
  w = sweep(w, 2, colSums(w), "/")
  colnames(w) = paste('Signature', seq_len(ncol(w)), sep = '_')

  # Contributions: one row per signature, one column per sample.
  h = NMF::coef(conv.mat.nmf)
  colnames(h) = colnames(mat) # correct colnames (seems to be missing with low mutation load)
  if(n == 1){
    # For single signature, contribution will be 100% per sample.
    # Assigning 1 directly (rather than h/h) avoids NaN from 0/0.
    h[] = 1
  }else{
    h = sweep(h, 2, colSums(h), "/") # Scale contributions (coefs)
  }
  rownames(h) = paste('Signature', seq_len(nrow(h)), sep = '_')

  message("-Finished in ", data.table::timetaken(start_time))

  list(signatures = w, contributions = h, nmfObj = conv.mat.nmf)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.