# R/processMultipleSigSets.R

# Defines functions processMultipleSigSets

# Documented in processMultipleSigSets

#####################
# internal function #
#####################

#' processMultipleSigSets (internal function)
#'
#' Performs the quadratic programming/exposure prediction for multiple
#' subsets (of size k) of mutational signatures and returns information on the
#' best subset (highest explained variance). This function is used by
#' \code{getBestDecomp4Ksignatures} and \code{addBestSignatureToSubset}.
#'
#' @usage processMultipleSigSets(genome, signatures, sigCombn, k,
#' constrainToMaxContribution=FALSE, tolerance=0.1)
#' @param genome Genome for which to approximate the decomposition.
#' @param signatures The whole set of signatures (from which to choose
#' a subset of signatures). Must be named, because subsets are looked up by
#' signature name.
#' @param sigCombn The combinations of subsets of \code{k} signatures to use.
#' A non-empty list with one element per subset (e.g., as generated by
#' \code{combn(..., simplify=FALSE)}); each element is used to subset
#' \code{signatures}. The list must be named: the name of each element is the
#' "|"-separated concatenation of the names of the signatures in that subset
#' (e.g., "sigA|sigB"), as these names are split again to identify the
#' signatures of the best subset.
#' @param k Number of signatures to use (subset size).
#' @param constrainToMaxContribution (Optional) [Note: this is experimental
#' and is usually not needed!] If \code{TRUE}, the maximum contribution that
#' can be attributed to a signature will be constrained by the variant feature
#' counts (e.g., specific flanking bases) observed in the individual tumor
#' genome. If, for example, 30\% of all observed variants have a specific
#' feature and 60\% of the variants produced by a mutational process/signature
#' will manifest the feature, then the signature can have contributed up to
#' 0.3/0.6 (=0.5 or 50\%) of the observed variants. The lowest possible
#' contribution over all signature features will be taken as the allowed
#' maximum contribution of the signature. This allowed maximum will
#' additionally be increased by the value specified as \code{tolerance}
#' (see below). For the illustrated example and \code{tolerance}=0.1 a
#' contribution of up to 0.5+0.1 = 0.6 (or 60\%) of the signature would be
#' allowed.
#' @param tolerance (Optional) If \code{constrainToMaxContribution} is
#' \code{TRUE}, the maximum contribution computed for a signature is increased
#' by this value (see above). If the parameter \code{constrainToMaxContribution}
#' is \code{FALSE}, the tolerance value is ignored. Default: 0.1.
#' @return A list object containing: k=number of signatures;
#' explVar=variance explained by these signatures;
#' sigList=list of the signatures;
#' decomposition=decomposition (exposures) obtained with these signatures.
#' If several subsets reach the same (highest) explained variance, one of
#' them is selected at random. An error is raised if \code{sigCombn} is empty
#' or not (completely) named, or if the explained variance of any subset
#' is \code{NA}.
#' @author Rosario M. Piro\cr Politecnico di Milano\cr Maintainer: Rosario
#' M. Piro\cr E-Mail: <rmpiro@@gmail.com> or <rosariomichael.piro@@polimi.it>
#' @references \url{http://rmpiro.net/decompTumor2Sig/}\cr
#' Krueger, Piro (2019) decompTumor2Sig: Identification of mutational
#' signatures active in individual tumors. BMC Bioinformatics
#' 20(Suppl 4):152.\cr
#' @keywords internal
processMultipleSigSets <- function(genome, signatures, sigCombn, k,
                                   constrainToMaxContribution=FALSE,
                                   tolerance=0.1) {

    # fail early with a clear message; otherwise max() on an empty vector
    # only warns and the subsequent subsetting fails with an obscure error
    if (length(sigCombn) == 0) {
        stop("sigCombn must contain at least one subset of signatures!")
    }

    # the names encode the signatures of each subset ("sigA|sigB|...") and
    # are parsed below, so every subset needs a usable name
    combnNames <- names(sigCombn)
    if (is.null(combnNames) || anyNA(combnNames) ||
        !all(nzchar(combnNames))) {
        stop("sigCombn must be named with the '|'-separated names of the ",
             "signatures of each subset!")
    }

    # decompose the genome once for each subset of signatures
    # (lapply keeps the names of sigCombn)
    decompTmp <- lapply(sigCombn, function(sigIndices) {

        # signatures to be used
        sigs <- signatures[sigIndices]

        # decompose for these signatures
        QPforSig(genome, sigs,
                 constrainToMaxContribution=constrainToMaxContribution,
                 tolerance=tolerance)
    } )

    # iterate by position (not by name) so that each decomposition is paired
    # with its own subset even if names should be duplicated
    explVarTmp <- vapply(seq_along(decompTmp), function(idx) {

        # signatures to be used
        sigs <- signatures[unlist(strsplit(combnNames[idx], "|", fixed=TRUE))]

        # determine the explained variance for this decomposition; single
        # '[' keeps a named list of length one, paired with list(genome)
        computeExplainedVariance(decompTmp[idx], sigs, list(genome))
    }, FUN.VALUE=numeric(1) )
    names(explVarTmp) <- combnNames

    # max() and the comparison below cannot handle missing values
    if (anyNA(explVarTmp)) {
        stop("The explained variance could not be determined for the ",
             "signature subset(s): ",
             paste(combnNames[is.na(explVarTmp)], collapse=", "))
    }

    # which was the best explained variance for this number of signatures?
    explVar <- max(explVarTmp)

    # which decomposition achieved this explained variance? If multiple do,
    # select one at random
    bestIndices <- which(explVarTmp == explVar)
    if (length(bestIndices) > 1) {
        explVarIndex <- sample(bestIndices, 1)
    } else {
        # only one; sample(x, 1) with a single number x would draw
        # from 1:x instead of returning x!
        explVarIndex <- bestIndices
    }

    # recover the names of the signatures of the best subset
    sigList <-
        unlist(strsplit(combnNames[explVarIndex], "|", fixed=TRUE))

    decomposition <- decompTmp[[explVarIndex]]
    names(decomposition) <- sigList

    list(k = k,
         explVar = explVar,
         sigList = sigList,
         decomposition = decomposition)
}
# rmpiro/decompTumor2Sig documentation built on May 15, 2022, 3:27 a.m.