#' The log likelihood of a \strong{single} observed spectrum given the signature.
#'
#' Assumes that all mutations \code{spectrum} were generated by \code{signature}.
#'
#' @param spectrum A single background spectrum.
#'
#' @param signature A signature as a numeric vector.
#'
#' @param nbinom.size The dispersion parameter for the negative
#' binomial distribution; smaller is more dispersed.
#' See \code{\link[stats]{NegBinomial}}.
#'
#' @keywords internal
#'
LLHOfSignatureGivenSpectrum <- function(spectrum, signature, nbinom.size) {
expected.counts <- sum(spectrum) * signature
LLH <- LLHSpectrumNegBinom(spectrum, expected.counts, nbinom.size)
return(LLH)
}
#' Objective function for estimating a background signature from multiple background spectra.
#'
#' Returns the negative log likelihood
#' that the signature with an
#' estimated negative binomial
#' dispersion parameter,
#' generated the observed (multiple) spectra given the
#' likelihood of the observed total counts
#' attributed to the signature.
#'
#' @keywords internal
#'
#' @param sig.and.nbinom.size Concatenation of signature as a vector and
#' the negative binomial dispersion parameter.For the dispersion parameter for the negative
#' binomial distribution, smaller is more dispersed.
#' See \code{\link[stats]{NegBinomial}}.
#'
#' @param spectra The observed spectra as an \code{\link[ICAMS]{ICAMS}}
#' \code{catalog}.
#'
#' @return -1 * the log likelihood that the the input signature,
#' dispersion parameter, and mutation count generated
#' the observed spectrum
NegLLHOfSignature <- function(sig.and.nbinom.size, spectra) {
len <- length(sig.and.nbinom.size)
nbinom.size <- sig.and.nbinom.size[len]
sig <- sig.and.nbinom.size[-len]
loglh <- 0
for (i in 1:ncol(spectra)) {
loglh.i <-
LLHOfSignatureGivenSpectrum(spectra[ , i, drop = FALSE],
sig,
nbinom.size)
loglh <- loglh + loglh.i
}
return(-1 * loglh)
}
test_g_ineq <- function(est.target.sig.and.b, # Parameters to optimize
obs.spectra,
bg.sig.info ) {
bg.sig.profile <- bg.sig.info[["background.sig"]]
stopifnot("matrix" %in% class(bg.sig.profile))
len.sig <- nrow(bg.sig.profile)
x1 <- est.target.sig.and.b[1:len.sig]
abs(1 - sum(x1))
}
#' Estimate a signature from experimentally exposed spectra minus a background signature.
#'
#' @description
#'
#' We index mutation channels (e.g. \code{ACA > AAA, ACC > AAC, ...}) by \eqn{j}, \eqn{j \in 1...96}.
#'
#' We index input mutational spectra by \eqn{i}.
#'
#' Let
#'
#' \eqn{g = g_1, g_2,\ldots , g_{96}}, with \eqn{\Sigma g_j = 1},
#' be the previously determined, input background signature profile,
#'
#' \eqn{s^i, i \in 1, 2,\ldots} be the input spectra,
#' from exposed samples, usually only 2 or 3,
#'
#' \eqn{b^i, i \in 1, 2,\ldots} be the (to-be-estimated)
#' numbers of mutations due to the background signature in each
#' \eqn{s^i}, and
#'
#' \eqn{t = t_1, t_2,\ldots , t_{96}}, with \eqn{\Sigma t_j = 1},
#' be the (to-be-estimated) target signature due to an exposure.
#'
#' We want to maximize \eqn{\Pi^iP(s^i|b^i,t)P(b^i)} over
#' \eqn{b^1, b^2,\dots} and \eqn{t}. (Note that the code
#' actually minimizes the additive inverse of this.)
#'
#' \eqn{P(b^i)} is estimated from the distribution of previously observed
#' numbers of mutations in untreated samples, with the additional
#' constraint that \eqn{b^i \le |s^i|}), where \eqn{|s^i|} is defined as
#' the total number of mutations in spectrum \eqn{s^i}, i.e.
#' \eqn{|s^i| = \Sigma_j s^i_j}, \eqn{j \in 1...96}.
#'
#' \eqn{P(s^i|b^i,t)} is estimated as follows:
#'
#' The expected number of mutations in each
#' mutation category, \eqn{j}, is estimated as
#'
#' \eqn{e^i_j = g_jb^i + t_j(|s^i| - b^i)}.
#'
#' Then \eqn{P(s^i|e^i)} is estimated as \eqn{\Pi_jP(s^i_j|e^i_j)}.
#'
#' \eqn{P(s^i_j|e^i_j)} is estimated from a negative binomial distribution
#' centered on each \eqn{e^i_j}; see the \code{sig.nbinom.size} elements of
#' the \code{\link{background.info}} package variables.
#'
#' @param spectra The spectra from which to subtract the background,
#' as a matrix or \code{\link[ICAMS]{ICAMS}} catalog.
#'
#' @param bg.sig.info Information about the background signature. See
#' \code{\link{background.info}}.
#'
#' @param m.opts Options to pass to \code{\link[nloptr]{nloptr}}.
#'
#' @param start.b.fraction The estimated fraction of the mutations in
#' \code{spectra} due to the background signature.
#'
#' @details
#' See \code{\link{ObjFn1}}.
#'
#' @return A list with the elements \describe{
#'
#' \item{\code{inferred.target.sig}}{The estimated target signature as a numerical
#' vector.}
#'
#' \item{exposures.to.target.sig}{The estimated total number of mutations due
#' to the target signature in each input spectrum.}
#'
#' \item{\code{exposures.to.bg.sig}}{The estimated total number of mutations due
#' to the background in each input spectrum.}
#'
#' \item{\code{message}}{The \code{message} element of \code{all.opt.ret}.}
#'
#' \item{\code{all.opt.ret}}{The entire return value from the optimization.
#' See \code{\link[nloptr]{nloptr}}}
#' }
#'
#' @export
SeparateSignatureFromBackground <-
function(spectra,
bg.sig.info,
m.opts = NULL,
start.b.fraction = 0.1) {
if (is.null(m.opts)) m.opts <- SeparateSignatureFromBackgroundOptions()
spectra.catalog.type <- attr(spectra, "catalog.type", exact = TRUE)
stopifnot(!is.null(spectra.catalog.type))
sig0 <- MeanOfSpectraAsSig(spectra)$mean.sig
b.x0 <- start.b.fraction * colSums(spectra)
est.target.sig.and.b.x0 <- c(sig0, b.x0)
# Each element of the signature <= 1.
upper.bounds.of.target.sig <- rep(1, nrow(spectra))
ret <- nloptr::nloptr(
x0 = est.target.sig.and.b.x0,
eval_f = ObjFn1,
eval_g_ineq = test_g_ineq,
lb = rep(0, length(est.target.sig.and.b.x0)),
ub = c(upper.bounds.of.target.sig,
colSums(spectra) # The contribution of the background
# should not exceed the total count
# of the spectrum
),
opt = m.opts,
obs.spectra = spectra,
bg.sig.info = bg.sig.info)
soln <- ret$solution
bg.exposure <- soln[(1 + nrow(spectra)):length(soln)]
obs.counts <- colSums(spectra)
sig.to.return <- soln[1:nrow(spectra)]
sum.sig <- sum(sig.to.return)
is.one <- all.equal(1, sum.sig, tolerance = 1e-5)
if (is.character(is.one)) {
message("Sum of elements in inferred signature is not 1; ", is.one)
}
# sum.sig is probably not quite equal to 1
sig.to.return <- sig.to.return / sum.sig
return(list(inferred.target.sig = sig.to.return,
exposures.to.target.sig = obs.counts - bg.exposure,
exposures.to.bg.sig = bg.exposure,
message = ret$message,
all.opt.ret = ret))
}
#' Objective function for \code{\link{SeparateSignatureFromBackground}}.
#'
#' @param est.target.sig.and.b A numeric vector of which elements
#' 1:96 are the signature (as a vector) and the remaining elements
#' are the coefficients for each input spectrum in \code{obs.spectra}.
#'
#' @param obs.spectra The observed spectra, which may be the
#' sum of the activities of the background signature and the signature
#' of an experimental treatment.
#'
#' @param bg.sig.info Information on the background signature. See for example
#' \code{\link{background.info}}, e.g. \code{background.info[["HepG2"]]}.
#'
#' The caller will seek to minimize the value of this function.
#'
#' @keywords internal
#'
ObjFn1 <- function(
est.target.sig.and.b, # Parameters to optimize
obs.spectra,
bg.sig.info
) {
bg.sig.profile <- bg.sig.info[["background.sig"]]
stopifnot("matrix" %in% class(bg.sig.profile))
len.sig <- nrow(bg.sig.profile)
est.target.sig <- est.target.sig.and.b[1:len.sig]
b <- est.target.sig.and.b[(1 + len.sig):length(est.target.sig.and.b)]
# b is a vector of the number of mutations contributed
# by the background to each spectrum
loglh <- 0
for (i in 1:ncol(obs.spectra)) { # For each observed spectrum
obs.spectrum <- obs.spectra[ , i, drop = FALSE]
total.obs.count <- sum(obs.spectrum)
expected.bg.counts <- bg.sig.profile * b[i]
expected.counts <-
expected.bg.counts + (est.target.sig * (total.obs.count - b[i]))
if (FALSE) { # Experimental code, if enabled, does not converge
# (or converges very slowly).
bg.greater.than.spectrum <- which(expected.bg.counts > obs.spectrum)
if (length(bg.greater.than.spectrum) > 0) {
message("x")
return(Inf)
} }
loglh1.i <- LLHSpectrumNegBinom(
spectrum = obs.spectrum,
expected.counts = expected.counts,
nbinom.size = bg.sig.info[["sig.nbinom.size"]]) # This is the dispersion parameter for each channel
loglh2.i <- dnbinom(x = round(b[i]),
mu = bg.sig.info[["count.nbinom.mu"]],
size = bg.sig.info[["count.nbinom.size"]], # This is the dispersion parameter for the number of background signature mutations
log = TRUE)
# We might try a different distribtuion, e.g. draw from
# hist(rgamma(100000, shape = 8, rate = 6) * 1534 / (8 / 6))
# or
# rgamma(100000, shape = 8, rate = 3) * 1534 / (8 / 3))
# rather than
# hist(rnbinom(10000, mu = 1534, size = 3))
# or
# hist(0.1 * rnbinom(10000, mu = 15340, size = 4))
loglh <- loglh + loglh1.i + loglh2.i
}
return(-1 * loglh)
}
Solution2SigVec <- function(solution, sig.number = 96) {
sig.vec <- solution[1:sig.number]
return(sig.vec / sum(sig.vec))
}
Solution2Signature <- function(solution,
sig.number = 96,
ref.genome = NULL,
region = "genome") {
sig <- matrix(Solution2SigVec(solution), ncol = 1)
rownames(sig) <- ICAMS::catalog.row.order[["SBS96"]]
colnames(sig) <- "Inferred.sig"
sig <- ICAMS::as.catalog(
sig, ref.genome = ref.genome,
region = region,
catalog.type = "counts.signature"
)
return(sig)
}
Nloptr2BGMutationCounts <- function(nloptr.retval, sig.number = 96) {
solution <- nloptr.retval$solution
return(solution[(sig.number + 1):length(solution)])
}
Nloptr2Signature <- function(nloptr.retval, sig.number = 96) {
return(Solution2Signature(nloptr.retval$solution, sig.number))
}
#' Return a default value to pass as the \code{m.opts} argument to \code{\link{SeparateSignatureFromBackground}}.
SeparateSignatureFromBackgroundOptions <- function() {
return(
list(algorithm = "NLOPT_LN_COBYLA",
maxeval = 20000,
print_level = 0,
xtol_rel = 0.000001, # 0.0001,)
xtol_abs = 1e-07 # 1e-06
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.