R/03-posterior.R
In DeepCNV: Exploiting Normal Contamination to Infer Copy Number from Deep Sequencing

Documented in makeCNVPosterior

# Copyright (C) Kevin R. Coombes 2014-2017

###################################
# Posterior Distribution

# This may be confusing, largely because of the 'hidden' discrete
# prior structure. What I think we actually need is
#    (0) (log)likelihoods for 13 hidden states
#    (1) snp-level (log)likelihoods and posteriors for 3-way CN state
#    (2) gene-level (log)likelihoods and posteriors for 3-way CN state
# Right now,  these are represented by
#    (0) 'hiddenloglike'
#    (1) 'snploglike' and 'snppost'
#    (2) 'loglike' and 'posterior'
setClass("CNVPosterior", 
         representation=list(
             hiddenloglike="array", # grid x hidden x variants
             snploglike="array",    # grid x cnstate x variants
             snppost="array",       # grid x cnstate x variants
             loglike="array",       # grid x cnstate
             posterior="array",     # grid x cnstate
             observed="data.frame",
             prior="CNVPrior"
             ))


computePosterior <- function(loglike, prior) {
  nup <- prior@pdf
  pop <- sweep(loglike, 1, nup, "+")
  # assumes the same answer for pd$Mutation and for pd$SNP
  pop <- sweep(pop, 2, log(cnPrior(prior)), "+")
  # normalize so the sum of the posterior probabilities equals one
  weights <- log( apply(exp(pop), 3, sum) )
  pop<- sweep(pop, 3, weights, '-')
  exp(pop)
}

createPosterior <- function(hll, obs, prior) {
  # so, we are going to compute the one-at-a-time posterior
  # probabilities that Vi=v from the  above stuff and collapse
  # down using the maximum a posteriori (MAP) estimate.  After
  # that, we pretend that the MAP variant types are the truth.
  sll <- array(NA, dim=c(dim(hll)[1], 3, dim(hll)[3]),
               dimnames=list(NULL,
                             cnSet,
                             NULL))
  for (i in 1:dim(hll)[3]) {
    x <- exp(hll[,,i])
    vartype <- as.character(obs$V)[i]
    temp <- lapply(vtSet, function(v) x * pVgT[vartype, v])
    xx <- do.call(cbind, temp)
    temp <- lapply(vtSet, function(v) pGgTS[,,v] * pT[v])
    zz <- do.call(rbind, temp)
    yy <- xx %*% zz
    sll[,,i] <- log(yy)
  }
  # Now 'sll' is the likelihood
  #    Prob(Ki=k, Ni=n | Vbar=vbar, nu=nu0, S=s)
  # where we used, in vbar,  the MAP value of vi for each datarow. We
  # can reapply the beta and copy number priors to start converting
  # these likelihoods into posteriors..
  snppost <- computePosterior(sll, prior)
  # But we're stil not done. need to combine likelihoods across
  # independent variants = datarows
  logprior <- outer(prior@pdf, log(c(0.4, 0.3, 0.3)), "+") # log( P(Theta) )
  loglike <- apply(sll, 1:2, sum)
  lognumer <- loglike + logprior
  M <- max(lognumer)
  temp <- lognumer - M
  foo <- log(sum(exp(temp)))
  logpost <- temp - foo
  posterior <- exp(logpost)
  if (abs(1 - sum(posterior)) > 1e-14) warning("bad scaling")

  
  new("CNVPosterior", 
      loglike=loglike, posterior=posterior,
      snploglike=sll, snppost=snppost,
      hiddenloglike=hll,
      observed=obs, prior=prior)
}

# the 'obs' input must be a data.frame with columns labeled
#      K = number of variant reads
#      N = total number of reads
#      V = type of variant  (SNP or Mutation)
# KRC: should we force this to be an S4-object so we can perform
# validity checking?
makeCNVPosterior <- function(obs, prior) {
  gridPoints <- prior@grid
  hll <- cnvLikelihood(gridPoints, obs) # 'hidden' log likelihood
  # At this point, 'hiddenloglike' is a 3D array of size
  #   gridPoints x genotypes(basecase) x datarows(variants)
  # and it represents the logarithm of
  #     Prob(Ki=k, Ni=n | Gi=g, nu=nu0)
  # We also need something to represent the logarithm of
  #     Prob(Kbar = kbar, Nbar=nbar | Vbar=vbar, nu=nu0, S=s)
  # Since this probability is the product over datarows i, we
  # might take the sum on the log scale
  #    snploglike <- apply(hiddenloglike, 1:3, sum)
  # but this isn't quite correct, since we still need to
  # collapse it for each vector of variantType assignments.
  # The problem is that, with 10 datarows, there are 4^10
  # possible vector assignments to consider, and that's way
  # too many to store and carry around with us.
  #
  createPosterior(hll, obs, prior)
}

#KRC: Why don't we have to export this class to get the 'summary' method for
# 'CNVPosterior' to work?
setClass("CNVPosteriorSummary", 
         representation=list(
           copyNumbers = "data.frame",
           loc="numeric", q05="numeric", q95="numeric",
           grid="numeric", cdf='numeric', pdf='numeric'))

setMethod("as.data.frame", c("CNVPosteriorSummary"), function (x, row.names = NULL, optional = FALSE, ...) {
    data.frame(x@copyNumbers, loc=x@loc, q05=x@q05, q95=x@q95)
})

setMethod("summary", c("CNVPosterior"), function(object, ...) {
  # integrate out nu to get the copy number posteriors
  cnpost <- as.data.frame(as.list(apply(object@posterior, 2, sum)))
  # separately, have to work out the posterior on nu
  # start by integrating out the copy number state
  tp <- apply(object@posterior, 1, sum)
  # find the mode
  grid <- object@prior@grid
  loc <- median(grid[tp == max(tp)])
  cdf <- cumsum(tp)/sum(tp)
  # TODO: Handle (literal) edge cases where MAP estimate is at
  # one end of the interval
  q05 <- round(max(grid[cdf < 0.05]), 3)
  q95 <- round(min(grid[cdf > 0.95]), 3)
  new("CNVPosteriorSummary", copyNumbers=cnpost,
      loc=loc, q05=q05, q95=q95,
      grid=grid, cdf=cdf, pdf=tp)
})

setMethod("show", c("CNVPosteriorSummary"), function(object) {
  cat("Most likely normal fraction =", object@loc,
      "\nNinety percent credible interval = (", object@q05, ',', object@q95, ")\n")
  print(object@copyNumbers)
  invisible(object)
})


setMethod("plot", c("CNVPosterior","missing"), function(x, place="topright", lwd=1, ...) {
  post <- x@snppost
  grid <- x@prior@grid
  plot(grid, post[,"Normal",1], type="n", xlim=c(0,1),
       ylim=range(post), 
       xlab="Normal Fraction", ylab="Posterior Probability",
       lwd=lwd, ...)
  n <- dim(post)[3]
  c1p <- colorRampPalette(c("blue", "cyan"))(n)
  c2p <- colorRampPalette(c("red", "magenta"))(n)
  c3p <- colorRampPalette(c("green", "yellow"))(n)
  for (i in 1:n) {
    lines(grid, post[,"Normal",i], col=c1p[i], lwd=lwd)
    lines(grid, post[,"Deleted",i], col=c2p[i], lwd=lwd)
    lines(grid, post[,"Gained",i], col=c3p[i], lwd=lwd)
  }
  legend(place, c("Deleted", "Normal", "Gained"), lwd=2,
         col=c("red", "blue", "green"))
  invisible(x)
})

setMethod("hist", c("CNVPosterior"), function(x, place="topright", lwd=1, ...) {
  post <- x@posterior
  grid <- x@prior@grid
  plot(grid, post[,"Deleted"], col="red", type="l",
       ylim=range(c(post[,"Deleted"], post[,"Gained"], post[,"Normal"])), 
       xlab="Normal Fraction", ylab="Posterior Probability", lwd=lwd, ...)
  lines(grid, post[,"Normal"], col="blue", lwd=lwd)
  lines(grid, post[,"Gained"], col="green", lwd=lwd)
  legend(place, c("Deleted", "Normal", "Gained"), lwd=2,
         col=c("red", "blue", "green"))
  invisible(post)
})

setMethod("image", c("CNVPosterior"), function(x, ...) {
  post <- x@posterior
  grid <- x@prior@grid
  image(grid, 1:3, post, ylab='', yaxt='n', ...)
  mtext(colnames(post), side=2, at=1:3)
  invisible(post)
})

############
# update the Bayesian analysis by applying a different prior
# Note that we stole the 'update' method from the idea of
# refitting frequentist models.

setMethod("update", c("CNVPosterior"), function(object, prior, ...) {
  # this simply repeats the code from 'makeCNVPosterior'
  # should really refactor it as a function call...
  createPosterior(object@hiddenloglike, 
                  object@observed,
                  prior)
})

Any scripts or data that you put into this service are public.

DeepCNV documentation built on May 2, 2019, 5:23 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

DeepCNV
Exploiting Normal Contamination to Infer Copy Number from Deep Sequencing

R/03-posterior.R
In DeepCNV: Exploiting Normal Contamination to Infer Copy Number from Deep Sequencing

Defines functions computePosterior createPosterior makeCNVPosterior

Documented in makeCNVPosterior

Try the DeepCNV package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DeepCNV Exploiting Normal Contamination to Infer Copy Number from Deep Sequencing

R/03-posterior.R In DeepCNV: Exploiting Normal Contamination to Infer Copy Number from Deep Sequencing

Defines functions computePosterior createPosterior makeCNVPosterior

Documented in makeCNVPosterior

Try the DeepCNV package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DeepCNV
Exploiting Normal Contamination to Infer Copy Number from Deep Sequencing

R/03-posterior.R
In DeepCNV: Exploiting Normal Contamination to Infer Copy Number from Deep Sequencing