R/04-multigeneCNV.R

Defines functions multiCNVPosterior

Documented in multiCNVPosterior

# Copyright (C) Kevin R. Coombes 2014-2015

########################
# need to figure out what kind of data structures we need for deep sequencing
# of more than one gene
#
# Option 1: a list where each entry is the same input (data.frame) as provided
#           to makeCNVposterior
# Option 2: a data.frame like that provided to makeCNVPosterior but with an
#           extra column for 'Gene' identifier

# key point is that the estimates of normal fraction should borrow strength
# across all genes, but the copy number estimates should be gene-specific.

setClass("MultiCNVPosterior",
         representation=list(
           gposts = "list",
           cps = "CNVPosteriorSummary"
           ))

setValidity("MultiCNVPosterior", function(object) {
  all(unlist(lapply(object@gposts, inherits, what="CNVPosterior")))
})


multiCNVPosterior <- function(obs, prior=new("CNVPrior")) {
  if (inherits(obs, 'data.frame')) { # input option 2
    gcol <- which(colnames(obs) == "Gene")
    if (length(gcol) == 0) { # assume only one gene
      return(makeCNVPosterior(obs, prior))
    } else {
      gnames <- unique(obs$Gene)
      obs <- lapply(gnames, function(g) obs[obs$Gene==g,])
      names(obs) <- gnames
    }
  }
  # at this point, we're using input option #1, with a list of data.frames
  posters <- lapply(obs, makeCNVPosterior, prior=prior)
 # get posterior on nu by combining data over all genes
  nupost <- Reduce("*", lapply(posters, function(po) {
    apply(po@posterior, 1, sum)
  }))
  nupost <- nupost/sum(nupost)
  # nupost is the pdf; also record the cdf
  cdf <- cumsum(nupost)
  # and the mode
  prior <- posters[[1]]@prior
  grid <- prior@grid
  loc <- median(grid[nupost == max(nupost)])
  # and the credible interval
  q05 <- max(grid[cdf < 0.05])
  q95 <- min(grid[cdf > 0.95])
  # now go back and use the posterior distribuion of nu to
  # get posteriors  of the per-gene copy number states
  cn <- lapply(posters, function(po) {
    tmp <- sweep(po@loglike, 1, nupost, "+")
    tmp <- sweep(tmp, 2, log(cnPrior(prior)), "+")
    tmp <- exp(tmp)
    posterior <- tmp/sum(tmp)
    apply(posterior, 2, sum)    
  })
  cn <- as.data.frame(t(as.data.frame(cn)))
  cps <- new("CNVPosteriorSummary", copyNumbers=cn,
             loc=loc, q05=q05, q95=q95,
             grid=grid, cdf=cdf, pdf=nupost)
  new("MultiCNVPosterior", gposts=posters, cps=cps)
}

setMethod("summary", c("MultiCNVPosterior"), function(object, ...) {
  object@cps
})

setMethod("plot", c("MultiCNVPosterior", "missing"), function(x, place="topright", lwd=1, ...) {
  post <- x@cps@pdf
  grid <- x@cps@grid
  plot(grid, post, type="l",
       xlab="Normal Fraction", ylab="Posterior Probability", lwd=lwd, ...)
  invisible(post)
})

Try the DeepCNV package in your browser

Any scripts or data that you put into this service are public.

DeepCNV documentation built on May 2, 2019, 5:23 p.m.