R/negbinom.R
In girafe: Genome Intervals and Read Alignments for Functional Exploration

Documented in addNBSignificance estimateNBParams plotNegBinomFit

## Question: how to assess number of reads per window (function perWindow
##            and other use cases)

## Idea (already mentioned in cisGenome paper and analogous to estimation
##   of Poisson distribution parameters from limited count numbers in
##   association studies):
##  Negative binomial distribution as family of null distributions of
##  frequencies in uninteresting windows (cite CisGenome, BayesPeak)
##  Use observed count frequencies n0, n1, n2 of windows with 0, 1,
##  or 2 aligned reads for estimating the parameters of the negative
##  binomial distribution.
estimateNBParams <- function(n0, n1, n2){
  # build the ratios of these numbers to estimate the parameters:
  q1 <- n1/n0
  q2 <- n2/n1
  # then the estimates of the parameters are as follows:
  # a) dispersion parameter 'size' ('r' in Wikipedia notation)
  size.est <- q1/(2*q2-q1) # dispersion parameter size (or 'r'
  ## the mean 'mu' (or 'lambda' in Wikipedia notation),
  #mu.est <- q1*r.est/(r.est-q1)
  # which is equivalent to:
  mu.est <- q1/(1-2*q2+q1)
  return(list(size=as.vector(size.est), mu=as.vector(mu.est)))
}#estimateNBParams

## in some cases this Neg.bin estimation does not work,
### then use simpler Poisson
estimatePoissonParam <- function(n0, n1, n2){
  # build the ratios of these numbers to estimate the parameters:
  q1 <- n1/n0
  q2 <- n2/n1
  # then the estimates of the parameters are as follows:
  mu.est <- mean(q1, 2*q2)
  return(mu.est)
}#estimatePoissonParam

addNBSignificance <- function(x, estimate="NB.012", correct="none", max.n=10L){
  stopifnot(inherits(x, "slidingWindowSummary"),
            "n.reads" %in% names(x))
  correct  <- match.arg(correct, p.adjust.methods)
  estimate <- match.arg(estimate, c("NB.012", "NB.ML", "Poisson", "Poisson2"))
  ## 'x$n.reads' has to be integer, but maybe is not when weigthing each
  ##  interval count by the number of matches.
  x$n.reads <- round(x$n.reads)
  ## 1st way, using counts of windows with 0, 1, or 2 reads
  if (estimate == "NB.012"){
    tab.n <- table(x$n.reads)
    if (any(is.na(c(tab.n["0"], tab.n["1"], tab.n["2"]))))
      stop("Cannot estimate paramters of Negative-Binomial distribution, because there are no windows with 0, 1, or 2 aligned reads.")
    E <- estimateNBParams(tab.n["0"], tab.n["1"], tab.n["2"])
  }
  if (estimate == "NB.ML"){
    require("MASS")
    F <- suppressWarnings(MASS::fitdistr(x$n.reads[x$n.reads <= max.n],
                                         "negative binomial"))
    E <- as.list(F$estimate)
  }
  if (estimate == "Poisson"){
    tab.n <- table(x$n.reads)
    E <- list(mu=estimatePoissonParam(tab.n["0"], tab.n["1"], tab.n["2"]),
              size=.Machine$integer.max)
  }
  if (estimate == "Poisson2"){
    ## naive Poisson
    E <- list(mu=sum(x$n.reads)/nrow(x),
              size=.Machine$integer.max)
  }
  pval <- suppressWarnings(pnbinom(x$n.reads-1, mu=E$mu, size=E$size,
                                   lower.tail=FALSE))
  if (any(is.nan(pval)))
    stop("Parameter estimation '", estimate,"' failed. Try another method.\n")
  pval <- p.adjust(pval, method=correct)
  x$p.value <- pval
  E$"p.adjust.method" <- correct
  E$type <- estimate
  attr(x, "NBparams") <- E
  return(x)
}#addNBSignificance


#### function for plotting the fit:
plotNegBinomFit <- function(x, breaks=c(-0.5:15.5, 1e8),
                            bar.col=rainbow(2),
                            addLegend=TRUE,
                            legend.names=c("data","background"), ...)
{
  stopifnot(inherits(x, "slidingWindowSummary"),
            length(bar.col)==2,
            !is.null(attr(x, "NBparams")),
            all(c("size","mu") %in% names(attr(x, "NBparams"))))
  
  ## draw random read numbers (as many as in data)
  x.rand <- rnbinom(nrow(x), size=attr(x,"NBparams")$size,
                    mu=attr(x,"NBparams")$mu)
  xcut <- table(cut(x$n.reads, breaks=breaks))
  randcut <- table(cut(x.rand, breaks=breaks))
  mat <- rbind(xcut, randcut)
  barnames <- floor(breaks[-c(1,length(breaks))])
  barnames <- c(barnames, paste(">",barnames[length(barnames)],sep=""))
  barplot(mat, beside=TRUE, names.arg=barnames,
          ylab="Frequency", col=bar.col,...)
  if (addLegend)
    legend(x="topright", fill=bar.col, legend=legend.names)
  invisible(NULL)
}#plotNegBinomFit

# test:
#sel.cols <- brewer.pal(8, "Set2")[c(1,8)]
#plotNegBinomFit(resWins[[1]],xlab="Reads per window", bar.col=sel.cols)

Any scripts or data that you put into this service are public.

girafe documentation built on Nov. 8, 2020, 4:56 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

girafe
Genome Intervals and Read Alignments for Functional Exploration

R/negbinom.R
In girafe: Genome Intervals and Read Alignments for Functional Exploration

Defines functions plotNegBinomFit addNBSignificance estimatePoissonParam estimateNBParams

Documented in addNBSignificance estimateNBParams plotNegBinomFit

Try the girafe package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

girafe Genome Intervals and Read Alignments for Functional Exploration

R/negbinom.R In girafe: Genome Intervals and Read Alignments for Functional Exploration

Defines functions plotNegBinomFit addNBSignificance estimatePoissonParam estimateNBParams

Documented in addNBSignificance estimateNBParams plotNegBinomFit

Try the girafe package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

girafe
Genome Intervals and Read Alignments for Functional Exploration

R/negbinom.R
In girafe: Genome Intervals and Read Alignments for Functional Exploration