coproanalysis: Analyze CoPRO and Related Data

Documented in calculateDispersion

#' Calculate dispersion
#'
#' Calculates the dispersion of a TSS. The reads in \code{r} are pooled by
#' their \code{id5} value, the pooled counts are normalised to sum to 1, and
#' the dispersion is the Shannon entropy (in bits) of the resulting discrete
#' probability distribution: \code{-sum(p * log2(p))}.
#'
#' Groups whose pooled read count is zero contribute nothing to the result
#' (the usual convention that \code{0 * log2(0)} is 0), and unused levels of a
#' factor \code{id5} are ignored.
#'
#' @param r the vector of reads (numeric), the same length as \code{id5}
#' @param id5 the vector of id5s; reads that share an id5 are summed before
#'   the distribution is formed
#'
#' @return the dispersion of the TSS: a single number that is 0 when all reads
#'   share one id5 and \code{log2(k)} when the reads are spread evenly over
#'   \code{k} id5s. The result is \code{NaN} when the total read count is 0
#'   and \code{NA} when \code{r} contains \code{NA}.
#' @export
#'
#' @examples
#' readList <- list(reads1 = c(5, 15), reads2 = c(5, 5), reads3 = c(5, 5, 5))
#' id5List <- list(ids1 = c(1, 3), ids2 = c(1, 3), ids3 = c(1, 2, 3))
#' dispersions <- vapply(seq_along(readList), function(i) {
#'   calculateDispersion(readList[[i]], id5List[[i]])
#' }, numeric(1))
#' dispersionsChar <- paste0("Dispersion = ", round(dispersions, 2))
#' df <- data.frame(
#'   id5 = unlist(id5List),
#'   reads = unlist(readList),
#'   dispersion = rep(dispersionsChar, c(2, 2, 3))
#' )
#' \dontrun{
#' library(ggplot2)
#' ggplot(data = df, mapping = aes(x = id5, y = reads)) +
#'   facet_wrap(. ~ dispersion) +
#'   ylim(c(0, 15)) +
#'   xlim(c(0.0, 4.0)) +
#'   theme_bw() +
#'   theme(
#'     axis.text.x = element_blank(),
#'     axis.ticks.x = element_blank(),
#'     panel.grid.major.x = element_blank(),
#'     panel.grid.minor.x = element_blank()
#'   ) +
#'   xlab("Transcription start nucleotide") +
#'   ylab("Reads") +
#'   geom_segment(xend = df$id5, yend = 0) +
#'   geom_point(size = 3, col = "blue")
#' }
calculateDispersion <- function(r, id5) {
  # tapply() yields NA for factor levels with no observations, which would
  # turn the whole result into NA; drop such levels before grouping.
  if (is.factor(id5)) {
    id5 <- droplevels(id5)
  }

  # Pool reads per id5 and normalise to a probability distribution.
  totR <- as.numeric(tapply(X = r, INDEX = id5, FUN = sum))
  totR <- totR / sum(totR)

  # 0 * log2(0) evaluates to NaN in floating point; by convention such terms
  # contribute 0 to the entropy. which() skips NA/NaN entries, so genuinely
  # missing or undefined values still propagate to the result.
  terms <- totR * log(totR, base = 2)
  terms[which(totR == 0)] <- 0

  -sum(terms)
}