# R/getPriors.R

# modification on git from copied files
`getPriors.NB` <-
function (cD, samplesize = 1e5, samplingSubset = NULL, equalDispersions = TRUE, estimation = "QL", verbose = TRUE, zeroML = FALSE, consensus = FALSE, cl, ...)
{
  if(!inherits(cD, what = "countData"))
    stop("variable 'cD' must be of or descend from class 'countData'")

  if(!("libsizes" %in% names(cD@sampleObservables)))
    {
      warning("no library sizes available; inferring libsizes using default settings")
      cD@sampleObservables$libsizes <- getLibsizes(cD)
    }

  if(estimation == "ML") stop("Maximum likelihood estimation with getPriors.NB has been removed. Use quasi-likelihood estimation, or the getPriors function.")
  
  if(verbose) message("Finding priors...", appendLF = FALSE)
  
  optimoverPriors <- function(x, estimation, replicates, groups, libsizes, equalDispersions, lensameFlag, zeroML, consensus, disp)
    {
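      # optimoverPriors estimates negative binomial parameters for a single row
      # of data: means and dispersions for each group, or a single (mean,
      # dispersion) pair if a consensus prior is requested.
      # dispalt: quasi-likelihood estimating equation in the dispersion -- the
      # absolute difference between the NB deviance and (number of non-missing
      # counts - 1); the minimising dispersion is taken as the QL estimate.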
      dispalt <- function(dispersion, cts, mus, libsizes, len)
        abs(2 * (sum(cts[cts > 0] * log(cts[cts > 0]/(mus[cts > 0] * libsizes[cts > 0] * len[cts > 0])), na.rm = TRUE) -
                 sum((cts + dispersion^-1) * log((cts + dispersion^-1) / (mus * libsizes * len + dispersion^-1)), na.rm = TRUE)) - (length(cts[!is.na(cts)])-1))
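      # mualt: NB log-likelihood of the counts in the mean (scaled by library
      # size and segment length) for a fixed dispersion; maximised to estimate
      # the mean.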
      mualt <- function(mu, cts, dispersion, libsizes, len)
        sum(dnbinom(cts, size = 1/ dispersion, mu = mu * libsizes * len, log = TRUE), na.rm = TRUE)

      # muZeros: objective whose minimiser is the mean under which the
      # probability of observing all-zero counts is one half; used for rows
      # with no non-zero observations.
      muZeros <- function(mu, cts, dispersion, libsizes, len)
        {
          if(mu == 0) return(NA) else abs(sum(pnbinom(cts, mu = mu * libsizes * len, size = 1 / dispersion, log.p = TRUE), na.rm = TRUE) - log(0.5))
        }
      
      # dispML: joint NB log-likelihood in the dispersion (alphas[1]) and the
      # per-replicate-group means (alphas[-1]); returns NA outside the valid
      # parameter space.
      dispML <- function(alphas, replicates, y, libsizes, seglen)
        {
          if(any(alphas[-1] < 0) | alphas[1] <= 0) return(NA)
          
          dispersion <- alphas[1]
          
          mus <- alphas[-1]
          
          sum(sapply(levels(replicates), function(unqrep) sum(dnbinom(y[replicates == unqrep], size = 1 / dispersion,
                                                                      mu = mus[levels(replicates) == unqrep] * libsizes[levels(replicates) == unqrep] * seglen[levels(replicates) == unqrep], log = TRUE), na.rm = TRUE)))
        }

      # findDisp.QL: quasi-likelihood dispersion estimation. Alternately
      # re-estimates the dispersion given the current group means (dispalt) and
      # the per-replicate-group means given the dispersion (mualt) until the
      # dispersion converges; degenerate rows return a near-zero dispersion.
      findDisp.QL <- function(repdata)
        {
          replicates <- repdata$replicates
          cts <- repdata$cts
          len <- as.double(repdata$len)
          libsizes <- as.double(repdata$libsizes)
          searchReps <- levels(replicates)[levels(replicates) %in% replicates]
          
          if(all(cts == 0) | (sum(cts > 0) == 1 & sum(replicates %in% replicates[cts > 0]) == 1))
            return(1e-100)

          repmus <- sapply(searchReps, function(rep) return(mean(cts[replicates == rep] / (libsizes * len)[replicates == rep])))

          mus <- c()

          z <- unlist(lapply(searchReps, function(rep) rbind(which(replicates == rep), repmus[searchReps == rep])))
          
          musmat <- matrix(unlist(z), ncol = 2, byrow = TRUE)

          mus[musmat[,1]] <- musmat[,2]

          newmus <- repmus
          newdisp <- 0
          for(ii in 1:1000) {
            lowInt <- 0
            disp <- optimise(dispalt, interval = c(lowInt, 1e2), cts = cts, mus = mus, libsizes = libsizes, len = len, tol = 1e-50)$minimum

            newdisp <- c(newdisp, disp)
            repmus <- rep(0, length(searchReps))
            nz <- sapply(searchReps, function(rep) any(cts[replicates == rep] != 0))
            repmus[nz] <- sapply(searchReps[nz], function(rep) optimise(mualt, interval = c(0,  max(cts[replicates == rep] / libsizes[replicates == rep], na.rm = TRUE) * 2), cts = cts[replicates == rep], dispersion = disp, libsizes = libsizes[replicates == rep], len = len[replicates == rep], tol = 1e-50, maximum = TRUE)$maximum)

            musmat <- matrix(unlist(lapply(searchReps, function(rep) rbind(which(replicates == rep), repmus[searchReps == rep]))), ncol = 2, byrow = TRUE)
            mus[musmat[,1]] <- musmat[,2]

            newmus <- rbind(newmus, repmus)
            
            if(any(abs(newdisp[-length(newdisp)] - disp) < 1e-50))
              {
                disp <- mean(newdisp[which(abs(newdisp[-length(newdisp)] - disp) < 1e-50):(length(newdisp) - 1)])
                break
              }
          }
          
          disp
        }

      # findDisp.ML: maximum-likelihood estimate of a common dispersion,
      # jointly optimising the dispersion and the per-replicate-group means.
      # Retained for reference; the "ML" path is rejected at the top of
      # getPriors.NB.
      findDisp.ML <- function(repdata)
        {
          replicates <- repdata$replicates
          cts <- repdata$cts
          len <- repdata$len
          libsizes <- as.double(repdata$libsizes)
          searchReps <- levels(replicates)[levels(replicates) %in% replicates]
          
          if(all(cts == 0) | (sum(cts > 0) == 1 & sum(replicates %in% replicates[cts > 0]) == 1))
            return(1e-100)


          repmus <- sapply(searchReps, function(rep) mean(cts[replicates == rep] / (libsizes * len)[replicates == rep]))
          
          disp <- optim(par = c(1, repmus),
                fn = dispML, control = list(fnscale = -1), 
                y = cts, replicates = replicates, libsizes = libsizes, seglen = len)$par[1]

          disp
        }
  
      # The row x carries segment length(s) first and counts second: a single
      # shared length when lensameFlag is TRUE, otherwise one length per sample.
      if(lensameFlag)
        {
          cts <- x[-1]
          len <- x[1]
        } else {
          cts <- x[-(1:(length(x) / 2))]
          len <- x[1:(length(x) / 2)]
        }
      if(length(len) == 1) len <- rep(len, length(cts))

      # getMu: estimates the scaled mean for the samples selected by the
      # logical vector 'samples'. All-zero selections return 0 when zeroML is
      # TRUE, or otherwise the mean under which observing all zeros has
      # probability one half (muZeros); non-zero selections maximise the NB
      # log-likelihood (mualt).
      getMu <- function(samples, disp) {
        if(all(cts[samples] == 0, na.rm = TRUE) & zeroML) {
          return(0)
        } else if(all(cts[samples] == 0, na.rm = TRUE)) {
          return(
                 optimize(muZeros, interval = c(0, max(1 / libsizes[samples] * len[samples], na.rm = TRUE)),
                          cts = cts[which(samples)], dispersion = 1, libsizes = libsizes[which(samples)], len = len[which(samples)], tol = 1e-50, maximum = FALSE)$minimum
                 )
        } else {
          return(optimise(mualt, interval = c(0, max(cts[samples] / libsizes[samples], na.rm = TRUE) * 2), cts = cts[which(samples)], dispersion = disp, libsizes = libsizes[which(samples)], len = len[which(samples)], tol = 1e-50, maximum = TRUE)$maximum)
        }
      }

      # With tied dispersions (or a consensus prior), a single dispersion is
      # estimated from all data in the row; rows with no non-zero observation
      # in a replicated group are given a near-zero dispersion.
      if(equalDispersions | consensus) {
          if(!any(sapply(replicates[which(cts != 0)], function(rep) sum(replicates == rep) > 1))) {
              disp <- 1e-50
          } else {
              disp <- switch(estimation,
                             QL = findDisp.QL(list(replicates = replicates, cts = cts, len = len, libsizes = libsizes)),
                             ML = findDisp.ML(list(replicates = replicates, cts = cts, len = len, libsizes = libsizes))
                             )
          }
          if(consensus) {
              mu <- getMu(replicates == sample(levels(replicates), size = 1), disp = disp)
              groupness <- c(mu, disp)
          } else {
              groupness <- lapply(groups, function(group) {
                  dispersion <- rep(disp, length(levels(group)))
                  mus <- sapply(levels(group), function(unqgrp) getMu(group == unqgrp, disp = disp))
                  list(dispersion = dispersion, mus = mus)
              })
          }
      } else {
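          # Unequal dispersions: estimate a separate dispersion within each
          # group level before estimating the corresponding means.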
          repgroups <- lapply(groups, function(group) list(group = group, repData = lapply(levels(group[!is.na(group)]), function(unqgrp) list(replicates = replicates[which(group == unqgrp)], cts = cts[which(group == unqgrp)], len = len[which(group == unqgrp)], libsizes = libsizes[which(group == unqgrp)]))))
          
          dispgroups <- switch(estimation,
                               QL = lapply(repgroups, function(repgroup) list(group = repgroup$group, dispersion = sapply(repgroup$repData, findDisp.QL))),
                               ML = lapply(repgroups, function(repgroup) list(group = repgroup$group, dispersion = sapply(repgroup$repData, findDisp.ML)))
                               )
          groupness <- lapply(dispgroups, function(dispgroup)
              list(dispersion = dispgroup$dispersion,                                                                     
                   mus = sapply(levels(dispgroup$group[!is.na(dispgroup$group)]), function(unqgrp) getMu(dispgroup$group == unqgrp, dispgroup$dispersion[levels(dispgroup$group) == unqgrp]))))
      }
      return(groupness)
  }
  
  if(!all(sapply(cD@groups, is.factor)))
    {
      cD@groups <- lapply(cD@groups, as.factor)
      warning("Not all members of the '@groups' slot were factors; converting now.")
    }
  if(!is.factor(cD@replicates))
    {
      cD@replicates <- as.factor(cD@replicates)
      warning("The '@replicates' slot is not a factor; converting now.")
    }

  if(is.null(samplingSubset))
    samplingSubset <- 1:nrow(cD)
  
  # Drop rows from the sampling subset in which any group level has only
  # missing observations.
  samplingSubset <- samplingSubset[rowSums(do.call("cbind", lapply(cD@groups, function(x) do.call("cbind", lapply(levels(x), function(rep) rowSums(is.na(cD@data[samplingSubset,which(x == rep),drop = FALSE])) == length(which(x == rep))))))) == 0]

  sD <- cD[samplingSubset,]
  
  libsizes <- as.vector(sD@sampleObservables$libsizes)
  groups <- sD@groups
  replicates <- as.factor(sD@replicates)

  cellObservables <- sD@cellObservables
  rowObservables <- sD@rowObservables

  # If no segment lengths are supplied, assume unit length for every row.
  if(!("seglens" %in% names(cellObservables) || "seglens" %in% names(rowObservables))) rowObservables <- c(rowObservables, list(seglens = rep(1, nrow(sD))))

  if("seglens" %in% names(cellObservables)) seglens <- cellObservables$seglens
  if("seglens" %in% names(rowObservables)) seglens <- rowObservables$seglens
  if(is.matrix(seglens) && ncol(seglens) == 1) seglens <- seglens[,1]
  
  if(is.vector(seglens)) lensameFlag <- TRUE else lensameFlag <- FALSE

  # Order rows on (counts, segment lengths) and find the first occurrence of
  # each distinct row, so that identical rows are optimised only once.
  tupData <- cbind(sD@data, seglens)
  
  ordData <- do.call(order, as.data.frame(tupData))
  if(length(ordData) > 1) {
    dups <- c(1, which(rowSums(tupData[ordData[-1],, drop = FALSE] == (tupData)[ordData[-length(ordData)],, drop = FALSE], na.rm = TRUE) != rowSums(!is.na(tupData[ordData[-1],, drop=FALSE]))) + 1)
  } else dups <- 1
  if(zeroML)
    {
      allzeros <- dups[which(rowSums(tupData[ordData[dups],-ncol(tupData), drop = FALSE], na.rm = TRUE) == 0)]
      if(length(allzeros) > 0)
        dups <- c(1, dups[!(dups %in% allzeros)])
    }
  
  
  # If the distinct rows fit within samplesize, use all of them with unit
  # weights; otherwise sample below.
  if(length(dups) <= samplesize) {
      y <- sD@data[ordData[dups],,drop = FALSE]
      copies <- diff(c(dups, nrow(sD@data) + 1))
      sampled <- (1:nrow(sD))[ordData]
      sy <- cbind(sampled = sampled, representative = rep(1:length(copies), copies))
      weights <- rep(1, nrow(sy))
      if(lensameFlag) seglensy <- seglens[ordData[dups]] else seglensy <- seglens[ordData[dups],]
  } else {
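    # More distinct rows than samplesize: stratify rows by quantiles of
    # normalised expression and sample within each stratum, weighting each
    # sampled row by (stratum size / number sampled from that stratum).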
    sampData <- colSums(t(sD@data/seglens) / libsizes, na.rm = TRUE) / ncol(sD)

    if(zeroML) nzSD <- sampData[sampData > 0] else nzSD <- sampData
      
    sqnum <- length(nzSD) / 1000
    squant <- quantile(nzSD, 1:sqnum / sqnum, na.rm = TRUE)
    sqdup <- c(1, which(diff(squant) > min(1 / libsizes) / 10))
    z <- cbind(as.numeric(squant[sqdup]), c(as.numeric(squant[sqdup[-1]]), max(nzSD, na.rm = TRUE)))
    if(zeroML) z[1,1] <- 0 else z[1,1] <- -Inf

    sy <- do.call("rbind",
                  lapply(1:nrow(z), function(ii) {
                    w <- z[ii,]
                    inbetweener <- which(sampData > w[1] & sampData <= w[2])
                    samplenum <- min(length(inbetweener), ceiling(samplesize / nrow(z)), na.rm = TRUE)
                    cbind(sample(inbetweener, size = samplenum, replace = FALSE), length(inbetweener) / samplenum)})
                )

    weights <- sy[,2]
    sy <- sy[,1]

    y <- sD@data[sy,,drop = FALSE]
    if(lensameFlag) seglensy <- seglens[sy] else seglensy <- seglens[sy,,drop = FALSE]

    ordData <- do.call(order, as.data.frame(cbind(y, seglensy)))
    y <- y[ordData,, drop = FALSE]
    weights <- weights[ordData]
    if(lensameFlag) seglensy <- seglensy[ordData] else seglensy <- seglensy[ordData,,drop = FALSE]
    
    dups <- c(1, which(rowSums((cbind(y,seglensy))[-1,] == (cbind(y, seglensy))[-nrow(y),], na.rm = TRUE) != rowSums(!is.na(cbind(y,seglensy)[-1,]))) + 1)
    copies <- diff(c(dups, nrow(y) + 1))

    sy <- cbind(sampled = sy[ordData], representative = rep(1:length(copies), copies))

    y <- y[dups,, drop = FALSE]
    if(lensameFlag) seglensy <- seglensy[dups] else seglensy <- seglensy[dups,, drop = FALSE]
    
    if(any(sampData == 0) & zeroML)
      {
        sy <- rbind(sy, cbind(sampled = which(sampData == 0), representative = nrow(y) + 1))
        y <- rbind(y, rep(0, ncol(y)))
        if(lensameFlag) seglensy <- c(seglensy, 1) else seglensy <- rbind(seglensy, rep(1, ncol(seglensy)))
        weights <- c(weights, rep(1, sum(sampData == 0)))
      }
  }

  z <- cbind(seglensy, y)  

  NBpar <- list()

  # Apply the per-row estimator to each sampled row, serially or in parallel
  # over the supplied cluster.
  if(is.null(cl)) {
    parEach <- apply(z, 1, optimoverPriors, estimation = estimation, replicates = replicates, groups = groups, libsizes = libsizes, equalDispersions = equalDispersions, lensameFlag = lensameFlag, zeroML = zeroML, consensus = consensus)
  } else {
    getPriorsEnv <- new.env(parent = .GlobalEnv)
    environment(optimoverPriors) <- getPriorsEnv
    parEach <- parApply(cl, z, 1, optimoverPriors, estimation = estimation, replicates = replicates, groups = groups, libsizes = libsizes, equalDispersions = equalDispersions, lensameFlag = lensameFlag, zeroML = zeroML, consensus = consensus)
  }
  
  # Reshape the per-row estimates into, for each group and group level, a
  # matrix of (mean, dispersion) pairs; missing dispersions are replaced by 1
  # below.
  if(consensus) NBpar <- t(parEach) else {
    NBpar <- lapply(1:length(groups), function(gg)
                    lapply(1:length(levels(groups[[gg]])), function(ii) t(sapply(parEach, function(x) c(x[[gg]]$mus[ii], c(x[[gg]]$dispersion[ii], NA)[as.numeric(is.na(x[[gg]]$dispersion[ii])) + 1])))))
  }

  if(!consensus) {
    NBpar <- lapply(1:length(groups), function(gg)
                    lapply(1:length(levels(groups[[gg]])), function(ii) {
                      par <- NBpar[[gg]][[ii]]
                      par[is.na(par[,2]),2] <- 1
                                        #par[is.na(par),2] <- mean(par[,2], na.rm = TRUE)
                      par
                  }))
    names(NBpar) <- names(groups)
  } else NBpar[is.na(NBpar[,2]),2] <- 1
                
  if(!is.null(cl)) clusterEvalQ(cl, rm(list = ls()))

  if(verbose) message("done.")

  # Map sampled row indices back to the row numbering of the original object.
  sy[,1] <- samplingSubset[sy[,1]]
                                        
  NBpar <- list(sampled = cbind(sy, weights = weights / sum(weights)), priors = NBpar)
  new(class(cD), cD, priorType = "NB-QL", priors = NBpar, densityFunction = nbinomDensity)
}
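
# Example (not run): a minimal usage sketch following the baySeq vignette; the
# 'simData' dataset and the group structure below are illustrative and should
# be adapted to the data at hand.
#   library(baySeq)
#   data(simData)
#   replicates <- rep(c("simA", "simB"), each = 5)
#   groups <- list(NDE = rep(1, 10), DE = rep(1:2, each = 5))
#   CD <- new("countData", data = simData, replicates = replicates, groups = groups)
#   libsizes(CD) <- getLibsizes(CD)
#   CD <- getPriors.NB(CD, samplesize = 1e4, estimation = "QL", cl = NULL)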
