R/estimate.logp.overallintensity.R

Defines functions estimate.logp.overallintensity

Documented in estimate.logp.overallintensity

#' Take a series of Poisson count signals x, with data on different samples
#' in each row, and compute mean and variance of log(p) [log(mu) if read.depth is
#' not provided] for overall intensity. 
#' optionally estimate the "effect" of a covariate g if g is provided.

estimate.logp.overallintensity <- function(x, g=NULL, read.depth = NULL, reflect=FALSE, center=FALSE, repara=TRUE, lm.approx=TRUE, baseline="inter"){
  
  
  if(!is.numeric(x)) stop("Error: invalid parameter 'x': 'x' must be numeric")
  if(!is.null(g)) if(!(is.numeric(g)|is.factor(g))) stop("Error: invalid parameter 'g', 'g' must be numeric or factor or NULL")
  if(!is.logical(center)) stop("Error: invalid parameter 'center', 'center' must be bool")#need this
  if(!((baseline=="inter")|(baseline=="grp")|is.numeric(baseline))) stop("Error: invalid parameter 'baseline', 'baseline' can be a number or 'inter' or 'grp'")
  if(!is.logical(reflect)) stop("Error: invalid parameter 'reflect', 'reflect' must be bool")
  if(!is.logical(repara)) stop("Error: invalid parameter 'repara', 'repara'  must be bool")
  if(!is.logical(lm.approx)) stop("Error: invalid parameter 'lm.approx', 'lm.approx'  must be bool")
  
  
  if(is.vector(x)){dim(x)<- c(1,length(x))} #change x to matrix
  nsig = nrow(x)
  if(!is.null(read.depth)) if(length(read.depth)!=nsig) stop("Error: read depths for all samples are not provided")
  if(!is.null(g)) if(length(g)!=nsig) stop("Error: covariate g for all samples are not provided")
  if(center==FALSE&baseline=="inter"){baseline="grp"}
  
  J = log2(ncol(x)); if((J%%1)!=0){reflect=TRUE} #if ncol(x) is not a power of 2, reflect x
  if(reflect==TRUE) reflect.indices=reflectSignal(x) #reflect signal; this function is pseudo calling x by reference
  
  
  if(is.null(g)){
    #define res.rate the (log) total number of counts
    if(is.null(read.depth)){
      res.rate=list(lp.mean=log(mean(rowSums(x))), lp.var=0)
    }else{
      res.rate=list(lp.mean=log(mean(rowSums(x/(read.depth%o%rep(1,n))))), lp.var=0)
    }
    
    return(list(lp.mean = res.rate$lp.mean, lp.var = res.rate$lp.var, lpratio.mean=NULL, lpratio.var = NULL, center=center, repara=repara, baseline = baseline))                 
  }else{
    if(is.factor(g)){
      g.num = as.numeric(levels(g))[g]
    }else{
      g.num=g
      if(length(unique(g))==2)
        g=factor(g)
    }
    #weights for quantitative covariate
    if(center==TRUE){
      w=unique(sort(g.num-mean(g.num)))
    }else{
      w=c(0,1)
    }
    xRowSums = rowSums(x)
    if (is.null(read.depth)){  #if sequencing depth is not present then obtain total intensities and ratio of total intensities by taking sums of total intensities in each group
      #define the "failures" this way so that the intercept will be the estimate of total intensity, and the slope will be the estimate of ratio of total intensities
      y.rate=matrix(c(xRowSums,rep(1,nsig)),ncol=2)
      zdat.rate = as.vector(glm.approx(y.rate, g=g, center=center, repara=repara, lm.approx=FALSE, bound=0))
      res.rate=compute.res.rate(zdat.rate, repara, baseline, w, read.depth)
    }else{
      ##run glm.approx to get zdat.rate
      #consider the raw data as binomial counts from a given total number of trials (sequencing depth)
      y.rate = matrix(c(xRowSums,read.depth-xRowSums),ncol=2)
      zdat.rate = as.vector(glm.approx(y.rate, g=g, center=center, repara=repara, lm.approx=lm.approx, disp=disp, bound = 0))
      #computes mean and variance for the baseline overall intensity (used in reconstructing the baseline estimate later)
      res.rate=compute.res.rate(zdat.rate, repara, baseline, w, read.depth, g)
    }
    return(list(lp.mean = res.rate$lp.mean, lp.var = res.rate$lp.var, lpratio.mean=res.rate$lpratio.mean, lpratio.var =res.rate$lpratio.var, center=center, repara=repara, baseline = baseline))
  }
  
}
shimlab/HMTree documentation built on May 29, 2019, 9:25 p.m.