R/stepbins.R

Defines functions stepbins stepbinsTail stepbinsNotail

Documented in stepbins stepbinsNotail stepbinsTail

stepbins <- function(bEdges, bCounts, m=NULL, tailShape = c("onebin", "pareto", "exponential"),
                     nTail=16, numIterations=20, pIndex=1.160964, tbRatio=0.8) {
  L <- length(bCounts)
  if(!(is.na(bEdges[L]) | is.infinite(bEdges[L])))
    warning("Top bin is bounded. Expect inaccurate results.\n")
  if(is.null(m)) { # no mean provided, so make one up
    warning("No mean provided: expect inaccurate results.\n")
    m <- sum(0.5*(c(bEdges[1:(L-1)],2.0*bEdges[L-1])+c(0, bEdges[1:(L-1)]))*bCounts/sum(bCounts))
  }
  tailShape <- match.arg(tailShape)
  if(tailShape == "onebin")
    stepbinsNotail(bEdges, bCounts, m)
  else
    stepbinsTail(bEdges, bCounts, m, tailShape, nTail, numIterations, pIndex, tbRatio)
}

stepbinsTail <- function(bEdges, bCounts, m, tailShape = c("pareto", "exponential"),
                         nTail, numIterations, pIndex, tbRatio) {
  tailShape <- match.arg(tailShape)
  L <- length(bCounts)
  tot <- sum(bCounts)
  e <- c(0,bEdges[1:(L-1)],numeric(nTail)) # preallocate tail edges
  shrinkFactor <- 1
  shrinkMultiplier <- 0.995
  tailCount <- bCounts[L]
  bbtot <- tot-tailCount
  bbMean <- sum((e[2:(L)]+e[1:(L-1)])*bCounts[1:(L-1)])/(2*bbtot) # mean of bounded bins as a step function
  while(m<bbMean) { # binning must be skewed, because supplied mean is less than mean of bounded bins
    e <- e*shrinkMultiplier # shrink the bins
    bbMean <- sum((e[2:(L)]+e[1:(L-1)])*bCounts[1:(L-1)])/(2*bbtot)
    shrinkFactor <- shrinkFactor*shrinkMultiplier
  }
  if(tailCount>0) {
    # there is remaining area in final bin, so make a tail of bins
    L <- L+nTail-1
    tailArea <- tailCount/tot
    bAreas <- c(bCounts/tot, numeric(nTail-1))
    tbWidth <- e[L-nTail+1]-e[L-nTail] # initial tail bin width = width of last bounded bin
    h <- bAreas[L-nTail]/tbWidth # height of last bounded bin
    if(tailShape=="pareto")
      tailUnscaled <- (1:nTail)^(-1-pIndex)
    else
      tailUnscaled <- tbRatio^(1:nTail)
    bAreas[(L-nTail+1):(L)] <- tailUnscaled/sum(tailUnscaled)*tailArea
    repeat {
      e[(L-nTail+2):(L+1)] <- e[L-nTail+1]+(1:(nTail))*tbWidth
      bMean <- sum((e[2:(L+1)]+e[1:L])*bAreas)/2
      if(bMean>m) break # now the correct tbWidth is between 0 and tbWidth
      tbWidth <- tbWidth*2
    }
    l <- 0
    r <- tbWidth
    for(i in 1:numIterations) {
      tbWidth <- (l+r)/2
      e[(L-nTail+2):(L+1)] <- e[L-nTail+1]+(1:(nTail))*tbWidth
      bMean <- sum((e[2:(L+1)]+e[1:L])*bAreas)/2
      if (bMean<m) # bin mean is too small; lengthen bins
        l <- tbWidth
      else
        r <- tbWidth
    }
  }
  else { # final bin is empty, so get rid of it
    L <- L-1
    bAreas <- bCounts[1:L]/tot
    e <- e[1:(L+1)]
  }
  cAreas <- vapply(1:length(bAreas), function(x){sum(bAreas[1:x])}, numeric(1))
  bHeights <- bAreas/(e[2:(L+1)]-e[1:L])
  stepCDF <- approxfun(e, c(0, cAreas), yleft=0, yright=1, rule=2)
  stepPDF <- stepfun(e, c(0, bHeights, 0))
  return(list(stepPDF=stepPDF, stepCDF=stepCDF, E=e[L+1], shrinkFactor=shrinkFactor))
}

stepbinsNotail <- function(bEdges, bCounts, m) {
  L <- length(bCounts)
  tot <- sum(bCounts)
  p <- c(1,1-vapply(1:(L-1), function(x){sum(bCounts[1:x])}, numeric(1))/tot)
  e <- c(0,bEdges[1:(L-1)],0) # preallocate last entry; replace with E later
  A <- 0.5*sum((e[2:L]-e[1:(L-1)])*(p[2:L]+p[1:(L-1)]))
  shrinkFactor <- 1
  shrinkMultiplier <- 0.995
  while(m<A) { # binning must be skewed, because supplied mean is too small
    e <- e*shrinkMultiplier # shrink the bins
    A <- 0.5*sum((e[2:L]-e[1:(L-1)])*(p[2:L]+p[1:(L-1)]))
    shrinkFactor <- shrinkFactor*shrinkMultiplier
  }
  E <- ifelse(p[L]>0, bEdges[L-1]+2*(m-A)/p[L], bEdges[L-1]*1.001) # make width of final bin small if empty
  e[L+1] <- E # this is the right border of the last bin so the mean is preserved
  bAreas <- bCounts/tot
  cAreas <- vapply(1:length(bAreas), function(x){sum(bAreas[1:x])}, numeric(1))
  bHeights <- bAreas/(e[2:(L+1)]-e[1:L])
  stepCDF <- approxfun(e, c(0, cAreas), yleft=0, yright=1, rule=2)
  stepPDF <- stepfun(e, c(0, bHeights, 0))
  return(list(stepPDF=stepPDF, stepCDF=stepCDF, E=E, shrinkFactor=shrinkFactor))
}

Try the binsmooth package in your browser

Any scripts or data that you put into this service are public.

binsmooth documentation built on March 26, 2020, 7:17 p.m.