# R/disc1997YK.R
#
# Defines functions: disc1997YK, compute1997YK
#
# Documented in: disc1997YK

#' Bayesian Estimation of Shannon Entropy by Yuan and Kesevan (1997)
#' 
#' 
#' @param counts count information. We allow three types of inputs as below: \describe{
#' \item{\code{vector}}{a vector of length \eqn{K} where entries are counts \eqn{(n_1,n_2,\ldots,n_K)} for each alphabet.}
#' \item{\code{histogram}}{an object of class \code{histogram} with \eqn{K+1} breaks.}
#' \item{\code{table}}{an object of class \code{table} of length \eqn{K}.}
#' }
#' @param alpha concentration parameter number or vector. If a single number is provided, we consider it as 
#' \eqn{\alpha_1=\alpha_2=\ldots=\alpha_K=\alpha}. If a vector is provided, it must have the same length as counts.
#' 
#' @return estimated entropy value in natural logarithm.
#' 
#' @examples 
#' ## ----------------------------------------- ##
#' ##  basic usage with three types of inputs
#' ## ----------------------------------------- ##
#' 
#' #  Sample 50 numbers from 1 to 5 uniformly.
#' xx = sample(1:5, 50, replace=TRUE)
#' 
#' #  Create histogram, table, and count vector
#' hx = hist(xx, breaks=seq(from=0.5,to=5.5,by=1), plot=FALSE)
#' tx = table(xx)
#' cx = round(as.double(tx))
#' 
#' # compare three objects
#' line1 = paste0("entropy from histogram : ",round(disc1997YK(hx),4))
#' line2 = paste0("entropy from table     : ",round(disc1997YK(tx),4))
#' line3 = paste0("entropy from counts    : ",round(disc1997YK(cx),4))
#' cat("\n",line1,"\n",line2,"\n",line3)
#' 
#' \dontrun{
#' ## ----------------------------------------- ##
#' ## effect of concentration parameter 'alpha'
#' ## ----------------------------------------- ##
#' 
#' # we consider scalar-valued alphas
#' vec.alpha = 10^(seq(from=-2,to=2,length.out=100))
#' vec.entmu = rep(0,100)
#' 
#' # for each alpha, repeat it 496 times and use table input
#' for (i in 1:100){
#'   enti = rep(0,496)
#'   for (j in 1:496){
#'     tx = table(sample(1:5, 100, replace=TRUE))
#'     enti[j] = disc1997YK(tx, alpha=vec.alpha[i])
#'   }
#'   vec.entmu[i] = base::mean(enti)
#'   print(paste0("* iteration ",i,"/100 complete..."))
#' }
#' 
#' # visualize
#' opar <- par(no.readonly=TRUE)
#' plot(vec.alpha, vec.entmu, lwd=2, type="b", cex=0.5,
#'      xlab="alpha", ylab="entropy", main="Bayesian Estimate of Entropy")
#' par(opar)
#' }
#' 
#' 
#' @references 
#' \insertRef{yuan_bayesian_1997}{T4entropy}
#' 
#' @author Kisung You
#' @export
disc1997YK <- function(counts, alpha=1){
  ###################################################################
  # Preprocessing: validate the count input (vector/histogram/table)
  # and expand/validate the concentration parameter to length K.
  count.vec = check_counts(counts, "disc1997YK")
  alpha.vec = check_alpha(alpha, length(count.vec), "disc1997YK")
  
  ###################################################################
  # Delegate the actual entropy computation to the internal routine
  compute1997YK(count.vec, alpha.vec)
}


# auxiliary function for 'disc1997YK' -------------------------------------
# Bayesian posterior-mean estimate of Shannon entropy (natural log) under a
# Dirichlet(vec.alpha) prior, following Yuan and Kesevan (1997):
#   H = - sum_i w_i * ( digamma(a_i + n_i + 1) - digamma(A + N + 1) )
# where a_i are the prior concentrations, A = sum(a_i), N = sum(n_i), and
# w_i = (a_i + n_i) / (A + N) are the posterior mean probabilities.
#
# Note: the paper writes the prior as alpha * pi_i with pi_i = a_i / A;
# that product is exactly vec.alpha[i], so we use it directly instead of
# the divide-then-multiply round trip of the original loop. The whole
# computation is vectorized (digamma is vectorized in base R), which also
# avoids the unsafe `1:s` loop pattern.
#' @keywords internal
compute1997YK <- function(vec.ni, vec.alpha){
  # totals: N observations and total concentration A
  n     = sum(vec.ni)
  alpha = sum(vec.alpha)
  
  # posterior Dirichlet parameters and posterior mean probabilities
  post    = vec.alpha + vec.ni
  weights = post/(alpha + n)
  
  # vectorized entropy estimate
  -sum(weights * (digamma(post + 1) - digamma(alpha + n + 1)))
}
# Source: kyoustat/T4entropy (documentation built March 6, 2020)