R/ebpm_gamma_mixture_single_scale.R

Defines functions ebpm_gamma_mixture_single_scale

Documented in ebpm_gamma_mixture_single_scale

#' @title Empirical Bayes Poisson Mean with Mixture of Gamma as Prior (choosing one scale and varying shape)
#' @description Uses Empirical Bayes to fit the model \deqn{x_j | \lambda_j ~ Poi(s_j \lambda_j)} with \deqn{lambda_j ~ g()}
#' with Mixture of Exponential: \deqn{g()  = sum_k pi_k gamma(shape = 1, rate = b_k)} 
#' b_k is selected to cover the lambda_i  of interest for all data  points x_i
#' @import mixsqp

#' @details The model is fit in 2 stages: i) estimate \eqn{g} by maximum likelihood (over pi_k)
#' ii) Compute posterior distributions for \eqn{\lambda_j} given \eqn{x_j,\hat{g}}.
#' @param x A vector of Poisson observations.
#' @param s A vector of scaling factors for Poisson observations: the model is \eqn{y[j]~Pois(s[j]*lambda[j])}.
#' @param scale  Either a scalar or \code{"max"}, which uses \code{max(x/s)} as scale. 
#'   This specifies the scale used in all grids in the mixture of gammas.
#' @param shape Either a vector, or "estimate", which finds grid for shape that makes the range the prior mean cover all \code{x/s}
#' @param g_init The prior distribution \eqn{g}, of the class \code{gammamix}. Usually this is left
#'   unspecified (\code{NULL}) and estimated from the data. However, it can be
#'   used in conjuction with \code{fix_g = TRUE} to fix the prior (useful, for
#'   example, to do computations with the "true" \eqn{g} in simulations). If
#'   \code{g_init} is specified but \code{fix_g = FALSE}, \code{g_init}
#'   specifies the initial value of \eqn{g} used during optimization. 
#'
#' @param fix_g If \code{TRUE}, fix the prior \eqn{g} at \code{g_init} instead
#'   of estimating it.
#'   
#' @param m multiple coefficient when selectig grid, so the  b_k is of the form {low*m^{k-1}}; must be greater than 1; default is 2
#' @param control A list of control parameters  to be passed to the optimization function. `mixsqp` is  used.
#'
#' @return A list containing elements:
#'     \describe{
#'       \item{\code{posterior}}{A data frame of summary results (posterior
#'         means, and posterior log mean).}
#'       \item{\code{fitted_g}}{The fitted prior \eqn{\hat{g}}, of class \code{gammamix}} 
#'       \item{\code{log_likelihood}}{The optimal log likelihood attained
#'         \eqn{L(\hat{g})}.}
#'      }
#' @examples 
#'    beta = c(rep(0,50),rexp(50))
#'    x = rpois(100,beta) # simulate Poisson observations
#'    s = replicate(100,1)
#'    m = 2
#'    out = ebpm::ebpm_gamma_mixture(x,s)
#'    
#' @export

## compute ebpm_gamma_mixture problem
ebpm_gamma_mixture_single_scale <- function(x,s = 1,  shape = "estimate", scale = "max", g_init = NULL, fix_g = FALSE,m = 2, control =  NULL, low = NULL){
  #browser()
  ## a quick  fix when all `x` are 0
  if(max(x) == 0){
    return(ebpm_point_gamma(x = x, s = s, g_init = g_init, fix_g = fix_g))
  }
  if(length(s) == 1){s = replicate(length(x),s)}
  if(is.null(control)){control = mixsqp_control_defaults()}
  if(is.null(g_init)){
    fix_g = FALSE ## then automatically unfix g if specified so
    if(identical(scale, "max")){scale = max(x/s)}
    else{if(identical(scale, "estimate")){stop("the option `scale=estimate` is not implemented for gamma mixtures")}}
    if(identical(shape, "estimate")){shape =  select_shape_gamma(x  = x, s = s, scale = scale, m = m, d = NULL, low = low)}
    g_init = scale2gammamix_init(list(shape = shape, scale =  replicate(length(shape),scale)))
  }
  
  if(!fix_g){ ## need to estimate g_hat
    b = 1/g_init$scale ##  from here use gamma(shape = a, rate = b)  where E = a/b
    a = g_init$shape
    tmp <-  compute_L(x,s,a, b)
    L =  tmp$L
    l_rowmax = tmp$l_rowmax
    fit <- mixsqp(L, x0 = g_init$pi, control = control)
    pi = fit$x
    pi = pi/sum(pi) ## seems that some times pi does not sum to one
  }
  else{
    pi = g_init$pi
    a = g_init$shape
    b = 1/g_init$scale
    ## compute loglikelihood
    tmp <-  compute_L(x,s,a, b)
    L =  tmp$L
    l_rowmax = tmp$l_rowmax
  }
  fitted_g = gammamix(pi = pi, shape = a,  scale  = 1/b)
  log_likelihood = sum(log(exp(l_rowmax) * L %*%  pi))
  
  cpm = outer(x,a,  "+")/outer(s, b, "+")
  Pi_tilde = t(t(L) * pi)
  Pi_tilde = Pi_tilde/rowSums(Pi_tilde)
  lam_pm = rowSums(Pi_tilde * cpm)
  c_log_pm = digamma(outer(x,a,  "+")) - log(outer(s, b, "+"))
  lam_log_pm = rowSums(Pi_tilde * c_log_pm)
  posterior = data.frame(mean = lam_pm, mean_log = lam_log_pm)
  return(list(fitted_g = fitted_g, posterior = posterior,log_likelihood = log_likelihood))
}
stephenslab/ebpm documentation built on Oct. 19, 2023, 1 p.m.