R/likelihood.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

#' @rdname likelihood
#'
#' @title NMF and Topic Model Likelihoods and Deviances
#'
#' @description Compute log-likelihoods and deviances for assessing
#'   fit of a topic model or a non-negative matrix factorization (NMF).
#'
#' @details Function \code{cost} computes loss functions proportional
#'   to the negative log-likelihoods, and is mainly for internal use to
#'   quickly compute log-likelihoods and deviances; it should not be
#'   used directly unless you know what you are doing. In particular,
#'   little argument checking is performed by \code{cost}.
#'
#' @param X The n x m matrix of counts or pseudocounts. It can be a
#'   sparse matrix (class \code{"dgCMatrix"}) or dense matrix (class
#'   \code{"matrix"}).
#'
#' @param fit A Poisson NMF or multinomial topic model fit, such as an
#'   output from \code{\link{fit_poisson_nmf}} or
#'   \code{\link{fit_topic_model}}. 
#'
#' @param A The n x k matrix of loadings. It should be a dense matrix.
#'
#' @param B The k x m matrix of factors. It should be a dense matrix.
#'
#' @param e A small, non-negative number added to the terms inside the
#'   logarithms to avoid computing logarithms of zero. This prevents
#'   numerical problems at the cost of introducing a very small
#'   inaccuracy in the computation.
#'
#' @param family If \code{model = "poisson"}, the loss function values
#'   corresponding to the Poisson non-negative matrix factorization are
#'   computed; if \code{model = "multinom"}, the multinomial topic model
#'   loss function values are returned.
#'
#' @param version When \code{version == "R"}, the computations are
#'   performed entirely in R; when \code{version == "Rcpp"}, an Rcpp
#'   implementation is used. The R version is typically faster when
#'   \code{X} is a dense matrix, whereas the Rcpp version is faster and
#'   more memory-efficient when \code{X} is a large, sparse matrix. When
#'   not specified, the most suitable version is called depending on
#'   whether \code{X} is dense or sparse.
#'
#' @return A numeric vector with one entry per row of \code{X}.
#'
#' @examples
#'
#' # Generate a small counts matrix.
#' set.seed(1)
#' out <- simulate_count_data(10,20,3)
#' X   <- out$X
#' fit <- out[c("F","L")]
#' class(fit) <- c("poisson_nmf_fit","list")
#' 
#' # Compute the Poisson log-likelihoods and deviances.
#' data.frame(loglik   = loglik_poisson_nmf(X,fit),
#'            deviance = deviance_poisson_nmf(X,fit))
#' 
#' # Compute multinomial log-likelihoods.
#' loglik_multinom_topic_model(X,fit)
#' 
#' @export
#' 
loglik_poisson_nmf <- function (X, fit, e = 1e-8)
  loglik_helper(X,fit,"loglik.poisson",e)

#' @rdname likelihood
#' 
#' @export
#' 
loglik_multinom_topic_model <- function (X, fit, e = 1e-8)
  loglik_helper(X,fit,"loglik.multinom",e)

#' @rdname likelihood
#' 
#' @export
#' 
deviance_poisson_nmf <- function (X, fit, e = 1e-8)
  loglik_helper(X,fit,"deviance.poisson",e)

#' @rdname likelihood
#'
#' @importFrom Rcpp evalCpp
#' 
#' @export
#'
cost <- function (X, A, B, e = 1e-8, family = c("poisson","multinom"),
                  version) {

  # For the special case of a rank-1 factorization, A and B may need
  # to be vectors, in which case we need to force them to be matrices.
  if (!is.matrix(A))
    A <- matrix(A)
  if (!is.matrix(B))
    B <- matrix(B,1,ncol(X))
    
  # Check and process "model" and "version" input arguments.
  family  <- match.arg(family)
  poisson <- family == "poisson"
  if (missing(version)) {
    if (is.matrix(X))
      version <- "R"
    else
      version <- "Rcpp"
  }

  # Compute the terms in the log-likelihoods that depend on A or B.
  if (version == "R") {
    AB <- A %*% B
    f  <- rowSums(poisson*AB - X*log(AB + e))
  } else if (version == "Rcpp") {
    if (is.matrix(X))
      f <- drop(cost_rcpp(X,A,B,e,poisson))
    else
      f <- drop(cost_sparse_rcpp(X,A,B,e,poisson))
  }
  names(f) <- rownames(X)
  return(f)
}

# This function provides the core implementation for calculation of
# log-likelihoods and deviances.
loglik_helper <- function (X, fit,
  output.type = c("loglik.poisson","loglik.multinom","deviance.poisson"), e) {

  # Verify and process input "output.type".
  output.type <- match.arg(output.type)
      
  # Verify and process inputs "X" and "fit". 
  if (!(inherits(fit,"poisson_nmf_fit") |
        inherits(fit,"multinom_topic_model_fit")))
    stop("Input argument \"fit\" should be an object of class ",
         "\"multinom_topic_model_fit\" or \"multinom_topic_model_fit\"")
  if (inherits(fit,"multinom_topic_model_fit"))
    fit <- multinom2poisson(fit)
  verify.fit.and.count.matrix(X,fit)
  if (is.matrix(X) & is.integer(X))
    storage.mode(X) <- "double"
  F <- fit$F
  L <- fit$L

  # Compute the log-likelihood or deviance.
  if (output.type == "loglik.poisson" | output.type == "loglik.multinom") {
    f <- loglik_poisson_const(X) - cost(X,L,t(F),e,"poisson")
    if (output.type == "loglik.multinom")
      f <- f - loglik_size_factors(X,fit$F,fit$L)
  } else if (output.type == "deviance.poisson")
    f <- deviance_poisson_const(X) + 2*cost(X,L,t(F),e,"poisson")
  return(f)
}

# Compute the constant terms in the Poisson log-likelihoods.
#
#' @importFrom Matrix rowSums
#
loglik_poisson_const <- function (X) {
  if (is.matrix(X))
    return(-rowSums(lgamma(X + 1)))
  else
    return(-rowSums(apply.nonzeros(X,function (x) lgamma(x + 1))))
}

# Compute the constant terms in the multinomial log-likelihoods.
loglik_multinom_const <- function (X)
  lgamma(rowSums(X) + 1) + loglik_poisson_const(X)

# Compute the constant terms in the Poisson deviances.
#
#' @importFrom Matrix rowSums
#
deviance_poisson_const <- function (X) {
  e <- .Machine$double.eps
  if (is.matrix(X))
    return(2*rowSums(X*(log(X + e) - 1)))
  else
    return(2*rowSums(apply.nonzeros(X,function (x) x*(log(x + e) - 1))))
}

# Compute the residuals of the first-order Karush-Kuhn-Tucker (KKT)
# conditions for Poisson non-negative matrix factorization at solution
# estimate (F,L).
poisson_nmf_kkt <- function (X, F, L, e = 1e-8) {
  if (is.matrix(X)) {
    A <- X/(tcrossprod(L,F) + e)
    return(list(F = F*(t(1 - A) %*% L),
                L = L*((1 - A) %*% F)))
  } else {
    A <- x_over_tcrossprod(X,t(L),t(F),e)
    return(list(F = F*(repmat(colSums(L),ncol(X)) - as.matrix(t(A) %*% L)),
                L = L*(repmat(colSums(F),nrow(X)) - as.matrix(A %*% F))))
  }
}

# Given a Poisson non-negative matrix factorization (F, L), compute
# the log-likelihoods for the "size factors"; that is, each vector
# element is a log-likelihood for the model t ~ Poisson(s), where
# t = sum(x) is the total sum of the counts. The size factors, s, are
# recovered from the poisson2multinom transformation.
#
#' @importFrom stats dpois
loglik_size_factors <- function (X, F, L)
  ldpois(rowSums(X),get_multinom_from_pnmf(F,L)$s)

# Compute the log-density for the Poisson distribution with rate
# lambda at x. Inputs x and lambda may be vectors, and x need not be
# integer-valued.
ldpois <- function (x, lambda)
  x*log(lambda) - lambda - lfactorial(x)

Any scripts or data that you put into this service are public.

fastTopics documentation built on Sept. 11, 2024, 5:14 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

fastTopics
Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

R/likelihood.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

Try the fastTopics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastTopics Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

R/likelihood.R In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

Try the fastTopics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastTopics
Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

R/likelihood.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data