R/poisson.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

# For each column of the counts matrix, compute MLEs of the parameters
# in the Poisson glm. Input argument "method" may be set to "glm", or
# to any valid setting for "method" in update_factors_poisson_nmf.
#
#' @importFrom stats glm.control
fit_poisson_models <- function (X, L, method, eps = 1e-15, numiter = 100,
                                tol = 1e-8, nc = 1) {

  # Get the number of topics (k) and the number of data columns (m).
  m <- ncol(X)
  k <- ncol(L)
  
  # For each column of X, compute MLEs of the Poisson model parameters.
  if (method == "glm") {
    control <- glm.control(epsilon = tol,maxit = numiter)
    F <- fit_poisson_models_glm(X,L,control)
  } else {
    control <- list(numiter = numiter,nc = nc,eps = eps)
    F <- matrix(0.5,m,k)
    F <- update_factors_poisson_nmf(X,F,L,1:m,method,control)
  }
  return(F)
}

# Implements fit_poisson_models for method = "glm".
fit_poisson_models_glm <- function (X, L, control)  {
  m <- ncol(X)
  k <- ncol(L)
  F <- matrix(0,m,k)
  for (i in 1:m)
    F[i,] <- fit_poisson_glm(X[,i],L,control = control)$coef
  return(F)
}

# Fit the Poisson glm, x[i] ~ Poisson(u[i]), using the "glm" function,
# in which the Poisson rates are u[i] = sum(L[i,]*f). Inputs x and L are
# the data: x is a vector of length n; and L is an n x k matrix. Input
# argument f is a vector of length k giving initial estimates of the
# coefficients.
#
#' @importFrom stats formula
#' @importFrom stats glm
#' @importFrom stats glm.control
#' @importFrom stats summary.glm
#' @importFrom stats poisson
#'
fit_poisson_glm <-
  function (x, L, f = rep(0.5,ncol(L)),
            control = glm.control(epsilon = 1e-10, maxit = 100)) {
  k <- length(f)
  dat <- as.data.frame(cbind(x,L))
  names(dat) <- c("x",paste0("f",1:k))
  model <- formula(paste("x ~",paste(names(dat)[-1],collapse = " + "),"- 1"))
  fit <- suppressWarnings(glm(model,family = poisson(link = "identity"),
                              data = dat,start = f,control = control))
  return(list(fit = fit,coef = summary.glm(fit)$coefficients[,"Estimate"]))
}

# Simulate draws from the posterior distribution of f via random-walk
# Metropolis on g = log(f). The posterior distribution is based on a
# uniform prior and the Poisson glm likelihood with identity link
# function. Input ns specifies the number of Monte Carlo samples to
# simulate. Input s determines the width (standard deviation) of the
# random walk, and input f is the initial state of the Markov
# chain. In the code below, g = log(f) is the current state of the
# Markov chain.
#
# The outputs are (1) "samples", an ns x k matrix of Monte Carlo
# samples of g = log(f), where k = length(f), and (2) "ar", the
# Metropolis acceptance rate for each co-ordinate k.
#
# This is mainly used to test simulate_posterior_poisson_rcpp and is
# not actually used in practice because it is too slow.
#
#' @importFrom stats runif
#' @importFrom stats rnorm
#' @importFrom stats dpois
#' 
simulate_posterior_poisson <- function (x, L, f, ns = 1000, s = 0.3,
                                        e = 1e-15) {
  k       <- length(f)
  g       <- log(f)
  ar      <- rep(0,k)
  visits  <- rep(0,k)
  samples <- matrix(0,ns,k)
  D <- matrix(rnorm(ns*k),ns,k)
  U <- matrix(runif(ns*k),ns,k)
  M <- matrix(sample(k,ns*k,replace = TRUE),ns,k)
  for (i in 1:ns) {
    for (t in 1:k) {

      # Randomly suggest moving to gj* = gj + d, where d ~ N(0,s).
      j         <- M[i,t]
      gnew      <- g
      d         <- s*D[i,t]
      gnew[j]   <- g[j] + d
      visits[j] <- visits[j] + 1

      # Compute the Metropolis acceptance probability, and move to the
      # new state according to this acceptance probability. Note that
      # the additional "d" in the acceptance probability is needed to
      # account for the fact that we are simulating g = log(f), not f;
      # see p. 11 of Devroye (1986) "Non-uniform random variate
      # generation".
      u     <- drop(L %*% exp(g))
      unew  <- drop(L %*% exp(gnew))
      ll    <- sum(dpois(x,u + e,log = TRUE))
      llnew <- sum(dpois(x,unew + e,log = TRUE))
      a     <- exp((llnew - ll) + d)
      a     <- min(1,a)
      if (U[i,t] < a) {
        g <- gnew
        ar[j] <- ar[j] + 1
      }
    }

    # Store the current state of the Markov chain.
    samples[i,] <- g
  }

  # Output the states of the Markov chain and the acceptance rates.
  return(list(samples = samples,ar = ar/visits))
}

# Return the covariance of g = log(f) in the Poisson glm, x[i] ~
# Poisson(u[i]), in which the Poisson rates are u[i] = L[i,]*f. The
# covariance calculations are based on a Laplace approximation to the
# likelihood. The input f should contain MLEs of the coefficients.
compute_poisson_covariance <- function (x, L, f) {
  u <- drop(L %*% f)
  return(chol2inv(chol(tcrossprod(f) * crossprod(sqrt(x)/u*L))))
}

# Add pseudocounts to the data for inference with the Poisson glm;
# specifically, add k rows of counts e to the data matrix, and replace
# the loadings matrix L with rbind(L,I), where I is the k x k identity
# matrix. This is used to extend ML estimation to MAP estimation for
# the Poisson glm, in which f[j] is assigned prior Gamma(a,b), with
# a=1+e, b=1, for j = 1,...,k.
add_pseudocounts <- function (X, L, e = 0.01) {
  m <- ncol(X)
  k <- ncol(L)
  return(list(X = rbind(X,matrix(e,k,m)),
              L = rbind(L,diag(k))))
}

Any scripts or data that you put into this service are public.

fastTopics documentation built on Sept. 11, 2024, 5:14 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

fastTopics
Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

R/poisson.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

Try the fastTopics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastTopics Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

R/poisson.R In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

Try the fastTopics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastTopics
Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

R/poisson.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data