# R/fitting.R In jreduardo/cmpreg: Reparametrized COM-Poisson Regression Models

#' @useDynLib cmpreg, .registration = TRUE
#' @importFrom Rcpp sourceCpp
NULL

#-----------------------------------------------------------------------
#' @rdname llcmp-compute
#' @title Negative of the log-likelihood function from the
#'   reparametrized COM-Poisson model
#' @description Compute the negative of the logarithm of the likelihood
#'   function for a set of observations \code{y} from the reparametrized
#'   COM-Poisson model given the \eqn{\mu} and \eqn{\nu} parameters (see
#'   Details).
#' @details The log-likelihood function is given by
#'   \deqn{\ell(\beta,\nu) = \sum y \log\lambda - \nu\log y! -
#'   \log[Z(\lambda,\nu)],} where \deqn{\log\lambda = \nu \log\left(\mu
#'   - \frac{\nu-1}{2\nu}\right),} and \eqn{\log[Z(\lambda, \nu)]} is a
#'   normalizing constant computed in log space to avoid numerical
#' @param beta A vector of \eqn{\beta} parameter.
#' @param gama A vector of \eqn{\gamma} parameter.
#' @param X Design matrix related to the (approximate) mean parameter
#'   \eqn{\mu = \exp(X \beta)}.
#' @param Z Design matrix related to the dispersion parameter \eqn{\nu =
#'   \exp(Z \gamma)}.
#' @param y Vector of observed count data.
#' @param params A vector of the model parameters \code{params} =
#'   \code{c(beta, gama)}.
#' @return The computed log-likelihood function.
#' @author Eduardo Jr <[email protected]>
#'
llcmp_fixed <- function(beta, gama, X, Z, y) {
# Map to CMP parameter space
Xbeta <- X %*% beta
Zgama <- Z %*% gama
nu <- exp(Zgama)
loglambda <- suppressWarnings(
nu * log(exp(Xbeta) + (nu - 1) / (2 * nu))
)
# Get the normalizing constants
logz <- compute_logz(loglambda, nu)
# Calcula o negativo do log da função de verossimilhança
ll <- sum(y * loglambda - nu * lfactorial(y) - logz)
return(-ll)
}

#' @rdname llcmp-compute
#'
llcmp <- function(params, X, Z, y) {
p <- ncol(X)
q <- ncol(Z)
llcmp_fixed(beta = params[1:p],
gama = params[1:q + p],
X = X, Z = Z, y = y)
}

#-----------------------------------------------------------------------
#' @title Maximize the COM-Poisson log-likelihood function
#' @param X Design matrix related to the (approximate) mean parameter
#'   \eqn{\mu = \exp(X \beta)}.
#' @param Z Design matrix related to the dispersion parameter \eqn{\nu =
#'   \exp(Z \gamma)}.
#' @param y Vector of observed count data.
#' @param start Initial parameters
#' @param method Argument passed to \code{\link[stats]{optim}(...)}.
#' @param lower Argument passed to \code{\link[stats]{optim}(...)}.
#' @param upper Argument passed to \code{\link[stats]{optim}(...)}.
#' @param hessian Argument passed to \code{\link[stats]{optim}(...)}.
#' @param control Argument passed to \code{\link[stats]{optim}(...)}.
#' @return A list with estimated parameters and hessian matrix
#' @author Eduardo Jr <[email protected]>
#' @importFrom stats glm.fit poisson optim
#' @export
#'
cmp_fit <- function(X, Z, y,
start   = NULL,
method  = c("BFGS",
"CG",
"L-BFGS-B",
"SANN"),
lower   = -Inf,
upper   = Inf,
hessian = TRUE,
control = list()) {
#-------------------------------------------
# Initial values
if (is.null(start)) {
# Dimensions
n <- length(y)
p <- ncol(X)
q <- ncol(Z)
model <- glm.fit(x = X, y = y, family = poisson())
start <- c("beta" = model$coefficients, rep(0, q)) names(start)[1:q + p] <- paste0("gama.", colnames(Z)) } else { if (is.null(names(start))) names(start) <- c(paste0("beta.", colnames(X)), paste0("gama.", colnames(Z))) } #------------------------------------------ # Maximization method <- match.arg(method) out <- optim(par = start, fn = llcmp, method = method, lower = lower, upper = upper, hessian = hessian, control = control, X = X, Z = Z, y = y) return(out) } #----------------------------------------------------------------------- #' @title Fitting COM-Poisson models with varying dispersion #' @param formula An object of class "\code{\link[stats]{formula}}" #' describe the model for mean. #' @param dformula An object of class "\code{\link[stats]{formula}}" #' describe the model for dispersion. #' @param data An optional data frame containing the variables in the #' model. If not found in data, the variables are taken from #' \code{environment(formula)}. #' @param ... Arguments to be used by \code{\link{cmp_fit}}. #' @return An object of class \code{cmpreg}. #' @author Eduardo Jr <[email protected]> #' @importFrom stats model.frame model.matrix model.response #' @export #' cmp <- function(formula, dformula = ~1, data, ...) { dots <- list(...) #-------------------------------------------- if (missing(data)) data <- environment(formula) #------------------------------------------- # Get matrices frame <- model.frame(formula, data) terms <- attr(frame, "terms") X <- model.matrix(terms, frame) Z <- model.matrix(dformula, data) y <- model.response(frame) p <- ncol(X) q <- ncol(Z) if (dformula == ~ 1) colnames(Z) <- "log(nu)" #------------------------------------------- # Fit model start <- dots$start
mpois <- glm.fit(x = X, y = y, family = poisson())
poiss <- list(loglik = -mpois$aic/2 + mpois$rank,
coefficients = mpois$coefficients) if (is.null(start)) { start <- c("beta" = mpois$coefficients, rep(0, q))
names(start)[1:q + p] <- paste0("gama.", colnames(Z))
optarg <- list(X = X, Z = Z, y = y, start = start)
optarg <- c(optarg, dots)
} else {
if (is.null(names(start)))
names(start)  <- c(paste0("beta.", colnames(X)),
paste0("gama.", colnames(Z)))
optarg <- list(X = X, Z = Z, y = y, start = start)
optarg <- c(optarg, dots[-which(names(dots) == "start")])
}
details <- do.call(cmp_fit, optarg)
mean_coefficients <- details$par[1:p] disp_coefficients <- details$par[1:q + p]
names(mean_coefficients) <- colnames(X)
names(disp_coefficients) <- colnames(Z)
vcov <- NULL
if ("hessian" %in% names(details))
vcov <- solve(details$hessian) #-------------------------------------------- # Fitted values fitted_mean <- exp(X %*% mean_coefficients) fitted_disp <- exp(Z %*% disp_coefficients) #-------------------------------------------- # Output out <- list(call = match.call(), formula = formula, dformula = dformula, nobs = length(y), df.residual = length(y) - length(details$par),
details = details,
loglik = -details$value, vcov = vcov, coefficients = details$par,
mean_coefficients = mean_coefficients,
disp_coefficients = disp_coefficients,
fitted_mean = c(fitted_mean),
fitted_disp = c(fitted_disp),
poissonfit = poiss,
data = list(X = X, Z = Z, y = y))
class(out) <- "cmpreg"
return(out)
}

jreduardo/cmpreg documentation built on May 14, 2019, 11:05 p.m.