R/oneStepBB.R

# A function for fitting a Beta-Binomial Regression
BBreg <- function(data, formula, weights) {
  # Prototype data -----
  # n <- 100
  # p <- 10
  # X <- matrix(rnorm(n * p), nrow = n)
  # X <- cbind(1, X)
  # beta <- c(-3, runif(p, min = -0.2, max = 0.2))
  # offset <- rnorm(n, sd = 0.2)
  # eta <- as.vector(X %*% beta) + offset
  # mu <- 1 / (1 + exp(-eta))
  # M <- 10^2
  # prop <- rbeta(n, M * mu, M * (1 - mu))
  # N <- rpois(n, rnorm(n, 10^4, 10^3))
  # y <- rbinom(n, N, prop)
  # data <- data.frame(y, N, offset, V = X)
  # formula <- as.formula(paste("cbind(y, N - y) ~ offset(offset)",
  #                             paste("+ V.", 1:ncol(X), collapse = "", sep = "")))
  # weights <- runif(n)

  # Fitting the model
  glmfit <- glm(formula, data = data, family = "binomial", weights = weights)
  coef <- coef(glmfit)
  toremove <- is.na(coef)
  coef <- coef[!toremove]
  X <- model.matrix(formula, data)
  X <- X[, !toremove]
  response <- model.response(glmfit$model)
  weights <- model.weights(glmfit$model)
  offset <- model.offset(glmfit$model)
  y <- response[, 1]
  N <- y + response[, 2]
  mu <- predict(glmfit, type = "response")

  M <- dispersionMLE(y, N, mu)
  param <- c(coef, M)
  param <- optim(param, fn = evalBB, gr = compgrad,
        method = "L-BFGS-B",
        lower = c(rep(-10^5, ncol(X)), 10),
        y = y, N = N, X = X, offset = offset,
        weights = weights)
  coef <- param$par[-length(param$par)]
  eta <- X %*% coef
  coef <- coef(glmfit)
  coef[!is.na(coef)] <- param$par[1:ncol(X)]
  coef[is.na(coef)] <- 0
  M <- param$par[length(param$par)]

  result <- glmfit
  result$M <- M
  result$coefficients <- coef
  result$linear.predictors <- eta
  class(result) <- c("bbreg", class(result))
  return(result)
}

# Compute gradient for beta-binomial regression
compgrad <- function(param, y, N, X, offset, weights) {
  M <- param[length(param)]
  coef <- param[1:(length(param) -1)]
  # coef <- param
  eta <- as.numeric(X %*% coef) + offset
  mu <- 1 / (1 + exp(- eta))
  diff <- N - y
  alpha <- M * mu
  beta <- M * (1 - mu)
  digam <- digamma(y + alpha) - digamma(diff + beta) - digamma(alpha) + digamma(beta)
  digam <- digam * M
  Mgrad <- digamma(y + alpha) * mu + digamma(N - y + beta) * (1 - mu) - digamma(N + M)
  Mgrad <- Mgrad + digamma(M) - digamma(alpha) * mu - digamma(beta) * (1 - mu)
  diexp <- exp(eta) / (exp(eta) + 1) ^ 2
  grad <- t(X) %*% (diexp * digam * weights)
  Mgrad <- sum(Mgrad * weights)

  return(-c(grad, Mgrad))
  # return(- grad)
}

evalBB <- function(param, y, N, X, offset, weights) {
  M <- param[length(param)]
  coef <- param[1:(length(param) - 1)]
  # coef <- param
  eta <- as.numeric(X %*% coef) + offset
  mu <- 1 / (1 + exp(- eta))
  return(-sum(vecBetaBinomDens(y, N, mu, M) * weights))
}

# Maximum likelihood estimation of the overdispersion paramter of a beta binomial distribution
dispersionMLE <- function(y, N, mu) {
  rho <- optimize(f = function(rho) sum(vecBetaBinomDens(y, N, mu, (1 - rho) / rho)),
                  interval = c(0, 1), maximum = TRUE, tol = 10^-6)$maximum
  M <- (1 - rho) / rho
  return(M)
}
RGLab/flowReMix documentation built on May 8, 2019, 5:55 a.m.