R/bdlim1_logistic.R
In bdlim: Bayesian Distributed Lag Interaction Models

Documented in bdlim1_logistic

#' Fit the BDLIM model with 1 pattern of modification with logistic regression
#'
#' @param y A vector of binary outcomes
#' @param exposure A matrix of exposures with one row for each individual
#' @param covars A matrix or data.frame of covariates This should not include the grouping factor (see group below). This may include factor variables.
#' @param group A vector of group memberships. This should be a factor variable.
#' @param id An optional vector of individual IDs if there are repeated measures or other groupings that a random intercept should be included for. This must be a factor variable.
#' @param w_free Logical indicating if the weight functions are shared by all groups (FALSE) or group-specific (TRUE).
#' @param b_free Logical indicating if the effect sizes are shared by all groups (FALSE) or group-specific (TRUE).
#' @param df Degrees of freedom for the weight functions
#' @param nits Number of MCMC iterations.
#' @param nburn Number of MCMC iterations to be discarded as burn in. The default is half if the MCMC iterations. This is only used for WAIC in this function but is passed to summary and plot functions and used there.
#' @param nthin Thinning factors for the MCMC. This is only used for WAIC in this function but is passed to summary and plot functions and used there.
#' @param progress Logical indicating if a progress bar should be shown during MCMC iterations. Default is TRUE.
#'
#' @importFrom stats lm sd rnorm rgamma dbinom runif model.matrix na.omit
#' @importFrom LaplacesDemon WAIC
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @importFrom BayesLogit rpg
#'
#' @return A list with posteriors of parameters
#' @export

bdlim1_logistic <- function(
  y,
  exposure,
  covars,
  group,
  id = NULL,
  w_free,
  b_free,
  df,
  nits,
  nburn = round(nits / 2),
  nthin = 1,
  progress = TRUE
) {
  # make sure group is a factor variable
  if (!is.factor(group)) {
    stop("group must be a factor variable.")
  }

  # allow for only one group
  if (length(unique(group)) == 1) {
    if (!w_free & !b_free) {
      group <- rep(1, length(group))
    } else {
      stop("heterogeneity not allowed with only one group")
    }
  }

  # make sure exposure is a data.frame
  if (is.null(colnames(exposure))) {
    colnames(exposure) <- paste0("exposure", 1:ncol(exposure))
  }
  exposure <- as.data.frame(exposure)

  # make sure covariates have names
  if (is.null(colnames(covars))) {
    colnames(covars) <- paste0("covar", 1:ncol(covars))
  }
  covars <- as.data.frame(covars)

  # make design matrix for all data except exposures
  # sort by group to make help with MCMC.
  # remove observations with missing values
  # drop unused levels
  alldata <- droplevels(na.omit(cbind(y, group, covars, exposure), group))
  if (length(y) < nrow(alldata)) {
    warning(
      "Dropped",
      length(y) - nrow(alldata),
      "observations with missing values."
    )
  }

  if (length(levels(group)) != length(levels(alldata$group))) {
    warning("Removing rows with missing values removed a level in the group")
  }

  # ADD random effect matrix here
  if (!is.null(id)) {
    RE <- model.matrix(~ id - 1)
    RElocation <- 1:ncol(RE)
    REmodel <- TRUE
    nRE <- ncol(RE)
  } else {
    REmodel <- FALSE
    nRE <- 0
  }

  # dimensions
  n <- nrow(alldata)
  n_groups <- length(unique(alldata$group))
  if (n_groups > 1) {
    names_groups <- levels(alldata$group)
  } else {
    names_groups <- "all"
  }
  n_times <- ncol(exposure)

  # design matrix for covariates and main effects of group
  mm <- model.matrix(y ~ . - 1, data = alldata)

  # add random effects
  if (REmodel) {
    mm <- cbind(RE, mm)
  }

  # get design matrix excluding covariates
  design <- mm[, -c(ncol(mm) + 1 - c(ncol(exposure):1))]

  # exposure matrix
  exposure <- mm[, c(ncol(mm) + 1 - c(ncol(exposure):1))]

  # basis for weights
  basis <- makebasis(exposure, df = df)

  # preliminary weighted exposures
  # make flat for all groups
  theta <- lm(rep(1 / sqrt(n_times), n_times) ~ basis - 1)$coef
  w <- drop(basis %*% theta)
  w <- w / sqrt(sum(w^2))
  w <- w * sign(sum(w))

  # starting values for weighted exposures
  # these are the same weightings for all groups
  E <- exposure %*% w

  # replicate if w is group specific
  if (w_free) {
    w <- matrix(rep(w, n_groups), n_groups, n_times, byrow = TRUE)
    theta <- matrix(rep(theta, n_groups), n_groups, df, byrow = TRUE)
    n_weight_groups <- n_groups
  } else {
    w <- matrix(w, 1, n_times, byrow = TRUE)
    theta <- matrix(theta, 1, df, byrow = TRUE)
    n_weight_groups <- 1
  }

  # design matrix for weighted exposures
  if (b_free) {
    Edesign <- design[, 1:n_groups]
    colnames(Edesign) <- paste0("E", names_groups)
  } else {
    Edesign <- matrix(1, n, 1)
    colnames(Edesign) <- "E"
  }

  # add weighted exposures to design matrix.
  design <- cbind(design, Edesign * drop(E))
  n_regcoef <- ncol(design)

  # index for groups for weights
  # identifies which rows are in which weight groups
  w_group_ids <- list()
  if (w_free) {
    for (j in 1:n_groups) {
      w_group_ids[[j]] <- which(design[, j] == 1)
    }
  } else {
    w_group_ids[[1]] <- 1:n
  }

  # starting values for other parameters
  REprec <- 0.01

  # place to store results
  regcoef_keep <- matrix(NA, nits, n_regcoef)
  w_keep <- matrix(NA, nits, n_times * n_groups)
  ll_sum_keep <- REprec_keep <- rep(NA, nits)

  # iterations to be kept
  # this is only used for waic
  iter_keep <- seq(nburn + 1, nits, by = nthin)
  ll_all_keep <- matrix(NA, n, length(iter_keep))

  # set linear predictor to 0 for starting values
  pred_mean_model_scale <- rep(0, n)

  # initiate progress bar
  if (progress) {
    message(
      "Start MCMC for bdlim1 with w_free=",
      w_free,
      " and b_free=",
      b_free,
      "."
    )

    progress_bar = txtProgressBar(min = 0, max = nits, style = 3, char = "=")
  }

  for (i in 1:nits) {
    # omega from PG augmentation
    w_pg <- rpg(n, 1, pred_mean_model_scale)

    # update regression coefficients
    design_w <- t(scale(t(design), 1 / sqrt(w_pg), center = FALSE))
    V <- t(design_w) %*% design_w
    diag(V) <- diag(V) + c(rep(REprec, nRE), rep(1 / 100, n_regcoef - nRE))
    V <- chol2inv(chol(V))
    m <- drop(V %*% (t(design) %*% (y - .5)))
    regcoef <- drop(m + t(chol(V)) %*% rnorm(n_regcoef))

    # update random effect variance if a RE model
    if (REmodel) {
      REprec <- rgamma(1, .5 + nRE / 2, .5 + sum(regcoef[RElocation]^2) / 2)
    }

    for (j in 1:n_weight_groups) {
      # log likelihood to start update of theta/w
      ll <- sum(dbinom(
        y[w_group_ids[[j]]],
        1,
        1 / (1 + exp(-design[w_group_ids[[j]], ] %*% regcoef)),
        log = TRUE
      ))
      threshold <- ll + log(runif(1))
      ll <- threshold - 1 # allows always to start loop

      # vector for ellipse
      nu <- matrix(rnorm(df), 1, df)
      eta_max <- eta <- runif(1, 0, 2 * pi)
      eta_min <- eta_max - 2 * pi

      while (ll < threshold) {
        # proposed coefficients and normalized weights
        theta_prop <- theta[j, ] * cos(eta) + nu * sin(eta)
        w[j, ] <- drop(basis %*% c(theta_prop))
        w[j, ] <- w[j, ] / sqrt(sum(w[j, ]^2))
        w[j, ] <- w[j, ] * sign(sum(w[j, ]))

        # update weighted exposures for this group
        design[w_group_ids[[j]], colnames(Edesign)] <- as.matrix(Edesign[
          w_group_ids[[j]],
        ]) *
          drop(exposure[w_group_ids[[j]], ] %*% w[j, ])

        # log likelihood
        ll <- sum(dbinom(
          y[w_group_ids[[j]]],
          1,
          1 / (1 + exp(-design[w_group_ids[[j]], ] %*% regcoef)),
          log = TRUE
        ))
        # adjust eta in case repeat
        if (eta < 0) {
          eta_min <- eta
        } else {
          eta_max <- eta
        }
        eta <- runif(1, eta_min, eta_max)
      }

      # update theta (w and design are already updated)
      theta[j, ] <- theta_prop

      # update progress bar
      if (progress) {
        setTxtProgressBar(progress_bar, value = i)
      }
    } # end MCMC loop

    # close progress bar
    if (progress) {
      close(progress_bar)
      message(
        "End MCMC for bdlim1 with w_free=",
        w_free,
        " and b_free=",
        b_free,
        "."
      )
    }

    # save values
    w_keep[i, ] <- c(t(w))
    regcoef_keep[i, ] <- regcoef
    REprec_keep[i] <- REprec
    pred_mean_model_scale <- design %*% regcoef
    ll_sum_keep[i] <- sum(dbinom(
      y,
      1,
      1 / (1 + exp(-pred_mean_model_scale)),
      log = TRUE
    ))

    if (i %in% iter_keep) {
      ll_all_keep[, which(iter_keep == i)] <- sum(dbinom(
        y,
        1,
        1 / (1 + exp(-pred_mean_model_scale)),
        log = TRUE
      ))
    }
  }

  # format
  colnames(regcoef_keep) <- colnames(design)
  colnames(regcoef_keep)[1:n_groups] <- paste0("intercept", names_groups)
  colnames(w_keep) <- paste0(
    "w_",
    rep(names_groups, each = n_times),
    "_",
    rep(1:n_times, n_groups)
  )

  # summarize posterior for cumulative effect and distributed lag function
  dlfun <- ce <- list()
  for (i in names_groups) {
    w_temp <- w_keep[, paste0("w_", i, "_", 1:n_times)]
    if (b_free) {
      E_temp <- regcoef_keep[, paste0("E", i)]
    } else {
      E_temp <- regcoef_keep[, "E"]
    }
    dlfun[[i]] <- w_temp * E_temp
    ce[[i]] <- rowSums(dlfun[[i]])
  }

  out <- list(
    w = w_keep,
    regcoef = regcoef_keep[, (nRE + 1):n_regcoef],
    sigma = NA,
    loglik = ll_sum_keep,
    names_groups = names_groups,
    n_times = n_times,
    dlfun = dlfun,
    ce = ce,
    WAIC = WAIC(ll_all_keep),
    n = length(y),
    nits = nits,
    nburn = nburn,
    nthin = nthin,
    REmodel = REmodel,
    family = "binomial",
    nits = nits,
    nburn = nburn,
    nthin = nthin,
    call = match.call()
  )

  if (REmodel) {
    out$RE <- regcoef_keep[, 1:nRE]
    out$REsd = 1 / sqrt(REprec_keep)
  }

  class(out) <- "bdlim1"

  return(out)
}