R/predict_zero_inflation.R


.join_simulations <- function(prediction_data, newdata, prdat, sims, ci, clean_terms) {
  # after "bootstrapping" confidence intervals by simulating from the
  # multivariate normal distribution, we need to prepare the data and
  # calculate "bootstrapped" estimates and CIs.

  prediction_data$sort__id <- seq_len(nrow(prediction_data))
  column_matches <- sapply(colnames(prediction_data), function(.x) {
    any(unique(prediction_data[[.x]]) %in% newdata[[.x]])
  })

  # we need two data grids here: one for all combinations of levels from the
  # model predictors ("newdata"), and one with the current combinations only
  # for the terms in question ("prediction_data"). "sims" always has the same
  # number of rows as "newdata", but "prediction_data" might be shorter. So we
  # merge "prediction_data" and "newdata", add mean and quantiles from "sims"
  # as new variables, and then later only keep the original observations
  # from "prediction_data" - this way, we avoid unequal row lengths.

  join_by <- colnames(prediction_data)[column_matches]
  prediction_data <- merge(newdata, prediction_data, by = join_by, all = TRUE, sort = FALSE)
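  # note: in the row-wise summaries below, "ci" is used as a quantile
  # probability (cf. "probs = ci" and "qnorm(ci)" further down), i.e. 0.975
  # rather than 0.95 for a 95% interval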

  prediction_data$predicted <- rowMeans(sims)
  prediction_data$conf.low <- apply(sims, 1, stats::quantile, probs = 1 - ci)
  prediction_data$conf.high <- apply(sims, 1, stats::quantile, probs = ci)
  prediction_data$std.error <- apply(sims, 1, stats::sd)

  # aggregating changes the order of rows / variables in "prediction_data",
  # however we later add back the original predictions "prdat" (see below),
  # which correspond to the *current* sorting of prediction_data. The dummy
  # ID ("sort__id") added above is used to restore that original sorting later...

  prediction_data <- prediction_data[!is.na(prediction_data$sort__id), , drop = FALSE]

  prediction_data <- cbind(
    stats::aggregate(
      prediction_data[c("predicted", "conf.low", "conf.high", "std.error")],
      by = prediction_data[clean_terms],
      FUN = mean,
      na.rm = TRUE
    ),
    id = prediction_data$sort__id
  )

  rownames(prediction_data) <- NULL

  if (length(clean_terms) == 1) {
    prediction_data <- prediction_data[order(prediction_data[[1]]), , drop = FALSE]
  } else if (length(clean_terms) == 2) {
    prediction_data <- prediction_data[order(prediction_data[[1]], prediction_data[[2]]), , drop = FALSE]
  } else if (length(clean_terms) == 3) {
    prediction_data <- prediction_data[order(prediction_data[[1]], prediction_data[[2]], prediction_data[[3]]), , drop = FALSE] # nolint
  } else if (length(clean_terms) == 4) {
    prediction_data <- prediction_data[order(prediction_data[[1]], prediction_data[[2]], prediction_data[[3]], prediction_data[[4]]), , drop = FALSE] # nolint
  }

  # we use the predicted values from "predict(type = "response")", but the
  # bootstrapped CIs - so we need to adjust things a bit here.
  # "predict(type = "response")" works as intended, but the related standard
  # errors are not reliable (due to the combination of the conditional and
  # zero-inflated model), so we need the simulated standard errors and CIs -
  # but we can use the "correct" predictions. To make CIs and predictions
  # match, we take the simulated CI range and use the original predicted
  # values as "center" for those CI ranges.

  if (length(prdat) == nrow(prediction_data)) {
    prediction_data <- prediction_data[order(prediction_data$id), , drop = FALSE]
    ci.range <- (prediction_data$conf.high - prediction_data$conf.low) / 2
    prediction_data$predicted <- prdat

    # fix negative CI
    ci.low <- prediction_data$predicted - ci.range
    neg.ci <- ci.low < 0
    if (any(neg.ci)) {
      ci.range[neg.ci] <- ci.range[neg.ci] - abs(ci.low[neg.ci]) - 1e-05
      prediction_data$std.error[neg.ci] <- prediction_data$std.error[neg.ci] - ((abs(ci.low[neg.ci]) + 1e-05) / stats::qnorm(ci)) # nolint
    }

    prediction_data$conf.low <- prediction_data$predicted - ci.range
    prediction_data$conf.high <- prediction_data$predicted + ci.range
    prediction_data <- .remove_column(prediction_data, "id")
  }

  prediction_data
}
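
# Illustrative sketch (not called by the package): shows how the row-wise
# summaries in ".join_simulations()" behave for a toy "sims" matrix with one
# row per grid row and one column per simulation draw. All object names below
# are hypothetical; the block is wrapped in "if (FALSE)" so it is never run.
if (FALSE) {
  sims <- matrix(stats::rnorm(5 * 1000, mean = 2, sd = 0.3), nrow = 5)
  ci <- 0.975 # upper quantile probability for a 95% interval

  data.frame(
    predicted = rowMeans(sims),
    conf.low  = apply(sims, 1, stats::quantile, probs = 1 - ci),
    conf.high = apply(sims, 1, stats::quantile, probs = ci),
    std.error = apply(sims, 1, stats::sd)
  )
}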




.simulate_zi_predictions <- function(model,
                                     newdata,
                                     nsim = 1000,
                                     terms = NULL,
                                     value_adjustment = NULL,
                                     condition = NULL) {

  # Since the zero-inflation and the conditional model work in "opposite
  # directions", confidence intervals cannot be derived directly from the
  # "predict()"-function. Thus, confidence intervals for type = "zero_inflated"
  # are based on quantiles of simulated draws from a multivariate normal
  # distribution (see also _Brooks et al. 2017, pp. 391-392_ for details).

  if (inherits(model, "glmmTMB")) {
    .simulate_predictions_glmmTMB(model, newdata, nsim, terms, value_adjustment, condition)
  } else if (inherits(model, "MixMod")) {
    .simulate_predictions_MixMod(model, newdata, nsim, terms, value_adjustment, condition)
  } else {
    .simulate_predictions_zeroinfl(model, newdata, nsim, terms, value_adjustment, condition)
  }
}
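
# Illustrative sketch (not part of the package): given the simulated linear
# predictors returned above, response-scale predictions for a model with a
# log-linked conditional part and a logit-linked zero-inflation part can be
# combined as mu * (1 - p), following Brooks et al. 2017. "model", "newdata"
# and "terms" are hypothetical objects assumed to exist in the calling scope;
# the block is wrapped in "if (FALSE)" so it is never run.
if (FALSE) {
  sim <- .simulate_zi_predictions(model, newdata, nsim = 1000, terms = terms)
  pred_response <- exp(sim$cond) * (1 - stats::plogis(sim$zi))
  # each row of "pred_response" then feeds the quantile-based intervals
  # computed in ".join_simulations()"
}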






.simulate_predictions_glmmTMB <- function(model,
                                          newdata,
                                          nsim,
                                          terms = NULL,
                                          value_adjustment = NULL,
                                          condition = NULL) {
  insight::check_if_installed("lme4")

  # Since the zero-inflation and the conditional model work in "opposite
  # directions", confidence intervals cannot be derived directly from the
  # "predict()"-function. Thus, confidence intervals for type = "zero_inflated"
  # are based on quantiles of simulated draws from a multivariate normal
  # distribution (see also _Brooks et al. 2017, pp. 391-392_ for details).

  tryCatch(
    {
      condformula <- lme4::nobars(stats::formula(model)[-2])
      ziformula <- lme4::nobars(stats::formula(model$modelInfo$allForm$ziformula))

      # if the formula has a polynomial term, and this term is one that is held
      # constant, model.matrix() with "newdata" will throw an error - so we
      # re-build the newdata-argument by including all values for poly-terms,
      # if they are held constant.

      fixes <- .rows_to_keep(model, newdata, condformula, ziformula, terms, value_adjustment, condition)

      if (!is.null(fixes)) {
        keep <- fixes$keep
        newdata <- fixes$newdata
      } else {
        keep <- NULL
      }

      x.cond <- stats::model.matrix(condformula, newdata)
      beta.cond <- lme4::fixef(model)$cond

      x.zi <- stats::model.matrix(ziformula, newdata)
      beta.zi <- lme4::fixef(model)$zi

      cond.varcov <- insight::get_varcov(model, component = "conditional")
      zi.varcov <- insight::get_varcov(model, component = "zero_inflated")

      pred.condpar.psim <- .mvrnorm(n = nsim, mu = beta.cond, Sigma = cond.varcov)
      pred.cond.psim <- x.cond %*% t(pred.condpar.psim)
      pred.zipar.psim <- .mvrnorm(n = nsim, mu = beta.zi, Sigma = zi.varcov)
      pred.zi.psim <- x.zi %*% t(pred.zipar.psim)

      if (!.is_empty(keep)) {
        pred.cond.psim <- pred.cond.psim[keep, , drop = FALSE]
        pred.zi.psim <- pred.zi.psim[keep, , drop = FALSE]
      }

      list(cond = pred.cond.psim, zi = pred.zi.psim)
    },
    error = function(x) x,
    warning = function(x) NULL,
    finally = function(x) NULL
  )
}
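
# Illustrative sketch (not part of the package): the simulation step above
# draws "nsim" coefficient vectors from a multivariate normal distribution with
# the fixed-effect estimates as mean and their variance-covariance matrix as
# Sigma, then maps them through the model matrix. ".mvrnorm()" is an internal
# helper; the sketch uses MASS::mvrnorm() as an assumed equivalent, and all
# values are hypothetical. Wrapped in "if (FALSE)" so it is never run.
if (FALSE) {
  beta  <- c(`(Intercept)` = 0.5, x = 1.2)                          # hypothetical fixed effects
  vc    <- diag(c(0.04, 0.01))                                      # hypothetical vcov matrix
  X     <- cbind(`(Intercept)` = 1, x = seq(0, 1, length.out = 4))  # hypothetical model matrix
  draws <- MASS::mvrnorm(n = 1000, mu = beta, Sigma = vc)           # 1000 draws x 2 coefficients
  eta   <- X %*% t(draws)                                           # 4 grid rows x 1000 simulations
  dim(eta)
}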


.simulate_predictions_MixMod <- function(model,
                                         newdata,
                                         nsim,
                                         terms = NULL,
                                         value_adjustment = NULL,
                                         condition = NULL) {
  insight::check_if_installed("lme4")

  tryCatch(
    {
      condformula <- stats::formula(model, type = "fixed")
      ziformula <- stats::formula(model, type = "zi_fixed")

      # if the formula has a polynomial term, and this term is one that is held
      # constant, model.matrix() with "newdata" will throw an error - so we
      # re-build the newdata-argument by including all values for poly-terms,
      # if they are held constant.

      fixes <- .rows_to_keep(model, newdata, condformula, ziformula, terms, value_adjustment, condition)

      if (!is.null(fixes)) {
        keep <- fixes$keep
        newdata <- fixes$newdata
      } else {
        keep <- NULL
      }

      x.cond <- stats::model.matrix(condformula, newdata)
      beta.cond <- lme4::fixef(model, sub_model = "main")

      x.zi <- stats::model.matrix(ziformula, newdata)
      beta.zi <- lme4::fixef(model, sub_model = "zero_part")

      cond.varcov <- insight::get_varcov(model, component = "conditional")
      zi.varcov <- insight::get_varcov(model, component = "zero_inflated")

      pred.condpar.psim <- .mvrnorm(n = nsim, mu = beta.cond, Sigma = cond.varcov)
      pred.cond.psim <- x.cond %*% t(pred.condpar.psim)
      pred.zipar.psim <- .mvrnorm(n = nsim, mu = beta.zi, Sigma = zi.varcov)
      pred.zi.psim <- x.zi %*% t(pred.zipar.psim)

      if (!.is_empty(keep)) {
        pred.cond.psim <- pred.cond.psim[keep, , drop = FALSE]
        pred.zi.psim <- pred.zi.psim[keep, , drop = FALSE]
      }

      list(cond = pred.cond.psim, zi = pred.zi.psim)
    },
    error = function(x) x,
    warning = function(x) NULL,
    finally = function(x) NULL
  )
}


.simulate_predictions_zeroinfl <- function(model,
                                           newdata,
                                           nsim = 1000,
                                           terms = NULL,
                                           value_adjustment = NULL,
                                           condition = NULL) {
  tryCatch(
    {
      condformula <- stats::as.formula(paste0("~", insight::safe_deparse(stats::formula(model)[[3]][[2]])))
      ziformula <- stats::as.formula(paste0("~", insight::safe_deparse(stats::formula(model)[[3]][[3]])))

      # if the formula has a polynomial term, and this term is one that is held
      # constant, model.matrix() with "newdata" will throw an error - so we
      # re-build the newdata-argument by including all values for poly-terms,
      # if they are held constant.

      fixes <- .rows_to_keep(model, newdata, condformula, ziformula, terms, value_adjustment, condition)

      if (!is.null(fixes)) {
        keep <- fixes$keep
        newdata <- fixes$newdata
      } else {
        keep <- NULL
      }

      x.cond <- stats::model.matrix(condformula, model = "count", data = newdata)
      beta.cond <- stats::coef(model, model = "count")

      x.zi <- stats::model.matrix(ziformula, model = "zero", data = newdata)
      beta.zi <- stats::coef(model, model = "zero")

      cond.varcov <- insight::get_varcov(model, component = "conditional")
      zi.varcov <- insight::get_varcov(model, component = "zero_inflated")

      pred.condpar.psim <- .mvrnorm(nsim, mu = beta.cond, Sigma = cond.varcov)
      pred.cond.psim <- x.cond %*% t(pred.condpar.psim)

      pred.zipar.psim <- .mvrnorm(nsim, mu = beta.zi, Sigma = zi.varcov)
      pred.zi.psim <- x.zi %*% t(pred.zipar.psim)

      if (!.is_empty(keep)) {
        pred.cond.psim <- pred.cond.psim[keep, , drop = FALSE]
        pred.zi.psim <- pred.zi.psim[keep, , drop = FALSE]
      }

      list(cond = pred.cond.psim, zi = pred.zi.psim)
    },
    error = function(x) x,
    warning = function(x) NULL,
    finally = function(x) NULL
  )
}
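
# Illustrative sketch (not part of the package): for pscl-style two-part
# formulas of the form "y ~ count_part | zero_part", indexing the call tree
# splits the right-hand side at the "|" operator, which is what the
# "formula(model)[[3]][[2]]" / "[[3]][[3]]" lines above rely on. Variable
# names are hypothetical; wrapped in "if (FALSE)" so it is never run.
if (FALSE) {
  f <- count ~ child + camper | persons
  f[[3]]       # child + camper | persons
  f[[3]][[2]]  # child + camper   (conditional/count part)
  f[[3]][[3]]  # persons          (zero-inflation part)
  stats::as.formula(paste0("~", insight::safe_deparse(f[[3]][[2]])))
}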






.rows_to_keep <- function(model, newdata, condformula, ziformula, terms, value_adjustment, condition) {
  # if the formula has a polynomial term, and this term is one that is held
  # constant, model.matrix() with "newdata" will throw an error - so we
  # re-build the newdata-argument by including all values for poly-terms,
  # if they are held constant.

  const.values <- attr(newdata, "constant.values")
  condformula_string <- insight::safe_deparse(condformula)
  ziformula_string <- insight::safe_deparse(ziformula)

  keep <- NULL

  if (.has_poly_term(condformula_string) || .has_poly_term(ziformula_string)) {

    model_frame <- insight::get_data(model, source = "frame", verbose = FALSE)

    polycondcheck <- NULL
    polyzicheck <- NULL

    if (.has_poly_term(condformula_string)) {
      polyterm <- .get_poly_term(condformula_string)
      if (polyterm %in% names(const.values)) {
        polycondcheck <- polyterm
        polydg <- .get_poly_degree(condformula_string)
        polyvals <- paste0(
          stats::quantile(model_frame[[polyterm]], probs = seq_len(polydg + 1) / (polydg + 2)),
          collapse = ","
        )
        terms <- c(terms, sprintf("%s [%s]", polyterm, polyvals))
      }
    }

    if (.has_poly_term(ziformula_string)) {
      polyterm <- .get_poly_term(ziformula_string)
      if (polyterm %in% names(const.values)) {
        polyzicheck <- polyterm
        polydg <- .get_poly_degree(ziformula_string)
        polyvals <- paste0(
          stats::quantile(model_frame[[polyterm]], probs = seq_len(polydg + 1) / (polydg + 2)),
          collapse = ","
        )
        terms <- c(terms, sprintf("%s [%s]", polyterm, polyvals))
      }
    }

    newdata <- .data_grid(
      model = model,
      model_frame = model_frame,
      terms = terms,
      value_adjustment = value_adjustment,
      factor_adjustment = FALSE,
      show_pretty_message = FALSE,
      condition = condition,
      verbose = FALSE
    )

    keep.cond <- vector("numeric")
    keep.zi <- vector("numeric")

    if (!is.null(polycondcheck)) {
      keep.cond <- unlist(lapply(polycondcheck, function(.x) {
        wm <- newdata[[.x]][which.min(abs(newdata[[.x]] - .typical_value(newdata[[.x]], fun = value_adjustment)))]
        as.vector(which(newdata[[.x]] == wm))
      }), use.names = FALSE)
    }

    if (!is.null(polyzicheck)) {
      keep.zi <- unlist(lapply(polyzicheck, function(.x) {
        wm <- newdata[[.x]][which.min(abs(newdata[[.x]] - .typical_value(newdata[[.x]], fun = value_adjustment)))]
        as.vector(which(newdata[[.x]] == wm))
      }), use.names = FALSE)
    }

    keep <- intersect(keep.cond, keep.zi)

    if (.is_empty(keep))
      keep <- unique(c(keep.cond, keep.zi))
  }

  if (.is_empty(keep)) return(NULL)

  list(keep = keep, newdata = newdata)
}
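
# Illustrative sketch (not part of the package): for a polynomial term that is
# held constant, the quantile probabilities used above are the interior points
# seq_len(degree + 1) / (degree + 2), e.g. 0.25, 0.50, 0.75 for a quadratic
# term, so "newdata" contains enough distinct values for model.matrix() to
# rebuild the poly() basis. Values are hypothetical; wrapped in "if (FALSE)"
# so it is never run.
if (FALSE) {
  polydg <- 2
  probs <- seq_len(polydg + 1) / (polydg + 2)  # 0.25 0.50 0.75
  x <- stats::rnorm(100)
  paste0(stats::quantile(x, probs = probs), collapse = ",")
}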



.get_zeroinfl_gam_predictions <- function(model, newdata, nsim = 1000) {
  tryCatch(
    {
      mm <- stats::model.matrix(model, data = newdata)

      linpred <- attr(mm, "lpi", exact = TRUE)
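      # the "lpi" attribute holds one vector of column indices per linear
      # predictor of the combined model matrix; the first is treated as the
      # conditional part, the second as the zero-inflation part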
      cond <- linpred[[1]]
      zi <- linpred[[2]]

      x.cond <- mm[, cond]
      x.zi <- mm[, zi]

      beta.cond <- stats::coef(model)[cond]
      beta.zi <- stats::coef(model)[zi]

      varcov.cond <- stats::vcov(model)[cond, cond]
      varcov.zi <- stats::vcov(model)[zi, zi]

      psim.cond <- .mvrnorm(nsim, mu = beta.cond, Sigma = varcov.cond)
      pred.cond <- x.cond %*% t(psim.cond)

      psim.zi <- .mvrnorm(nsim, mu = beta.zi, Sigma = varcov.zi)
      pred.zi <- x.zi %*% t(psim.zi)

      list(cond = pred.cond, zi = pred.zi)
    },
    error = function(x) x,
    warning = function(x) NULL,
    finally = function(x) NULL
  )
}
