insight: Easy Access to Model Information for Various Model Objects

Documented in get_data get_data.afex_aov get_data.default get_data.glmmTMB get_data.rma

#' @title Get the data that was used to fit the model
#' @name get_data
#'
#' @description This functions tries to get the data that was used to fit the
#'   model and returns it as data frame.
#'
#' @param effects Should model data for fixed effects (`"fixed"`), random
#'   effects (`"random"`) or both (`"all"`) be returned? Only applies to mixed
#'   or gee models.
#' @param source String, indicating from where data should be recovered. If
#'   `source = "environment"` (default), data is recovered from the environment
#'   (e.g. if the data is in the workspace). This option is usually the fastest
#'   way of getting data and ensures that the original variables used for model
#'   fitting are returned. Note that always the _current_ data is recovered from
#'   the environment. Hence, if the data was modified _after_ model fitting
#'   (e.g., variables were recoded or rows filtered), the returned data may no
#'   longer equal the model data. If `source = "frame"` (or `"mf"`), the data
#'   is taken from the model frame. Any transformed variables are back-transformed,
#'   if possible. This option returns the data even if it is not available in
#'   the environment, however, in certain edge cases back-transforming to the
#'   original data may fail. If `source = "environment"` fails to recover the
#'   data, it tries to extract the data from the model frame; if
#'   `source = "frame"` and data cannot be extracted from the model frame, data
#'   will be recovered from the environment. Both ways only returns observations
#'   that have no missing data in the variables used for model fitting.
#' @param verbose Toggle messages and warnings.
#'
#' @inheritParams find_predictors
#' @inheritParams find_formula
#'
#' @inheritSection find_predictors Model components
#'
#' @return The data that was used to fit the model.
#'
#' @examplesIf require("lme4", quietly = TRUE)
#' data(cbpp, package = "lme4")
#' cbpp$trials <- cbpp$size - cbpp$incidence
#' m <- glm(cbind(incidence, trials) ~ period, data = cbpp, family = binomial)
#' head(get_data(m))
#' @export
get_data <- function(x, ...) {
  UseMethod("get_data")
}


# extract data from environment -------------------------------

# main workhorse, we try to recover data from environment as good as possible.
# the dataset is subset if needed, and weights are added. only those columns
# are returned that we actually find in the model...
# data_name is useful when we have the name of the data frame object stored as
# a string (e.g., in brmsfit attr(x$data, "data_frame"))
.get_data_from_environment <- function(x,
                                       effects = "all",
                                       component = "all",
                                       source = "environment",
                                       additional_variables = NULL,
                                       verbose = FALSE,
                                       data_name = NULL) {
  # process arguments, check whether data should be recovered from
  # environment or model frame
  source <- .check_data_source_arg(source)
  # if not environment, leave
  if (source != "environment") {
    return(NULL)
  }

  # handle arguments
  effects <- validate_argument(effects, c("all", "fixed", "random"))
  component <- validate_argument(
    component,
    c("all", "conditional", "zero_inflated", "zi", "smooth_terms", "dispersion")
  )

  # we want to add the variable for subsettig, too
  model_call <- get_call(x)

  # for random effects, we still need all variables to be extracted
  # in case we have missing data. E.g., if random effects variables have
  # no missing data, but response or other fixed effects has, "get_random()"
  # should only return non-missing data for the model - thus, missing cases
  # in any fixed effects variable should be removed, even if non-missing in
  # random effects variables (see #777)
  if (effects == "random") {
    selected_vars <- "all"
  } else {
    selected_vars <- effects
  }

  # extract model variables, if possible
  vars <- try(
    find_variables(x, effects = selected_vars, component = component, flatten = TRUE, verbose = FALSE),
    silent = TRUE
  )

  # if "find_variables()" fails, we set it to NULL
  if (inherits(vars, "try-error")) {
    vars <- NULL

    # if "find_variables()" returns NULL, we assume this is intentional, as
    # specific model components were requested, which are not available
  } else if (is.null(vars) && effects != "fixed") {
    # for fixed effects, always include response,
    # so return NULL only if effects != "fixed"
    if (verbose) {
      format_warning(
        "Could not find any variables for the specified model component.",
        "You may try other values for the `effects` and `component` argument to retrieve model data."
      )
    }
    return(NULL)
  }

  out <- tryCatch(
    {
      # recover data frame from environment
      dat <- .recover_data_from_environment(x, data_name = data_name, verbose = verbose)
      # for metafor, we need to add weights...
      if (inherits(x, c("rma.uni", "rma"))) {
        ## TODO: check if we need to do this for other meta-analysis packages, too
        wdat <- data.frame(Weights = get_weights(x))
        additional_variables <- c(additional_variables, "Weights")
        dat <- tryCatch(cbind(dat, wdat), error = function(e) dat)
      }
      # additional variables? Some models, like plm::plm(), have an "index"
      # slot in the model call with further variables
      if (!is.null(additional_variables) && !isTRUE(additional_variables)) {
        vars <- c(vars, additional_variables)
      }
      # add response, only required if "find_variables()" does not already
      # return it (which is the case when component is "all" or "conditional")
      if (!component %in% c("all", "conditional")) {
        vars <- c(vars, find_response(x, combine = FALSE))
      }

      ## TODO: do we want random slopes included? Previuosly, we did not.
      # add random slopes, if any
      # if (effects %in% c("all", "random")) {
      #   vars <- c(vars, unlist(find_random_slopes(x)))
      # }

      # select only those variables from the data that we find in the model
      if (!is.null(vars)) {
        # weighting variable?
        vars <- c(vars, find_weights(x))
        # offset?
        vars <- c(vars, find_offset(x))
        # subset?
        if (!is.null(model_call$subset)) {
          subset_vars <- .safe(all.vars(model_call$subset))
          vars <- c(vars, subset_vars)
        }
        vars <- unique(vars)
        # if "additional_variables" is TRUE, keep *all* variables from original
        # data, else make sure only required columns are returned
        if (!isTRUE(additional_variables)) {
          dat <- dat[, intersect(vars, colnames(dat)), drop = FALSE]
        }
      }

      # complete cases only, as in model frames, need to filter attributes
      # only use model variables in complete.cases()
      if (is.null(vars)) {
        cc <- stats::complete.cases(dat)
      } else {
        cc <- stats::complete.cases(dat[, intersect(vars, colnames(dat))])
      }

      # only preserve random effects
      if (effects == "random") {
        dat <- dat[find_random(x, split_nested = TRUE, flatten = TRUE)]
      }

      if (!all(cc)) {
        # save original data, for attributes
        original_dat <- dat
        # filter
        dat <- dat[cc, , drop = FALSE]
        # add back labels
        var_label <- compact_list(lapply(original_dat, attr, "label", exact = TRUE))
        if (length(var_label)) {
          for (i in names(var_label)) {
            attr(dat[[i]], "label") <- var_label[[i]]
          }
        }
        val_labels <- compact_list(lapply(original_dat, attr, "labels", exact = TRUE))
        if (length(val_labels)) {
          for (i in names(val_labels)) {
            attr(dat[[i]], "labels") <- val_labels[[i]]
          }
        }
      }
      # any data left?
      if (nrow(dat) == 0 || ncol(dat) == 0) {
        dat <- NULL
      }
      dat
    },
    error = function(x) {
      NULL
    }
  )
  # successful?
  if (is.null(out) && verbose) {
    format_warning(
      "Could not recover model data from environment. Please make sure your data is available in your workspace.",
      "Trying to retrieve data from the model frame now."
    )
  }
  out
}


# find data from the environment -----------------------------------

# return data from a data frame that is in the environment,
# and subset the data, if necessary
.recover_data_from_environment <- function(x, data_name = NULL, verbose = FALSE) {
  model_call <- get_call(x)

  if (is.null(model_call[["data"]]) && is.character(data_name)) {
    model_call[["data"]] <- as.name(data_name)
  }

  # special handling for fixest, see #767
  if (inherits(x, "fixest")) {
    # when called from inside function, fixest seems to have a different
    # environment that requires recovering from parent-environment
    dat <- .safe(eval(model_call$data, envir = parent.env(x$call_env)))
  } else {
    # first, try environment of formula, see #666. set enclos = NULL so eval()
    # does not fall back to parent frame when the environment is NULL, since we
    # want to try that after checking the formula
    dat <- .safe(eval(model_call$data,
      envir = environment(model_call$formula),
      enclos = NULL
    ))
  }

  # second, try to extract formula directly
  if (is.null(dat)) {
    dat <- .safe(eval(model_call$data,
      envir = environment(find_formula(x, verbose = FALSE)$conditional),
      enclos = NULL
    ))
  }

  # validation check- if data frame is named like a function, e.g.
  # rep <- data.frame(...), we now have a function instead of the data
  # we then need to reset "dat" to NULL and search in the global env

  if (!is.null(dat) && !is.data.frame(dat)) {
    dat <- .safe(as.data.frame(dat))
  }

  # third try, global env
  if (is.null(dat)) {
    dat <- .safe(eval(model_call$data, envir = globalenv()))
  }

  # last try, internal env
  if (is.null(dat)) {
    dat <- .safe(eval(model_call$data, envir = parent.env(x$call_env)))
  }

  if (!is.null(dat) && object_has_names(model_call, "subset")) {
    subset_data <- .safe(subset(dat, subset = eval(model_call$subset)))
    if (!is.null(subset_data)) {
      dat <- subset_data
    } else if (verbose) {
      format_warning("Looks like the original data was subset, however `get_data()` could not retrieve the subset of the data. The full data set is returned.") # nolint
    }
  }

  dat
}


# default method ------------------------------------------------------

#' @rdname get_data
#' @export
get_data.default <- function(x, source = "environment", verbose = TRUE, ...) {
  if (inherits(x, "list") && object_has_names(x, "gam")) {
    x <- x$gam
    class(x) <- c(class(x), c("glm", "lm"))
  }

  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  # fall back to extract data from model frame
  if (is.null(model_data)) {
    mf <- tryCatch(
      if (inherits(x, "Zelig-relogit")) {
        .get_zelig_relogit_frame(x)
      } else {
        stats::model.frame(x)
      },
      error = function(x) NULL
    )
    # process arguments, check whether data should be recovered from
    # environment or model frame
    source <- .check_data_source_arg(source)
    # if no data found, extract from environment - we repeat this step here
    # in case the source was not already environment
    if ((is.null(mf) || nrow(mf) == 0) && source != "environment") {
      mf <- tryCatch(
        {
          dat <- .recover_data_from_environment(x, verbose = verbose)
          vars <- find_variables(x, flatten = TRUE, verbose = FALSE)
          dat[, intersect(vars, colnames(dat)), drop = FALSE]
        },
        error = function(x) {
          NULL
        }
      )
    }
    model_data <- .prepare_get_data(x, mf, verbose = verbose)
  }
  model_data
}


#' @export
get_data.data.frame <- function(x, ...) {
  x
}

#' @export
get_data.censReg <- get_data.default

#' @export
get_data.maxLik <- get_data.default

#' @export
get_data.maxim <- get_data.default


#' @export
get_data.summary.lm <- function(x, verbose = TRUE, ...) {
  mf <- tryCatch(
    .recover_data_from_environment(x, verbose = verbose)[, all.vars(x$terms), drop = FALSE],
    error = function(x) NULL
  )
  .prepare_get_data(x, mf, verbose = verbose)
}


#' @export
get_data.model_fit <- function(x, verbose = TRUE, ...) {
  get_data(x$fit, verbose = verbose, ...)
}


#' @export
get_data.mhurdle <- function(x, verbose = TRUE, ...) {
  x[["model"]]
}


# classical and survival models -----------------------------------------------


#' @export
get_data.mjoint <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    {
      dat <- x$data[[1]]
      data_columns <- intersect(
        colnames(dat),
        unique(c(
          find_response(x, combine = FALSE, component = "all"),
          find_variables(x, flatten = TRUE, verbose = FALSE)
        ))
      )
      dat[, data_columns, drop = FALSE]
    },
    error = function(x) {
      NULL
    }
  )
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}


#' @export
get_data.geeglm <- function(x,
                            effects = "all",
                            source = "environment",
                            verbose = TRUE,
                            ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(
    x,
    effects = effects,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- tryCatch(stats::model.frame(x), error = function(x) NULL)
  if (!is.null(mf)) {
    id <- data.frame(x$id)
    colnames(id) <- deparse(parse(text = safe_deparse(get_call(x)))[[1]][["id"]])
    mf <- cbind(mf, id)
    # select effects
    vars <- switch(effects,
      all = find_variables(x, flatten = TRUE, verbose = FALSE),
      fixed = find_variables(x, effects = "fixed", flatten = TRUE, verbose = FALSE),
      random = find_random(x, flatten = TRUE)
    )
    mf <- mf[, intersect(vars, colnames(mf)), drop = FALSE]
  }
  .prepare_get_data(x, mf, effects = effects, verbose = verbose)
}

#' @export
get_data.glmgee <- get_data.geeglm


#' @export
get_data.gee <- function(x,
                         effects = "all",
                         source = "environment",
                         verbose = TRUE,
                         ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    effects = effects,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- tryCatch(
    {
      dat <- .recover_data_from_environment(x, verbose = verbose)
      vars <- switch(effects,
        all = find_variables(x, flatten = TRUE, verbose = FALSE),
        fixed = find_variables(x, effects = "fixed", flatten = TRUE, verbose = FALSE),
        random = find_random(x, flatten = TRUE)
      )
      dat[, intersect(vars, colnames(dat)), drop = FALSE]
    },
    error = function(x) {
      NULL
    }
  )
  .prepare_get_data(x, stats::na.omit(mf), effects = effects, verbose = verbose)
}


#' @export
get_data.rqss <- function(x,
                          component = "all",
                          source = "environment",
                          verbose = TRUE,
                          ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    component = component,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  component <- match.arg(component, choices = c("all", "conditional", "smooth_terms"))

  mf <- tryCatch(
    {
      dat <- .recover_data_from_environment(x, verbose = verbose)
      vars <- find_variables(
        x,
        effects = "all",
        component = component,
        flatten = TRUE,
        verbose = FALSE
      )
      dat[, intersect(vars, colnames(dat)), drop = FALSE]
    },
    error = function(x) {
      NULL
    }
  )
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}


#' @export
get_data.gls <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    {
      dat <- .recover_data_from_environment(x, verbose = verbose)
      data_columns <- intersect(
        colnames(dat),
        find_variables(x, flatten = TRUE, verbose = FALSE)
      )
      dat[, data_columns, drop = FALSE]
    },
    error = function(x) {
      NULL
    }
  )
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}

#' @export
get_data.survfit <- get_data.gls

#' @export
get_data.aareg <- get_data.gls

#' @export
get_data.complmrob <- get_data.gls

#' @export
get_data.nlrq <- get_data.gls

#' @export
get_data.robmixglm <- get_data.gls

#' @export
get_data.selection <- get_data.gls


#' @export
get_data.lqmm <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(x$mfArgs$data, error = function(x) NULL)
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}

#' @export
get_data.nls <- get_data.gls

#' @export
get_data.gnls <- get_data.gls


# zero-inflated models -------------------------------------------------------


#' @export
get_data.hurdle <- function(x,
                            component = "all",
                            source = "environment",
                            verbose = TRUE,
                            ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    component = component,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  component <- match.arg(component, choices = c("all", "conditional", "zi", "zero_inflated", "dispersion"))
  .return_zeroinf_data(x, component, verbose = verbose)
}

#' @export
get_data.zeroinfl <- get_data.hurdle

#' @export
get_data.zerotrunc <- get_data.hurdle


#' @export
get_data.zcpglm <- function(x,
                            component = "all",
                            source = "environment",
                            verbose = TRUE,
                            ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    component = component,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  component <- match.arg(component, choices = c("all", "conditional", "zi", "zero_inflated"))

  mf <- stats::model.frame(x)
  mf_zero <- mf$zero
  mf_tweedie <- mf$tweedie

  # zcpglm saves variables twice, once in the model frame for zero-inflated
  # model and once for the tweedie-model. we now need to remove duplicates
  cn <- setdiff(colnames(mf$zero), colnames(mf$tweedie))

  if (length(cn)) {
    mf_zero <- mf_zero[cn]
  } else {
    mf_zero <- NULL
  }

  mf <- switch(component,
    all = do.call(cbind, compact_list(list(mf_tweedie, mf_zero))),
    conditional = mf_tweedie,
    zi = ,
    zero_inflated = mf_zero
  )
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}


# mixed models -------------------------------------------------------------


#' @rdname get_data
#' @export
get_data.glmmTMB <- function(x,
                             effects = "all",
                             component = "all",
                             source = "environment",
                             verbose = TRUE,
                             ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(
    x,
    effects = effects,
    component = component,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  component <- match.arg(component,
    choices = c("all", "conditional", "zi", "zero_inflated", "dispersion")
  )

  model.terms <- find_variables(
    x,
    effects = "all",
    component = "all",
    flatten = FALSE,
    verbose = FALSE
  )

  mf <- tryCatch(stats::model.frame(x), error = function(x) NULL)
  mf <- .prepare_get_data(x, mf, effects, verbose = verbose)

  # add variables from other model components
  mf <- .add_zeroinf_data(x, mf, model.terms$dispersion)
  mf <- .add_zeroinf_data(x, mf, model.terms$zero_inflated)
  mf <- .add_zeroinf_data(x, mf, model.terms$zero_inflated_random)
  mf <- .add_zeroinf_data(x, mf, model.terms$zi)
  mf <- .add_zeroinf_data(x, mf, model.terms$zi_random)
  .return_combined_data(x, mf, effects, component, model.terms, verbose = verbose)
}


#' @export
get_data.merMod <- function(x,
                            effects = "all",
                            source = "environment",
                            verbose = TRUE,
                            ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    effects = effects,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))

  mf <- .safe({
    switch(effects,
      fixed = stats::model.frame(x, fixed.only = TRUE),
      all = stats::model.frame(x, fixed.only = FALSE),
      random = stats::model.frame(x, fixed.only = FALSE)[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE] # nolint
    )
  })
  .prepare_get_data(x, mf, effects, verbose = verbose)
}


#' @export
get_data.mmrm <- function(x,
                          effects = "all",
                          source = "environment",
                          verbose = TRUE,
                          ...) {
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  # find variables
  fixed_vars <- find_variables(x, effects = "fixed", flatten = TRUE)
  random_vars <- find_random(x, split_nested = TRUE, flatten = TRUE)
  # data from model frame
  mf <- .prepare_get_data(x, stats::model.frame(x), effects, verbose = verbose)
  tryCatch(
    switch(effects,
      fixed = mf[intersect(colnames(mf), fixed_vars)],
      all = mf[intersect(colnames(mf), unique(c(fixed_vars, random_vars)))],
      random = mf[intersect(colnames(mf), random_vars)]
    ),
    error = function(x) NULL
  )
}

#' @export
get_data.mmrm_fit <- get_data.mmrm

#' @export
get_data.mmrm_tmb <- get_data.mmrm


#' @export
get_data.merModList <- function(x, effects = "all", ...) {
  format_warning("Can't access data for `merModList` objects.")
  return(NULL)
}


#' @export
get_data.MANOVA <- function(x,
                            effects = "all",
                            source = "environment",
                            verbose = TRUE,
                            ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))

  mf <- .safe({
    switch(effects,
      fixed = .remove_column(x$input$data, x$input$subject),
      all = x$input$data,
      random = x$input$data[, x$input$subject, drop = FALSE]
    )
  })
  .prepare_get_data(x, mf, effects, verbose = verbose)
}

#' @export
get_data.RM <- get_data.MANOVA


#' @export
get_data.cpglmm <- function(x,
                            effects = "all",
                            source = "environment",
                            verbose = TRUE,
                            ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  dat <- stats::model.frame(x)

  mf <- tryCatch(
    switch(effects,
      fixed = dat[, find_predictors(x, effects = "fixed", flatten = TRUE, verbose = FALSE), drop = FALSE],
      all = dat,
      random = dat[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE]
    ),
    error = function(x) NULL
  )
  .prepare_get_data(x, mf, effects, verbose = verbose)
}

#' @export
get_data.HLfit <- get_data.cpglmm


#' @export
get_data.glmm <- function(x,
                          effects = "all",
                          source = "environment",
                          verbose = TRUE,
                          ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    effects = effects,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  dat <- get_data.default(x, verbose = verbose)

  mf <- .safe({
    switch(effects,
      fixed = dat[, find_predictors(
        x,
        effects = "fixed",
        flatten = TRUE,
        verbose = FALSE
      ), drop = FALSE],
      all = dat,
      random = dat[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE]
    )
  })
  .prepare_get_data(x, mf, effects, verbose = verbose)
}


#' @export
get_data.mixor <- function(x,
                           effects = "all",
                           source = "environment",
                           verbose = TRUE,
                           ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    effects = effects,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))

  mf <- tryCatch(
    switch(effects,
      fixed = stats::model.frame(x),
      all = cbind(stats::model.frame(x), x$id),
      random = data.frame(x$id)
    ),
    error = function(x) NULL
  )
  fix_cn <- which(colnames(mf) %in% c("x.id", "x$id"))
  colnames(mf)[fix_cn] <- safe_deparse(x$call$id)

  .prepare_get_data(x, mf, effects, verbose = verbose)
}


#' @export
get_data.glmmadmb <- function(x,
                              effects = "all",
                              source = "environment",
                              verbose = TRUE,
                              ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    effects = effects,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))

  fixed_data <- x$frame
  random_data <- .recover_data_from_environment(x, verbose = verbose)[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE]

  mf <- .safe({
    switch(effects,
      fixed = fixed_data,
      all = cbind(fixed_data, random_data),
      random = random_data
    )
  })
  .prepare_get_data(x, mf, effects, verbose = verbose)
}


#' @export
get_data.rlmerMod <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  .get_data_from_modelframe(x, stats::model.frame(x), effects)
}

#' @export
get_data.clmm <- get_data.rlmerMod


#' @export
get_data.mixed <- function(x,
                           effects = "all",
                           source = "environment",
                           verbose = TRUE,
                           ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    effects = effects,
    source = source,
    verbose = verbose
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  .get_data_from_modelframe(x, x$data, effects)
}


#' @export
#' @rdname get_data
#' @param shape Return long or wide data? Only applicable in repeated measures
#'   designs.
get_data.afex_aov <- function(x, shape = c("long", "wide"), ...) {
  if (length(attr(x, "within"))) {
    shape <- match.arg(shape)
  } else {
    shape <- "long"
  }
  x$data[[shape]]
}


#' @export
get_data.sem <- function(x,
                         effects = "all",
                         source = "environment",
                         verbose = TRUE,
                         ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x,
    effects = effects,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- tryCatch(
    {
      dat <- .recover_data_from_environment(x, verbose = verbose)
      vars <- switch(effects,
        all = find_variables(x, flatten = TRUE, verbose = FALSE),
        fixed = find_variables(x, effects = "fixed", flatten = TRUE, verbose = FALSE),
        random = find_random(x, flatten = TRUE)
      )
      dat[, intersect(vars, colnames(dat)), drop = FALSE]
    },
    error = function(x) {
      NULL
    }
  )
  .prepare_get_data(x, stats::na.omit(mf), effects, verbose = verbose)
}


#' @export
get_data.lme <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  dat <- .safe(x$data)

  stats::na.omit(.get_data_from_modelframe(x, dat, effects))
}


#' @export
get_data.MixMod <- function(x,
                            effects = "all",
                            component = "all",
                            source = "environment",
                            verbose = TRUE,
                            ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(
    x,
    effects = effects,
    component = component,
    source = source,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- validate_argument(effects, c("all", "fixed", "random"))
  component <- validate_argument(
    component,
    c("all", "conditional", "zi", "zero_inflated", "dispersion")
  )

  tryCatch(
    {
      fitfram <- stats::model.frame(x, type = "fixed")
      fitfram_re <- stats::model.frame(x, type = "random")
      fitfram_zi <- stats::model.frame(x, type = "zi_fixed")
      fitfram_zi_re <- stats::model.frame(x, type = "zi_random")

      if (!is_empty_object(fitfram_re)) {
        for (i in seq_along(fitfram_re)) {
          fitfram <- .merge_dataframes(fitfram_re[[i]], fitfram, replace = TRUE)
        }
      }
      if (!is_empty_object(fitfram_zi)) {
        fitfram <- .merge_dataframes(fitfram_zi, fitfram, replace = TRUE)
      }
      if (!is_empty_object(fitfram_zi_re)) {
        for (i in seq_along(fitfram_zi_re)) {
          fitfram <- .merge_dataframes(fitfram_zi_re[[i]], fitfram, replace = TRUE)
        }
      }

      fitfram$grp__id <- unlist(x$id)
      colnames(fitfram)[ncol(fitfram)] <- x$id_name[1]

      # test...
      fitfram <- .prepare_get_data(x, fitfram, effects, verbose = verbose)

      model.terms <- find_variables(
        x,
        effects = "all",
        component = "all",
        flatten = FALSE,
        verbose = FALSE
      )

      .return_combined_data(x, mf = fitfram, effects, component, model.terms, verbose = verbose)
    },
    error = function(x) {
      NULL
    }
  )
}


#' @export
get_data.BBmm <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- tryCatch(
    {
      dat <- .recover_data_from_environment(x, verbose = verbose)[, find_variables(x, flatten = TRUE), drop = FALSE]
      switch(effects,
        all = dat[, find_variables(x, flatten = TRUE), drop = FALSE],
        fixed = dat[, find_variables(x, effects = "fixed", flatten = TRUE), drop = FALSE],
        random = dat[, find_random(x, flatten = TRUE), drop = FALSE]
      )
    },
    error = function(x) {
      x$X
    }
  )

  .prepare_get_data(x, stats::na.omit(mf), effects, verbose = verbose)
}


#' @export
get_data.glimML <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  dat <- x@data
  mf <- switch(effects,
    all = dat[, find_variables(x, flatten = TRUE), drop = FALSE],
    fixed = dat[, find_variables(x, effects = "fixed", flatten = TRUE), drop = FALSE],
    random = dat[, find_random(x, flatten = TRUE), drop = FALSE]
  )

  .prepare_get_data(x, stats::na.omit(mf), effects, verbose = verbose)
}


# sem models -------------------------------------

#' @export
get_data.lavaan <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  if (identical(source, "environment")) {
    model_data <- .safe(.recover_data_from_environment(x, verbose = verbose), NULL)

    if (!is.null(model_data)) {
      return(model_data)
    }
  }

  # fall back to extract data from model frame
  check_if_installed("lavaan")
  as.data.frame(lavaan::lavInspect(x, what = "data"))
}

#' @export
get_data.blavaan <- get_data.lavaan


# additive models (gam) -------------------------------------

#' @export
get_data.gam <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  mf <- tryCatch(stats::model.frame(x), error = function(x) NULL)
  .prepare_get_data(x, mf, verbose = verbose)
}


#' @export
get_data.list <- function(x, source = "environment", verbose = TRUE, ...) {
  model_data <- NULL
  if (any(c("gam", "gamm") %in% names(x))) {
    x <- x$gam
    class(x) <- c(class(x), c("glm", "lm"))
    mf <- tryCatch(stats::model.frame(x), error = function(x) NULL)
    model_data <- .prepare_get_data(x, mf, verbose = verbose)
  } else {
    get_data.default(x, source = source, verbose = verbose, ...)
  }
  model_data
}


#' @export
get_data.vgam <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    get(x@misc$dataname, envir = parent.frame())[, find_variables(x, flatten = TRUE), drop = FALSE],
    error = function(x) NULL
  )

  .prepare_get_data(x, mf, verbose = verbose)
}


#' @export
get_data.gamm <- function(x, verbose = TRUE, ...) {
  x <- x$gam
  class(x) <- c(class(x), c("glm", "lm"))

  mf <- tryCatch(stats::model.frame(x), error = function(x) NULL)
  .prepare_get_data(x, mf, verbose = verbose)
}


#' @export
get_data.gamlss <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    {
      elements <- c("mu", "sigma", "nu", "tau")
      mf_list <- compact_list(lapply(elements, function(e) {
        if (paste0(e, ".x") %in% names(x)) {
          stats::model.frame(x, what = e)
        } else {
          NULL
        }
      }))

      mf_data <- mf_list[[1]]

      if (length(mf_list) > 1L) {
        for (i in 2:length(mf_list)) {
          cn <- setdiff(colnames(mf_list[[i]]), colnames(mf_data))
          if (length(cn)) mf_data <- cbind(mf_data, mf_list[[i]][, cn, drop = FALSE])
        }
      }

      mf_data
    },
    error = function(x) {
      NULL
    }
  )
  .prepare_get_data(x, mf, effects = "all", verbose = verbose)
}


# fixed effects and panel regression --------------------------------------


#' @export
get_data.felm <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  # original data does not appear to be stored in the model object
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  .get_data_from_modelframe(x, stats::model.frame(x), effects, verbose = verbose)
}


#' @export
get_data.feis <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  # original data does not appear to be stored in the model object
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- tryCatch(.recover_data_from_environment(x, verbose = verbose),
    error = function(x) stats::model.frame(x)
  )
  .get_data_from_modelframe(x, mf, effects, verbose = verbose)
}


#' @export
get_data.fixest <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  # original data does not appear to be stored in the model object
  # see https://github.com/lrberge/fixest/issues/340 and #629
  model_call <- get_call(x)
  mf <- eval(model_call$data, envir = parent.env(x$call_env))
  # mf <- .recover_data_from_environment(x, verbose = verbose)
  .get_data_from_modelframe(x, mf, effects = "all", verbose = verbose)
}


#' @export
get_data.feglm <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- as.data.frame(x$data)
  .get_data_from_modelframe(x, mf, effects = "all", verbose = verbose)
}


#' @export
get_data.pgmm <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  model_terms <- find_variables(x, effects = "all", component = "all", flatten = TRUE)
  mf <- tryCatch(.recover_data_from_environment(x, verbose = verbose)[, model_terms, drop = FALSE],
    error = function(x) NULL
  )
  .prepare_get_data(x, mf, verbose = verbose)
}


#' @export
get_data.plm <- function(x, source = "environment", verbose = TRUE, ...) {
  # extract index variables
  index <- eval(get_call(x)$index)
  # try to recover data from environment
  # avoid feeding the same argument twice
  if ("additional_variables" %in% names(list(...))) {
    model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)
  } else {
    model_data <- .get_data_from_environment(x, source = source, additional_variables = index, verbose = verbose, ...)
  }

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- stats::model.frame(x)
  model_terms <- find_variables(x, effects = "all", component = "all", flatten = TRUE)
  cn <- colnames(mf)
  mf <- as.data.frame(lapply(mf, function(i) {
    if (is.factor(i)) {
      as.factor(i)
    } else if (is.character(i)) {
      as.character(i)
    } else if (is.integer(i)) {
      as.integer(i)
    } else if (is.numeric(i)) {
      as.numeric(i)
    } else if (is.logical(i)) {
      as.logical(i)
    } else {
      as.vector(i)
    }
  }))
  colnames(mf) <- clean_names(cn)

  # find index variables
  index <- eval(parse(text = safe_deparse(x$call))[[1]]$index)

  # try to get index variables from orignal data
  if (!is.null(index)) {
    original_data <- .recover_data_from_environment(x, verbose = verbose)
    keep <- intersect(index, colnames(original_data))
    if (length(keep)) {
      mf <- cbind(mf, original_data[, keep, drop = FALSE])
      model_terms <- c(model_terms, keep)
    }
  }

  .prepare_get_data(x, mf[, model_terms, drop = FALSE], verbose = verbose)
}


#' @export
get_data.wbm <- function(x, effects = "all", verbose = TRUE, ...) {
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- stats::model.frame(x)

  # dat <- as.data.frame(x@orig_data)

  if (effects == "random") {
    return(stats::na.omit(mf[, unique(find_random(x, split_nested = TRUE, flatten = TRUE)), drop = FALSE]))
  }

  resp.col <- which(colnames(mf) == find_response(x))
  mf <- mf[, c(resp.col, (seq_len(ncol(mf)))[-resp.col])]

  .prepare_get_data(x, stats::na.omit(mf), effects, verbose = verbose)
}


#' @export
get_data.wbgee <- get_data.wbm


#' @export
get_data.ivreg <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- .safe(stats::model.frame(x))
  ft <- find_variables(x, flatten = TRUE)

  if (is_empty_object(mf)) {
    final_mf <- .safe({
      dat <- .recover_data_from_environment(x, verbose = verbose)
      dat[, ft, drop = FALSE]
    })
  } else {
    cn <- clean_names(colnames(mf))
    remain <- setdiff(ft, cn)
    if (is_empty_object(remain)) {
      final_mf <- mf
    } else {
      final_mf <- .safe({
        dat <- .recover_data_from_environment(x, verbose = verbose)
        cbind(mf, dat[, remain, drop = FALSE])
      })
    }
  }

  .prepare_get_data(x, stats::na.omit(final_mf), verbose = verbose)
}


#' @export
get_data.iv_robust <- get_data.ivreg


#' @export
get_data.ivprobit <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  .prepare_get_data(x, stats::na.omit(as.data.frame(x$mr1)), verbose = verbose)
}


#' @export
get_data.bife <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- as.data.frame(x$data)

  if (effects == "random") {
    return(stats::na.omit(mf[, unique(find_random(x, split_nested = TRUE, flatten = TRUE)), drop = FALSE]))
  } else if (effects == "fixed") {
    mf <- mf[, setdiff(colnames(mf), unique(find_random(x, split_nested = TRUE, flatten = TRUE))), drop = FALSE]
  }

  .prepare_get_data(x, stats::na.omit(mf), effects, verbose = verbose)
}


# Bayesian regression ---------------------------------------------------


#' @export
get_data.brmsfit <- function(x, effects = "all", component = "all", source = "environment", verbose = FALSE, ...) {
  # try to recover data from environment
  # verbose is FALSE by default because `get_call()` often does not work on
  # `brmsfit` objects, so we typically default to the `data` held in the object.
  data_name <- attr(x$data, "data_name")
  model_data <- .get_data_from_environment(
    x,
    effects = effects,
    component = component,
    source = source,
    verbose = verbose,
    data_name = data_name,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  component <- match.arg(component, choices = c("all", .all_elements()))

  model.terms <- find_variables(x, effects = "all", component = "all", flatten = FALSE)
  mf <- stats::model.frame(x)

  if (.is_multi_membership(x)) {
    model.terms <- lapply(model.terms, .clean_brms_mm)
    rs <- setdiff(
      unlist(find_random_slopes(x), use.names = FALSE),
      unlist(model.terms, use.names = FALSE)
    )
    if (!is_empty_object(rs)) model.terms$random <- c(rs, model.terms$random)
  }

  .return_combined_data(
    x,
    .prepare_get_data(x, mf, effects = effects, verbose = verbose),
    effects,
    component,
    model.terms,
    is_mv = is_multivariate(x),
    verbose = verbose
  )
}


#' @export
get_data.stanreg <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))

  model.terms <- find_variables(x,
    effects = "all",
    component = "all",
    flatten = FALSE
  )

  mf <- stats::model.frame(x)

  .return_combined_data(
    x,
    .prepare_get_data(x, mf, effects = effects, verbose = verbose),
    effects,
    component = "all",
    model.terms,
    is_mv = is_multivariate(x),
    verbose = verbose
  )
}


#' @export
get_data.BFBayesFactor <- function(x, ...) {
  x@data
}


#' @export
get_data.MCMCglmm <- function(x, effects = "all", source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- tryCatch(
    {
      env_dataframes <- names(which(unlist(eapply(.GlobalEnv, is.data.frame))))
      pv <- find_predictors(x, effects = effects, component = "all", flatten = TRUE)
      matchframe <- unlist(lapply(env_dataframes, function(.x) {
        dat <- get(.x)
        all(pv %in% colnames(dat))
      }))
      mf <- env_dataframes[matchframe][1]
      if (is.na(mf)) {
        NULL
      } else {
        dat <- get(mf)
        switch(effects,
          fixed = dat[, setdiff(colnames(dat), find_random(x, flatten = TRUE)), drop = FALSE],
          all = dat,
          random = dat[, find_random(x, flatten = TRUE), drop = FALSE]
        )
      }
    },
    error = function(x) {
      NULL
    }
  )

  .prepare_get_data(x, mf, effects = effects, verbose = verbose)
}


#' @export
get_data.stanmvreg <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    {
      out <- data.frame()
      for (i in stats::model.frame(x)) {
        out <- .merge_dataframes(out, i)
      }

      out
    },
    error = function(x) {
      NULL
    }
  )

  .prepare_get_data(x, mf, verbose = verbose)
}


# mfx models ------------------------------------------------------

#' @export
get_data.betamfx <- function(x, ...) {
  get_data(x$fit, ...)
}

#' @export
get_data.betaor <- get_data.betamfx

#' @export
get_data.logitor <- get_data.betamfx

#' @export
get_data.poissonirr <- get_data.betamfx

#' @export
get_data.negbinirr <- get_data.betamfx

#' @export
get_data.logitmfx <- get_data.betamfx

#' @export
get_data.poissonmfx <- get_data.betamfx

#' @export
get_data.probitmfx <- get_data.betamfx

#' @export
get_data.negbinmfx <- get_data.betamfx


# other models ------------------------------------------------------


#' @export
get_data.svy_vglm <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- x$design$variables[find_variables(x, flatten = TRUE)]
  .prepare_get_data(x, mf, verbose = verbose)
}


#' @export
get_data.mediate <- function(x, source = "environment", verbose = TRUE, ...) {
  d1 <- get_data(x$model.m, source = source, verbose = verbose)
  d2 <- get_data(x$model.y, source = source, verbose = verbose)
  merge(d1, d2, sort = FALSE, all = TRUE)
}


#' @export
get_data.mle2 <- function(x, ...) {
  as.data.frame(do.call(cbind, x@data))
}

#' @export
get_data.mle <- get_data.mle2


#' @export
get_data.nestedLogit <- function(x, ...) {
  d <- x$data
  if (!is.null(x$subset)) {
    d <- subset(d, eval(parse(text = x$subset), envir = d))
  }
  d
}


#' @export
get_data.glht <- function(x, source = "environment", verbose = TRUE, ...) {
  get_data(x$model, source = source, verbose = verbose, ...)
}


#' @export
get_data.averaging <- function(x, ...) {
  ml <- attributes(x)$modelList
  if (is.null(ml)) {
    format_warning("Can't retrieve data. Please use `fit = TRUE` in `model.avg()`.")
    return(NULL)
  }

  mf <- tryCatch(
    Reduce(function(x, y) merge(x, y, all = TRUE, sort = FALSE), lapply(ml, stats::model.frame)),
    error = function(x) NULL
  )

  if (is.null(mf)) {
    mf <- tryCatch(
      .recover_data_from_environment(x)[, find_variables(x, flatten = TRUE), drop = FALSE],
      error = function(x) NULL
    )
  }

  .prepare_get_data(x, mf)
}


#' @export
get_data.Arima <- function(x, ...) {
  # first try, parent frame
  dat <- tryCatch(eval(x$call$x, envir = parent.frame()), error = function(e) NULL)

  if (is.null(dat)) {
    # second try, global env
    dat <- tryCatch(eval(x$call$x, envir = globalenv()), error = function(e) NULL)
  }

  dat
}


#' @export
get_data.coxph <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  # first try, parent frame
  dat <- tryCatch(
    {
      mf <- .recover_data_from_environment(x, verbose = verbose)
      mf <- .prepare_get_data(x, stats::na.omit(mf), verbose = FALSE)
    },
    error = function(x) NULL
  )

  # second try, default extractor. Less good because of coercion to other types
  if (is.null(dat)) {
    # second try, global env
    dat <- get_data.default(x, source = source, verbose = verbose, ...)
  }

  dat
}

#' @export
get_data.coxme <- get_data.coxph


#' @export
get_data.BGGM <- function(x, ...) {
  x$Y
}


#' @export
get_data.mcmc.list <- function(x, ...) {
  NULL
}


#' @export
get_data.DirichletRegModel <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- x$data
  resp <- vapply(x$data, inherits, TRUE, "DirichletRegData")
  .prepare_get_data(x, mf[!resp], verbose = verbose)
}


#' @export
get_data.vglm <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    if (length(x@model)) {
      x@model
    } else {
      env <- environment(x@terms$terms)
      if (is.null(env)) env <- parent.frame()
      fcall <- x@call
      fcall$method <- "model.frame"
      fcall$smart <- FALSE
      eval(fcall, env, parent.frame())
    },
    error = function(x) NULL
  )

  .prepare_get_data(x, mf)
}


#' @export
get_data.biglm <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- stats::model.frame(x)
  .prepare_get_data(x, mf)
}

#' @export
get_data.bigglm <- get_data.biglm


#' @export
get_data.LORgee <- function(x, source = "environment", effects = "all", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, effects = effects, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  effects <- match.arg(effects, choices = c("all", "fixed", "random"))
  mf <- tryCatch(
    {
      dat <- .recover_data_from_environment(x, verbose = verbose)[, find_variables(x, flatten = TRUE), drop = FALSE]
      switch(effects,
        all = dat[, find_variables(x, flatten = TRUE), drop = FALSE],
        fixed = dat[, find_variables(x, effects = "fixed", flatten = TRUE), drop = FALSE],
        random = dat[, find_random(x, flatten = TRUE), drop = FALSE]
      )
    },
    error = function(x) {
      stats::model.frame(x)
    }
  )

  .prepare_get_data(x, stats::na.omit(mf), effects = effects)
}


#' @export
get_data.gmnl <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(x$mf, error = function(x) NULL)
  .prepare_get_data(x, mf)
}


#' @export
get_data.gbm <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    get(safe_deparse(x$call$data), envir = parent.frame())[, find_variables(x, flatten = TRUE), drop = FALSE],
    error = function(x) stats::model.frame(x)
  )

  .get_data_from_modelframe(x, mf, effects = "all")
}


#' @export
get_data.tobit <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  dat <- .recover_data_from_environment(x, verbose = verbose)
  ft <- find_variables(x, flatten = TRUE, verbose = FALSE)
  remain <- intersect(ft, colnames(dat))

  .prepare_get_data(x, stats::na.omit(dat[, remain, drop = FALSE]), verbose = verbose)
}


#' @export
get_data.clmm2 <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    {
      data_complete <- x$location
      data_scale <- x$scale

      if (!is.null(data_scale)) {
        remain <- setdiff(colnames(data_scale), colnames(data_complete))
        if (length(remain)) data_complete <- cbind(data_complete, data_scale[, remain, drop = FALSE])
      }

      data_complete <- cbind(data_complete, x$grFac)
      colnames(data_complete)[ncol(data_complete)] <- unlist(.find_random_effects(x, f = find_formula(x, verbose = FALSE), split_nested = TRUE)) # nolint

      data_complete
    },
    error = function(x) {
      NULL
    }
  )

  .prepare_get_data(x, mf)
}


#' @export
get_data.clm2 <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(
    {
      data_complete <- x$location
      data_scale <- x$scale

      if (!is.null(data_scale)) {
        remain <- setdiff(colnames(data_scale), colnames(data_complete))
        if (length(remain)) data_complete <- cbind(data_complete, data_scale[, remain, drop = FALSE])
      }

      data_complete
    },
    error = function(x) {
      NULL
    }
  )

  .prepare_get_data(x, mf)
}


#' @export
get_data.bracl <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- stats::model.frame(x)
  suppressWarnings(.prepare_get_data(x, mf, verbose = verbose))
}


#' @export
get_data.mlogit <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(as.data.frame(stats::model.frame(x)), error = function(x) NULL)
  .prepare_get_data(x, mf, verbose = verbose)
}


#' @export
get_data.phylolm <- function(x, source = "environment", verbose = TRUE, ...) {
  # DO NOT TOUCH THE SOURCE ARGUMENT!
  # phylo models have no model.frame() method, so we can only recover from
  # environment. We still need the "source" argument, even if it's not used here,
  #  to avoid the "multiple argument match" error for those instances, where
  # `get_data()` is called # with `source = "frame"`.
  .get_data_from_environment(x, source = "environment", verbose = verbose, ...)
}

#' @export
get_data.phyloglm <- get_data.phylolm


#' @export
#' @rdname get_data
#' @param include_interval For meta-analysis models, should normal-approximation
#'   confidence intervals be added for each response effect size?
#' @param transf For meta-analysis models, if intervals are included, a function
#'   applied to each response effect size and its interval.
#' @param transf_args For meta-analysis models, an optional list of arguments
#'   passed to the `transf` function.
#' @param ci For meta-analysis models, the Confidence Interval (CI) level if
#'   `include_interval = TRUE`. Default to 0.95 (95%).
get_data.rma <- function(x,
                         source = "environment",
                         verbose = TRUE,
                         include_interval = FALSE,
                         transf = NULL,
                         transf_args = NULL,
                         ci = 0.95,
                         ...) {
  # standard errors and moderators are not found by find_predictors(),
  # so we need them as additional variables
  model_call <- get_call(x)
  additional_variables <- c(
    safe_deparse(model_call$vi),
    safe_deparse(model_call$sei),
    safe_deparse(model_call$mods)
  )
  # try to recover data from environment
  model_data <- .get_data_from_environment(
    x,
    source = source,
    additional_variables = additional_variables,
    verbose = verbose,
    ...
  )

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(.recover_data_from_environment(x, verbose = verbose), error = function(x) NULL)
  mf_attr <- attributes(mf)
  mf <- merge(mf, data.frame(Weights = get_weights(x)), by = "row.names", all = TRUE, sort = FALSE)
  rownames(mf) <- mf$Row.names
  mf$Row.names <- NULL
  mostattributes(mf) <- c(
    attributes(mf)[c("names", "row.names")],
    mf_attr[c("yi.names", "vi.names", "digits", "class")]
  )
  if (isTRUE(include_interval)) {
    model_response <- .safe(mf[[find_response(x)]])
    sei <- .safe(mf[[model_call$sei]])
    if (is.null(sei)) {
      sei <- .safe(sqrt(mf[[model_call$vi]]))
    }
    if (is.null(sei)) {
      format_error("Could not find `sei` or `vi` for this model.")
    }
    mf$ci <- ci
    mf$CI_low <- model_response - stats::qnorm((1 - ci) / 2, lower.tail = FALSE) * sei
    mf$CI_high <- model_response + stats::qnorm((1 - ci) / 2, lower.tail = FALSE) * sei
    if (!is.null(transf)) {
      if (!is.function(transf)) {
        format_error("`transf` must be a function.")
      }
      if (is.null(transf_args)) {
        mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf)
        mf$CI_low <- sapply(mf$CI_low, transf)
        mf$CI_high <- sapply(mf$CI_high, transf)
      } else {
        mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf, transf_args)
        mf$CI_low <- sapply(mf$CI_low, transf, transf_args)
        mf$CI_high <- sapply(mf$CI_high, transf, transf_args)
      }
    }
  }
  original_rownames <- rownames(x$X)
  if (is.null(original_rownames)) {
    original_rownames <- seq_len(nrow(mf))
  }
  .prepare_get_data(x, mf[original_rownames, , drop = FALSE], verbose = verbose)
}


#' @export
get_data.metaplus <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- .safe(.recover_data_from_environment(x, verbose = verbose))
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}

#' @export
get_data.ivFixed <- get_data.metaplus


#' @export
get_data.meta_random <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- .safe(x$data$data)
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}

#' @export
get_data.meta_fixed <- get_data.meta_random


#' @export
get_data.meta_bma <- function(x, source = "environment", verbose = TRUE, ...) {
  # try to recover data from environment
  model_data <- .get_data_from_environment(x, source = source, verbose = verbose, ...)

  if (!is.null(model_data)) {
    return(model_data)
  }

  # fall back to extract data from model frame
  mf <- tryCatch(x$meta$fixed$data$data, error = function(x) NULL)
  .prepare_get_data(x, stats::na.omit(mf), verbose = verbose)
}


#' @export
get_data.bfsl <- function(x, ...) {
  as.data.frame(x$data[c("x", "y", "sd_x", "sd_y")])
}


#' @export
get_data.mipo <- function(x, ...) {
  .safe({
    models <- eval(x$call$object)
    get_data(models$analyses[[1]], ...)
  })
}


#' @export
get_data.htest <- function(x, ...) {
  out <- NULL
  if (!is.null(x$data.name)) {
    out <- .retrieve_htest_data(x)
  }
  out
}


# helper -------------

.check_data_source_arg <- function(source) {
  source <- match.arg(source, choices = c("environment", "mf", "modelframe", "frame"))
  switch(source,
    environment = "environment",
    "frame"
  )
}
easystats/insight documentation built on April 13, 2025, 8:38 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
easystats/insight
Easy Access to Model Information for Various Model Objects

R/get_data.R
In easystats/insight: Easy Access to Model Information for Various Model Objects

Documented in get_data get_data.afex_aov get_data.default get_data.glmmTMB get_data.rma

R Package Documentation

Browse R Packages

We want your feedback!

easystats/insight Easy Access to Model Information for Various Model Objects

R/get_data.R In easystats/insight: Easy Access to Model Information for Various Model Objects

Documented in get_data get_data.afex_aov get_data.default get_data.glmmTMB get_data.rma

R Package Documentation

Browse R Packages

We want your feedback!

easystats/insight
Easy Access to Model Information for Various Model Objects

R/get_data.R
In easystats/insight: Easy Access to Model Information for Various Model Objects