R/find_terms.R

Defines functions .formula_to_string .get_variables_list_aovlist .get_variables_list find_terms.mipo find_terms.bfsl find_terms.afex_aov find_terms.aovlist .find_terms find_terms.default find_terms

Documented in find_terms find_terms.default

#' @title Find all model terms
#' @name find_terms
#'
#' @description Returns a list with the names of all terms, including response
#'   value and random effects, "as is". This means, on-the-fly tranformations
#'   or arithmetic expressions like `log()`, `I()`, `as.factor()` etc. are
#'   preserved.
#'
#' @param as_term_labels Logical, if `TRUE`, extracts model formula and tries to
#'   access the `"term.labels"` attribute. This should better mimic the `terms()`
#'   behaviour even for those models that do not have such a method, but may be
#'   insufficient, e.g. for mixed models.
#' @inheritParams find_formula
#' @inheritParams find_predictors
#'
#' @return A list with (depending on the model) following elements (character
#' vectors):
#'
#' - `response`, the name of the response variable
#' - `conditional`, the names of the predictor variables from the *conditional*
#'    model (as opposed to the zero-inflated part of a model)
#' - `random`, the names of the random effects (grouping factors)
#' - `zero_inflated`, the names of the predictor variables from the *zero-inflated* part of the model
#' - `zero_inflated_random`, the names of the random effects (grouping factors)
#' - `dispersion`, the name of the dispersion terms
#' - `instruments`, the names of instrumental variables
#'
#' Returns `NULL` if no terms could be found (for instance, due to
#' problems in accessing the formula).
#'
#' @note The difference to [`find_variables()`] is that `find_terms()`
#'   may return a variable multiple times in case of multiple transformations
#'   (see examples below), while `find_variables()` returns each variable
#'   name only once.
#'
#' @examplesIf require("lme4", quietly = TRUE)
#' data(sleepstudy, package = "lme4")
#' m <- suppressWarnings(lme4::lmer(
#'   log(Reaction) ~ Days + I(Days^2) + (1 + Days + exp(Days) | Subject),
#'   data = sleepstudy
#' ))
#'
#' find_terms(m)
#'
#' # sometimes, it is necessary to retrieve terms from "term.labels" attribute
#' m <- lm(mpg ~ hp * (am + cyl), data = mtcars)
#' find_terms(m, as_term_labels = TRUE)
#' @export
find_terms <- function(x, ...) {
  UseMethod("find_terms")
}

#' @rdname find_terms
#' @export
find_terms.default <- function(x, flatten = FALSE, as_term_labels = FALSE, verbose = TRUE, ...) {
  f <- find_formula(x, verbose = verbose)

  if (is.null(f)) {
    return(NULL)
  }

  # mimics original "terms()" behaviour, leads to slightly different results
  if (isTRUE(as_term_labels)) {
    return(lapply(f, function(i) attr(stats::terms(i), "term.labels")))
  }

  resp <- find_response(x, verbose = FALSE)

  if (is_multivariate(f) || isTRUE(attributes(f)$two_stage)) {
    l <- lapply(f, .get_variables_list, resp = resp)
  } else {
    l <- .get_variables_list(f, resp)
  }

  if (flatten) {
    unique(unlist(l, use.names = FALSE))
  } else {
    l
  }
}


.find_terms <- function(f, response) {
  out <- lapply(f, function(i) {
    if (is.list(i)) {
      .find_terms(i, response = NULL)
    } else {
      f_terms <- unname(attr(stats::terms(i), "term.labels"))
      sub("(.*)::(.*)", "\\2", f_terms)
    }
  })

  compact_list(c(list(response = response), out))
}



#' @export
find_terms.aovlist <- function(x, flatten = FALSE, verbose = TRUE, ...) {
  resp <- find_response(x, verbose = FALSE)
  f <- find_formula(x, verbose = verbose)[[1]]

  l <- .get_variables_list_aovlist(f, resp)
  if (flatten) {
    unique(unlist(l, use.names = FALSE))
  } else {
    l
  }
}


#' @export
find_terms.afex_aov <- function(x, flatten = FALSE, verbose = TRUE, ...) {
  resp <- find_response(x, verbose = FALSE)

  if (length(attr(x, "within")) == 0L) {
    l <- find_terms(x$lm, flatten = FALSE, verbose = TRUE, ...)
    l$response <- resp
  } else {
    f <- find_formula(x, verbose = verbose)[[1]]
    l <- .get_variables_list_aovlist(f, resp)
  }

  if (flatten) {
    unique(unlist(l, use.names = FALSE))
  } else {
    l
  }
}


#' @export
find_terms.bfsl <- function(x, flatten = FALSE, verbose = TRUE, ...) {
  resp <- find_response(x, verbose = FALSE)
  f <- find_formula(x, verbose = verbose)

  if (is.null(f)) {
    fx <- "x"
  } else {
    fx <- f[[1]][[3]]
  }
  l <- list(conditional = c(resp, fx))

  if (flatten) {
    unique(unlist(l, use.names = FALSE))
  } else {
    l
  }
}


# unsupported ------------------


#' @export
find_terms.mipo <- function(x, flatten = FALSE, ...) {
  l <- list(conditional = unique(as.vector(summary(x)$term)))
  if (flatten) {
    unique(unlist(l, use.names = FALSE))
  } else {
    l
  }
}



# helper -----------------------


.get_variables_list <- function(f, resp = NULL) {
  # exception for formula w/o response
  if (is.null(resp) || !is_empty_object(resp)) {
    f$response <- sub("(.*)::(.*)", "\\2", safe_deparse(f$conditional[[2L]]))
    f$conditional <- safe_deparse(f$conditional[[3L]])
  } else {
    f$conditional <- safe_deparse(f$conditional[[2L]])
  }

  f <- lapply(f, function(.x) {
    if (is.list(.x)) {
      .x <- vapply(.x, .formula_to_string, character(1))
    } else {
      if (!is.character(.x)) .x <- safe_deparse(.x)
    }
    .x
  })

  # protect "-1"
  f$conditional <- gsub("(-1|- 1)(?![^(]*\\))", "#1", f$conditional, perl = TRUE)

  # This regular expression matches any of the characters *, +, :, |, -, or /,
  # unless they are preceded by a ^ and followed by a closing parenthesis ).
  f <- lapply(f, function(.x) {
    pattern <- "(?<!\\^)[*+:|\\-\\/](?![^(]*\\))" # was: "[\\*\\+:\\-\\|/](?![^(]*\\))"
    f_parts <- gsub("~", "", trim_ws(unlist(
      strsplit(split = pattern, x = .x, perl = TRUE),
      use.names = FALSE
    )), fixed = TRUE)
    # if user has used namespace in formula-functions, these are returned
    # as empty elements. remove those here
    if (any(nchar(f_parts) == 0)) {
      f_parts <- f_parts[-which(nchar(f_parts) == 0)]
    }
    text_remove_backticks(unique(f_parts))
  })


  # remove "1" and "0" from variables in random effects

  if (object_has_names(f, "random")) {
    pos <- which(f$random %in% c("1", "0"))
    if (length(pos)) f$random <- f$random[-pos]
  }

  if (object_has_names(f, "zero_inflated_random")) {
    pos <- which(f$zero_inflated_random %in% c("1", "0"))
    if (length(pos)) f$zero_inflated_random <- f$zero_inflated_random[-pos]
  }

  # restore -1
  need_split <- endsWith(f$conditional, "#1")
  if (any(need_split)) {
    f$conditional <- c(
      f$conditional[!need_split],
      trim_ws(unlist(strsplit(f$conditional[need_split], " ", fixed = TRUE), use.names = FALSE))
    )
  }
  f$conditional <- gsub("#1", "-1", f$conditional, fixed = TRUE)

  # reorder, so response is first
  compact_list(f[c(length(f), 1:(length(f) - 1))])
}


.get_variables_list_aovlist <- function(f, resp = NULL) {
  i <- vapply(f[[3]], function(x) {
    x <- as.character(x)
    x[1] == "Error" && length(x) > 1
  }, TRUE)
  error <- utils::capture.output(print(f[[3]][i][[1]]))
  f[[3]][i] <- NULL
  f[[3]] <- f[[3]][[2]]
  f[[3]] <- as.name(paste0(attr(stats::terms.formula(f), "term.labels"), collapse = "+"))

  l <- .get_variables_list(f, resp)
  names(l) <- c("response", "conditional")
  l$error <- error
  l
}

.formula_to_string <- function(f) {
  if (!is.character(f)) f <- safe_deparse(f)
  f
}

Try the insight package in your browser

Any scripts or data that you put into this service are public.

insight documentation built on Nov. 26, 2023, 5:08 p.m.