R/tidiers.R

Defines functions .get_confint tidy_stan_model tidy_model

#' @importFrom stats qnorm pnorm
#' @importFrom effectsize standardize
#' @importFrom parameters model_parameters
#' @importFrom insight standardize_names
tidy_model <- function(
  model, ci.lvl, tf, type, bpe, robust, facets, show.zeroinf, p.val,
  standardize = FALSE, bootstrap = FALSE, iterations = 1000, seed = NULL, p_adjust = NULL, ...) {

  if (!is.logical(standardize) && standardize == "") standardize <- NULL
  if (is.logical(standardize) && standardize == FALSE) standardize <- NULL

  if (is.stan(model)) {
    out <- tidy_stan_model(model, ci.lvl, tf, type, bpe, show.zeroinf, facets, ...)
  } else {
    if (!is.null(standardize)) {
      if (isTRUE(standardize)) standardize <- "std"
      model <- effectsize::standardize(model, two_sd = isTRUE(standardize == "std2"))
    }
    if (!is.null(seed)) {
      set.seed(seed)
    }
    component <- ifelse(show.zeroinf & insight::model_info(model)$is_zero_inflated, "all", "conditional")

    if (is.null(p.val)) {
      if (inherits(model, c("glm", "polr"))) {
        p.val <- "profile"
      } else {
        p.val <- "wald"
      }
    }

    df_method <- switch(
      p.val,
      "wald" = "wald",
      "kr" = ,
      "kenward" = "kenward",
      "s" = ,
      "satterthwaite" = "satterthwaite",
      "profile" = "profile",
      p.val
    )

    if (!is.null(robust) && !is.null(robust$vcov.fun)) {
      if (grepl("^vcov", robust$vcov.fun)) {
        robust$vcov.fun <- sub("^vcov", "", robust$vcov.fun)
      }
      model_params <- parameters::model_parameters(model, ci = ci.lvl, component = component, bootstrap = bootstrap, iterations = iterations, robust = TRUE, vcov_estimation = robust$vcov.fun, vcov_type = robust$vcov.type, vcov_args = robust$vcov.args, df_method = df_method, p_adjust = p_adjust, effects = "fixed")
    } else {
      model_params <- parameters::model_parameters(model, ci = ci.lvl, component = component, bootstrap = bootstrap, iterations = iterations, df_method = df_method, p_adjust = p_adjust, effects = "fixed")
    }
    out <- insight::standardize_names(model_params, style = "broom")

    # warning for p-values?
    tryCatch({
      if (insight::model_info(model)$is_mixed && df_method == "kenward" && insight::find_algorithm(model)$algorithm != "REML") {
        warning("Model was not fitted by REML. Re-fitting model using REML, but p-values, df, etc. still might be unreliable.", call. = FALSE)
      }
    },
    error = function(e) { NULL }
    )

    column <- which(colnames(out) == "response")
    if (length(column)) colnames(out)[column] <- ifelse(isTRUE(facets), "facet", "response.level")

    column <- which(colnames(out) == "component")
    if (length(column)) colnames(out)[column] <- "wrap.facet"

    if (!is.null(out$effect) && "random" %in% out$effect) {
      out$group <- NULL
    }

    column <- which(colnames(out) == "group")
    if (length(column)) colnames(out)[column] <- "wrap.facet"

    # remove duplicated column names
    dupl_cols <- duplicated(colnames(out))
    if (any(dupl_cols)) {
      out <- out[!dupl_cols]
    }

    if ("component" %in% colnames(out)) {
      out$component[out$component == "zero_inflated"] <- "Zero-Inflated Model"
      out$component[out$component == "zi"] <- "Zero-Inflated Model"
      out$component[out$component == "conditional"] <- "Conditional Model"
      out$component[out$component == "count"] <- "Conditional Model"
    }

    attr(out, "pretty_names") <- attributes(model_params)$pretty_names
  }

  out
}




#' @importFrom stats mad formula
#' @importFrom bayestestR ci
#' @importFrom insight is_multivariate model_info
#' @importFrom sjmisc var_rename add_columns is_empty typical_value
#' @importFrom dplyr select filter slice inner_join n_distinct
#' @importFrom purrr map_dbl
#' @importFrom rlang .data
tidy_stan_model <- function(model, ci.lvl, tf, type, bpe, show.zeroinf, facets, ...) {

  # set defaults

  p.inner <- .5
  p.outer <- ci.lvl

  # get model information
  modfam <- insight::model_info(model)

  if (insight::is_multivariate(model))
    modfam <- modfam[[1]]

  # additional arguments for 'effects()'-function?
  add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x)

  # check whether we have "prob.inner" and "prob.outer" argument
  # and if so, use these for CI and Bayesian point estimate

  if ("prob.inner" %in% names(add.args)) p.inner <- eval(add.args[["prob.inner"]])
  if ("prob.outer" %in% names(add.args)) p.outer <- eval(add.args[["prob.outer"]])


  # get two CI-intervals

  if (type == "re")
    ty <- "random"
  else
    ty <- "fixed"

  d1 <- bayestestR::ci(model, ci = p.outer, effects = ty, component = "all")
  d2 <- bayestestR::ci(model, ci = p.inner, effects = ty, component = "all")

  if (!is.null(tf)) {
    funtrans <- match.fun(tf)

    d1$CI_low <- funtrans(d1$CI_low)
    d1$CI_high <- funtrans(d1$CI_high)
    d2$CI_low <- funtrans(d2$CI_low)
    d2$CI_high <- funtrans(d2$CI_high)
  }

  # bind columns, so we have inner and outer hdi interval

  dat <- d2 %>%
    dplyr::select(.data$CI_low, .data$CI_high) %>%
    sjmisc::var_rename(CI_low = "conf.low50", CI_high = "conf.high50") %>%
    sjmisc::add_columns(d1) %>%
    sjmisc::var_rename(CI_low = "conf.low", CI_high = "conf.high", Parameter = "term") %>%
    dplyr::select(-.data$CI, -.data$Effects, -.data$Component)

  # for brmsfit models, we need to remove some columns here to
  # match data rows later

  mod.dat <- as.data.frame(model, optional = FALSE)

  if (inherits(model, "brmsfit")) {
    re.sd <- string_starts_with("sd_", x = colnames(mod.dat))
    re.cor <- string_starts_with("cor_", x = colnames(mod.dat))
    lp <- string_starts_with("lp__", x = colnames(mod.dat))
    resp.cor <- string_starts_with("rescor__", x = colnames(mod.dat))
    priors <- string_starts_with("prior_", x = colnames(mod.dat))
    xme <- string_starts_with(pattern = "Xme_me", x = colnames(mod.dat))
    xme.sd <- string_starts_with(pattern = "sdme_me", x = colnames(mod.dat))

    brmsfit.removers <- unique(c(re.sd, re.cor, lp, resp.cor, priors, xme, xme.sd))

    if (!sjmisc::is_empty(brmsfit.removers))
      mod.dat <- dplyr::select(mod.dat, !! -brmsfit.removers)

    # also clean prepared data frame
    resp.cor <- string_starts_with("rescor__", x = dat$term)

    if (!sjmisc::is_empty(resp.cor))
      dat <- dplyr::slice(dat, !! -resp.cor)
  }

  # do transformation on posterior samples first,
  # then summarize (see https://discourse.mc-stan.org/t/monotonic-effects-in-non-gaussian-models/6353/5)

  # need to transform point estimate as well
  if (!is.null(tf)) {
    funtrans <- match.fun(tf)
    all.cols <- sjmisc::seq_col(mod.dat)
    simp.pars <- string_starts_with("simo_mo", colnames(mod.dat))
    if (!sjmisc::is_empty(simp.pars)) all.cols <- all.cols[-simp.pars]
    for (i in all.cols) mod.dat[[i]] <- funtrans(mod.dat[[i]])
  }


  # add bayesian point estimate

  est <- purrr::map_dbl(mod.dat, ~ sjmisc::typical_value(.x, fun = bpe))

  dat <- data_frame(
    term = names(est),
    estimate = est,
    p.value = 0,
    std.error = purrr::map_dbl(mod.dat, stats::mad)
  ) %>%
    dplyr::inner_join(
      dat,
      by = "term"
    )

  # sort columns, for tab_model()
  sorted_columns <- intersect(
    c("term", "estimate", "std.error", "conf.low", "conf.high", "conf.low50", "conf.high50", "p.value"),
    colnames(dat)
  )
  dat <- dat[, sorted_columns]

  # remove some of the information not needed for plotting

  if ("sigma" %in% dat$term) dat <- dplyr::filter(dat, .data$term != "sigma")
  if ("lp__" %in% dat$term) dat <- dplyr::filter(dat, .data$term != "lp__")
  if ("shape" %in% dat$term) dat <- dplyr::filter(dat, .data$term != "shape")


  # remove sd_c and cor_ row

  re <- string_starts_with("sd_", x = dat$term)
  if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! -re)

  re <- string_starts_with("cor_", x = dat$term)
  if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! -re)


  # check if we need to keep or remove random effects

  re <- string_starts_with("b[", x = dat$term)
  re.s <- string_starts_with("Sigma[", x = dat$term)
  re.i <- intersect(
    string_starts_with("r_", x = dat$term),
    string_ends_with(".", x = dat$term)
  )

  # and all random effect error terms
  if (!sjmisc::is_empty(re.s)) dat <- dplyr::slice(dat, !! -re.s)


  if (type == "est") {

    # remove all random effect intercepts
    if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! -re)

    # remove random effects from brmsfit-models
    if (!sjmisc::is_empty(re.i)) dat <- dplyr::slice(dat, !! -re.i)

  } else if (type == "re") {

    # remove all random effect intercepts
    if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! re)

    # remove random effects from brmsfit-models
    if (!sjmisc::is_empty(re.i)) dat <- dplyr::slice(dat, !! re.i)

  }


  # for plot-type random effects, make sure that the random effects
  # are plotted as facet grid, grouped by groups

  if (type == "re") {

    dat$facet <- "re"

    # find random intercepts

    ri <- grep("b\\[\\(Intercept\\) (.*)\\]", dat$term)

    if (!sjmisc::is_empty(ri)) {
      dat$facet[ri] <- "(Intercept)"
      dat$term[ri] <- gsub("b\\[\\(Intercept\\) (.*)\\]", "\\1", dat$term[ri])
    }


    # find random intercepts

    ri1 <- grep("r_(.*)\\.(.*)\\.", dat$term)
    ri2 <- which(gsub("r_(.*)\\.(.*)\\.", "\\2", dat$term) == "Intercept")

    if (!sjmisc::is_empty(ri1)) {
      ri <- intersect(ri1, ri2)
      dat$facet[ri] <- "(Intercept)"
      dat$term[ri] <- gsub("r_(.*)\\.(.*)\\.", "\\1", dat$term[ri])
    }


    # fix multiple random intercepts

    if (inherits(model, "brmsfit")) {
      pattern <- "(.*)\\.(.*)"
    } else {
      pattern <- "(.*)\\:(.*)"
    }

    interc <- which(dat$facet == "(Intercept)")

    if (!sjmisc::is_empty(interc)) {
      interc.grps <- gsub(pattern, "\\1", dat$term[interc])
      resp.lvl <- gsub(pattern, "\\2", dat$term[interc])

      if (!sjmisc::is_empty(interc.grps) && dplyr::n_distinct(interc.grps) > 1) {
        dat$facet[interc] <- sprintf("(Intercept: %s)", interc.grps)
        dat$term[interc] <- resp.lvl
      }
    }


    # find random slopes

    rs1 <- grep("b\\[(.*) (.*)\\]", dat$term)
    rs2 <- which(gsub("b\\[(.*) (.*)\\]", "\\1", dat$term) != "(Intercept)")

    if (!sjmisc::is_empty(rs1)) {
      rs <- intersect(rs1, rs2)
      rs.string <- gsub("b\\[(.*) (.*)\\]", "\\1", dat$term[rs])
      dat$facet[rs] <- rs.string
      dat$term[rs] <- gsub("b\\[(.*) (.*)\\]", "\\2", dat$term[rs])
    }


    # find random slopes

    rs1 <- grep("r_(.*)\\.(.*)\\.", dat$term)
    rs2 <- which(gsub("r_(.*)\\.(.*)\\.", "\\2", dat$term) != "Intercept")

    if (!sjmisc::is_empty(rs1)) {
      rs <- intersect(rs1, rs2)
      rs.string <- gsub("r_(.*)\\.(.*)\\.", "\\2", dat$term[rs])
      dat$facet[rs] <- rs.string
      dat$term[rs] <- gsub("r_(.*)\\.(.*)\\.", "\\1", dat$term[rs])
    }

  }


  # categorical model?

  if (inherits(model, "brmsfit") && modfam$is_categorical) {

    # terms of categorical models are prefixed with "mu"

    if (length(string_starts_with("b_mu", x = dat$term)) == nrow(dat)) {
      dat$term <- substr(dat$term, 5, max(nchar(dat$term)))
      # create "response-level" variable
      dat <- sjmisc::add_variables(dat, response.level = "", .before = 1)
      dat$response.level <- gsub("(.*)\\_(.*)", "\\1", dat$term)
      dat$term <- gsub("(.*)\\_(.*)", "\\2", dat$term)
    }
  }


  # multivariate-response model?

  if (inherits(model, "brmsfit") && insight::is_multivariate(model)) {

    # get response variables

    responses <- stats::formula(model)$responses

    # also clean prepared data frame
    resp.sigma1 <- string_starts_with("sigma_", x = dat$term)
    resp.sigma2 <- string_starts_with("b_sigma_", x = dat$term)

    resp.sigma <- c(resp.sigma1, resp.sigma2)

    if (!sjmisc::is_empty(resp.sigma))
      dat <- dplyr::slice(dat, !! -resp.sigma)


    # create "response-level" variable

    dat <- sjmisc::add_variables(dat, response.level = "", .before = 1)

    # copy name of response into new character variable
    # and remove response name from term name

    for (i in responses) {
      m <- grep(pattern = sprintf("_%s_", i), x = dat$term)
      dat$response.level[intersect(which(dat$response.level == ""), m)] <- i
      dat$term <- gsub(sprintf("b_%s_", i), "", dat$term, fixed = TRUE)
      dat$term <- gsub(sprintf("s_%s_", i), "", dat$term, fixed = TRUE)
    }


    # check whether each category should be printed in facets, or
    # in a single graph (with dodged geoms)

    if (!missing(facets) && isTRUE(facets))
      colnames(dat)[1] <- "facet"
    else
      colnames(dat)[1] <- "response.level"
  }


  # do we have a zero-inflation model?

  if (modfam$is_zero_inflated || sjmisc::str_contains(dat$term, "b_zi_", ignore.case = T)) {
    dat$wrap.facet <- "Conditional Model"

    # zero-inflated part
    zi <- string_starts_with("b_zi_", x = dat$term)

    # check if zero-inflated part should be shown or removed
    if (show.zeroinf) {
      dat$wrap.facet[zi] <- "Zero-Inflated Model"
      dat$term[zi] <- sub(pattern = "b_zi_", replacement = "b_", x = dat$term[zi], fixed = T)
    } else {
      if (!sjmisc::is_empty(zi)) dat <- dplyr::slice(dat, !! -zi)
    }
  }


  # check model for monotonic effects

  simplex.terms <- string_starts_with(pattern = "simo_mo", x = dat$term)
  if (!sjmisc::is_empty(simplex.terms)) {
    if (!obj_has_name(dat, "wrap.facet")) {
      dat$wrap.facet <- ""
      dat$wrap.facet[simplex.terms] <- "Simplex Parameters"
    } else {
      dat$wrap.facet[simplex.terms] <- sprintf(
        "%s (Simplex Parameters)",
        dat$wrap.facet[simplex.terms]
      )
    }
  }


  # remove facet column if not necessary
  if (!show.zeroinf && obj_has_name(dat, "wrap.facet"))
    dat <- dplyr::select(dat, -.data$wrap.facet)

  dat
}


.get_confint <- function(ci.lvl = .95) {
  if (!is.null(ci.lvl) && !is.na(ci.lvl))
    (1 + ci.lvl) / 2
  else
    .975
}

Try the sjPlot package in your browser

Any scripts or data that you put into this service are public.

sjPlot documentation built on July 10, 2021, 5:07 p.m.