#' Tidy and put the finishing touches on a model output
#'
#' Wrapper around \code{broom::tidy()} with some additional finishing touches!
#'
#' @param x A model object to be converted into a \code{tibble::tibble()}
#' @param .labels A named list of variable labels. If NULL, current labels will be used
#' @param .conf_int Logical to return a 95 percent confidence interval
#' @param .flextable Logical to return a formatted flextable object if TRUE or a list of a dataframe and row numbers to use in \code{flextable::padding()}
#' @param .header1 See \code{??mRclwhip::format_flextable} for details
#' @param .header2 See \code{??mRclwhip::format_flextable} for details
#' @param ... additional arguments passed to \code{broom::tidy()}
#' @examples
#' df1 <- tibble::tibble(
#' id = letters,
#' age = sample(seq(18, 39, 1), 26, replace = TRUE),
#' mile_time = sample(seq(6, 7.5, 0.1), 26, replace = TRUE),
#' gender = sample(c("Male", "Female"), 26, replace = TRUE),
#' country = sample(c("USA", "Canada", "Africa", "England"), 26, replace = TRUE)
#' )
#'
#' lm_res <- lm(mile_time ~ age + gender + country, data = df1)
#'
#' lm_res %>%
#' polish(
#' .labels = c(age = "Age", gender = "Gender", country = "Country of origin"),
#' .header1 = list(values = c("", "Estimate (95% CI)"))
#' )
#' @export
polish <- function(x, .labels = NULL, .conf_int = TRUE, .flextable = TRUE, .header1 = NULL, .header2 = NULL, ...) {
tryCatch({
int_df <- eval(x$call$data, envir = parent.frame())
},
error = function(e) {
usethis::ui_stop(
"The data frame {x$call$data} does not exist in the environment anymore. Please pass in labels or recreate {x$call$data}."
)
}
)
if(is.null(.labels)) {
labs <- mRclwhip::label_df(int_df) %>%
dplyr::mutate(
label = dplyr::case_when(
label == "" ~ variable,
label != "" ~ label,
TRUE ~ NA_character_
)
)
} else {
labs <- tibble::tibble(
variable = names(.labels),
label = .labels %>% unname()
)
if(nrow(labs) != length(attr(x$terms, "term.labels"))) {
usethis::ui_stop(
glue::glue("You provided {nrow(labs)} labels but your model has {length(attr(x$terms, 'term.labels'))} covariate(s). ",
"Please provide the same number of labels as covariates.")
)
}
}
## Create a data frame with the # of levels for each factor var
factor_vars <- int_df %>%
dplyr::select_if(function(x) is.factor(x) | is.character(x)) %>%
names()
numlevs <- purrr::map_dbl(factor_vars, ~length(levels(as.factor(int_df[[.x]]))))
levs_df <- tibble::tibble(
variable = factor_vars,
numlevs = numlevs
)
res <- x %>%
broom::tidy(conf.int = .conf_int, ...) %>%
dplyr::filter(stringr::str_detect(term, "(Intercept)", negate = TRUE)) %>%
dplyr::mutate(
variable = stringr::str_extract(term, mRclwhip::vec_to_regex(labs[["variable"]])),
level = stringr::str_remove(term, variable),
p.value = mRclwhip::format_p(p.value)
) %>%
dplyr::left_join(labs, "variable") %>%
dplyr::left_join(levs_df, "variable") %>%
dplyr::mutate(
label_lev = case_when(
numlevs > 2 ~ level,
!is.na(level) & level != "" ~ as.character(glue::glue("{label} ({level})")),
TRUE ~ label
),
estimate = glue::glue("{scales::comma(estimate, accuracy = 0.01)}"),
conf.low = glue::glue("{scales::comma(conf.low, accuracy = 0.01)}"),
conf.high = glue::glue("{scales::comma(conf.high, accuracy = 0.01)}"),
Estimate = glue::glue("{estimate} ({conf.low}, {conf.high})"),
numlevs = mRclwhip::unfill_vec(numlevs)
) %>%
dplyr::select(label_lev, Estimate, label, numlevs)
# find rows where the >2 level factors start to determine where to add the new rows with labels
# Keep in mind, once you add one label as a new row, you must increase the index for the next
# one by 1 (for each row added prior)
newrow_ind <- res %>% dplyr::mutate(rn = dplyr::row_number()) %>% dplyr::filter(numlevs > 2) %>% pull(rn)
rowlab <- res %>% dplyr::filter(dplyr::row_number() %in% newrow_ind) %>% dplyr::pull(label)
for(i in 1:length(newrow_ind)) {
newrow_ind[i] <- newrow_ind[i] + (i - 1)
}
addrow <- function(.before, lab) {
res <<- res %>%
dplyr::add_row(label_lev = lab, .before = .before)
}
purrr::walk2(newrow_ind, rowlab, addrow)
## get row numbers for padding the factor levels in the flextable
padding_rn <- res %>%
tidyr::fill(numlevs) %>%
dplyr::mutate(
rn = dplyr::row_number(),
numlevs = ifelse(numlevs > 2, numlevs, NA)
) %>%
dplyr::filter(!is.na(numlevs)) %>%
dplyr::pull(rn)
## return either a flextable or a list with a dataframe and row numbers
if(.flextable) {
res %>%
dplyr::select(-c(label, numlevs)) %>%
mRclwhip::format_flextable(
header1 = .header1,
header2 = .header2
) %>%
flextable::padding(i = padding_rn, j = 1, padding.left = 25)
} else {
res <- res %>% dplyr::select(-c(label, numlevs))
list(res, padding_rn)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.