R/sourcoise_refresh.R

Defines functions sourcoise_refresh

Documented in sourcoise_refresh

# refresh -----------------------

#' Refresh sourcoise cache by executing sources selected
#'
#' All scripts (passed to `sourcoise_refresh()`) are executed with logging enabled.
#'
#' The function returns the list of script executed but its main effect is a side-effect as scripts are executed and caches updates accordingly.
#' Note also that log files reflect execution and track possible errors.
#' Because of logging the execution comes with a loss in performance, which is not an issue if scripts are long to execute.
#'
#' It is possible to execute `sourcoise_refresh()` without execution forcing (`force_exec=FALSE`) or with it.
#' Forced execution means that the script is executed even if the cache is valid.
#' In the case of non forced execution, execution is triggered by other cache invalidation tests (change in source file, lapse or tacked files).
#'
#' When scripts are linked to qmds (i.e. when run in a quarto project), it is possible to unfreeeze and uncache those qmds with the option `unfreeze=TRUE`.
#' This allows to refresh the cahe and then render the qmds using the new data.
#'
#' It is possible to pass to refresh a function that will be executed before every script. This allows to load packages and declare global variables that can be used in each script.
#' If packages are loaded inside the script, then this is not needed.
#'
#' Parameters registered ins `sourcoise_status()` such as `wd` or `args` are used to execute the script.
#'
#' @param what (tibble) a tibble as generated by `sourcoise_status()`, possibly filtered, (defaut to `source_status()` )
#' @param force_exec (boolean) (default `FALSE`) if `TRUE` code is executed, no matter what is cached
#' @param unfreeze (boolean) (default `TRUE`) when possible, unfreeze and uncache .qmd files in a quarto project when data used by those .qmd has been refreshed
#' @param quiet (boolean) (default `FALSE`) no message if TRUE
#' @param init_fn (function) (default `NULL`) execute a function before sourcing to allow initialization
#' @param root (default `NULL`) force root to be set, instead of letting the function finding the root, for advanced uses
#' @param log (character) (default `"INFO"`) log levels as in `logger::log_threshold()` (c("OFF", "INFO", ...)), comes with a small performance cost
#' @param .progress (boolean) (default `TRUE`) displays a progression bar based on previous execution timings
#'
#' @family sourcoise
#'
#' @importFrom rlang .data
#' @return a list of r scripts (characters) executed, with timing and success and a side effect on caches
#' @export
#' @examplesIf rlang::is_installed("insee")
#' dir <- tempdir()
#' fs::file_copy(
#'    fs::path_package("sourcoise", "ipch", "prix_insee.R"),
#'    dir,
#'    overwrite = TRUE)
#' # Force execution (root is set explicitly here, it is normally deduced from project)
#' data <- sourcoise("prix_insee.R", root = dir, force_exec = TRUE)
#' # we then refresh all caches
#' sourcoise_refresh(root = dir)

sourcoise_refresh <- function(
    what = NULL,
    force_exec = TRUE,
    unfreeze = TRUE,
    quiet = FALSE,
    init_fn = getOption("sourcoise.init_fn"),
    root = NULL,
    log = "INFO",
    .progress = TRUE) {

  refresh_start <- Sys.time()

  if(is.null(what))
    what <- sourcoise_status(root = root, quiet = quiet)

  if(!force_exec)
    what <- what |>
      dplyr::group_by(.data$src) |>
      dplyr::filter(!any(.data$valid)) |>
      dplyr::ungroup()

  if(nrow(what)==0)
    return(list())

  # on en garde qu'un
  what <- what |>
    dplyr::group_by(.data$src) |>
    dplyr::arrange(dplyr::desc(.data$date)) |>
    dplyr::slice(1) |>
    dplyr::ungroup()

  if(nrow(what)==0)
    return(list())

  if(!is.null(init_fn)) {
    init_fn()
  }

  total_time <- ceiling(sum(what$timing, na.rm=TRUE))
  if(is.null(root))
    cwd <- getwd() |> fs::path_abs()
  else
    cwd <- root
  if(.progress)
    idpgr <- cli::cli_progress_bar("refreshing", total = total_time)

  res <- purrr::pmap(what, function(src, wd, lapse, args, root, track, qmd_file, src_in, timing, log_file, ...) {
    exec_wd <- getwd()
    if(wd=="project")
      exec_wd <- root |> fs::path_norm()
    if(wd=="file")
      exec_wd <- fs::path_join(c(root, fs::path_dir(src))) |> fs::path_norm()
    if(wd=="qmd")
      exec_wd <- fs::path_join(c(root, fs::path_dir(qmd_file[[1]]))) |> fs::path_norm()

    src_data <- sourcoise(
      path = src,
      force_exec = force_exec,
      track = track,
      args = args,
      wd = wd,
      lapse = lapse,
      metadata = TRUE,
      quiet = TRUE,
      src_in = src_in,
      root = root,
      log = log)

    if(.progress)
      cli::cli_progress_update(inc = timing, id = idpgr)

    msrc <- fs::path_join(c(root, src)) |> fs::path_rel(cwd)
    if( src_data$ok == "exec" ) {
      cli::cli_alert_success(
        "{msrc} exectued in {round(src_data$timing)} s. for {scales::label_bytes()(src_data$size)} of data" )
    } else {
      cli::cli_alert_danger(
        "{msrc} failed (see log {.file {src_data$log_file}})" )
    }

    if(unfreeze)
      purrr::walk(src_data$qmd_file, ~{
        if(src_data$ok == "exec") {
          unfreeze(.x, root, quiet = TRUE)
          uncache(.x, root, quiet = TRUE)
        }
      })
    if(!is.null(src_data$error))
      list(src = fs::path_join(c(root, src)), ok = "error", timing = NA, size = NA)
    else
      list(src = fs::path_join(c(root, src)), ok = src_data$ok, timing = src_data$timing, size = src_data$size)
  }
  )

  if(.progress)
    cli::cli_process_done(id = idpgr)

  res <- purrr::transpose(res)

  dt <- difftime(Sys.time(), refresh_start, units = "secs") |> as.numeric() |> round()
  if(!quiet)
    cli::cli_alert_info("Total refresh in {dt} seconds")

  invisible(res)
}

Try the sourcoise package in your browser

Any scripts or data that you put into this service are public.

sourcoise documentation built on April 4, 2025, 5:17 a.m.