gamlsspipeline:

Documented in run_gamlss_pipeline

#' @title Run GAMLSS pipeline
#'
#' @description
#' Takes arguments that specify the running of the key steps in running a GAMLSS pipeline:
#' \itemize{
#'   \item{project directory creation,}
#'   \item{data pre-processing,}
#'   \item{creating exploratory plots,}
#'   \item{fitting the model,}
#'   \item{creating validation plots, and}
#'   \item{creating results plots.}
#' }
#' The purpose of this is to make it easier to do all the steps in a reproducible way that
#' saves the results automatically.
#'
#' @param dir_proj \code{character} or \code{function}.
#' \itemize{
#'   \item{If \code{character}, then this folder is created if it doesn't exist.}
#'   \item{If a \code{function}, then it should be a function that
#'   has a single argument that expects the named list produced by \code{rlang::list2(...)}.
#'   This function should simply return. the project directory as a character.
#'   This folder is created if it doesn't exist.}
#' }
#' @param dir_proj_empty \code{logical}. If \code{TRUE}, then the project directory
#' is emptied before any results are saved. Default is \code{FALSE}.
#' TODO: Wrap each of the *_fn functions so that whatever they return is saved to the
#' correct folder.
#' @export
run_gamlss_pipeline <- function(dir_proj,
                                dir_proj_empty = FALSE,
                                data_raw,
                                preprocess_fn = NULL,
                                plot_exp_fn = NULL,
                                fit_fn = NULL,
                                get_fit_stats_fn = NULL,
                                plot_fit_fn = NULL,
                                plot_val_fn = NULL,
                                debug_live = FALSE,
                                ...){

  # ====================================
  # Preparation
  # ====================================

  # check that data is supplied
  if(missing(data_raw)) stop("data_raw must be supplied.")

  # create expected parameters
  # remember to change get_expected_params and its test when this is changed
  expected_params <- list("preprocess_fn" = c("data_raw", "params_dots"),
                          "plot_exp_fn" = c("data_raw", "data_mod", "dir_proj", "params_dots"),
                          'fit_fn' = c("data_mod", "params_dots", "dir_proj"),
                          'get_fit_stats_fn' = c("data_raw", "data_mod", "dir_proj",
                                                 "params_dots", "fit_obj"),
                          "plot_fit_fn" = c("data_raw", "data_mod", "dir_proj",
                                            "params_dots", "fit_obj", "fit_stats"),
                          "plot_val_fn" = c("data_raw", "data_mod", "dir_proj",
                                            "params_dots", "fit_obj"))

  # save current environment as a variable
  env_main <- environment()

  # collect dots
  params_dots <- rlang::list2(...)

  # get project directory, creating it if need be
  dir_proj <- .setup_proj_dir(dir_proj = dir_proj,
                              dir_proj_empty = dir_proj_empty,
                              params_dots = params_dots)

  # replace NULL functions
  .replace_null_fns(env = env_main, expected_params = expected_params)

  # check that functions have correct parameters, if not NULL
  .validate_fns(env = env_main, expected_params = NULL)

  # save dot parameters and functions
  save_objects(params_dots = params_dots,
               preprocess_fn = preprocess_fn,
               plot_exp_fn = plot_exp_fn, fit_fn = fit_fn,
               get_fit_stats_fn = get_fit_stats_fn,
               plot_fit_fn = plot_fit_fn, plot_val_fn = plot_val_fn,
               dir_proj = dir_proj, dir_sub = c("params"),
               empty = TRUE)

  # debug function if requested
  .add_debug(fn_name = names(expected_params),
             debug_live = debug_live, env = env_main)

  # pre-process data
  data_mod <- preprocess_fn(data_raw = data_raw, params_dots = params_dots)

  # ====================================
  # Analysis
  # ====================================

  # make exploratory plots
  plot_exp_fn(data_raw = data_raw, data_mod = data_mod,
              dir_proj = dir_proj,
              params_dots = params_dots)

  # fit model
  fit_obj <- fit_fn(data_mod = data_mod, params_dots = params_dots,
                    dir_proj = dir_proj)

  # make validation plots
  plot_val_fn(data_raw = data_raw, data_mod = data_mod,
              dir_proj = dir_proj, fit_obj = fit_obj,
              params_dots = params_dots)

  # tables
  fit_stats <- get_fit_stats_fn(data_raw = data_raw, data_mod = data_mod,
                                fit_obj = fit_obj, params_dots = params_dots,
                                dir_proj = dir_proj)


  # make results plots
  plot_fit_fn(data_raw = data_raw, data_mod = data_mod,
              dir_proj = dir_proj, fit_obj = fit_obj,
              params_dots = params_dots, fit_stats = fit_stats)


  message('pipeline run complete')
  invisible(TRUE)
}