# R/RerunWorkflow.R

#' Rerun a workflow object.
#'
#' Takes a workflow object and reruns it, either from the first module whose
#' stored output is NULL or from a module chosen by the user. Outputs of the
#' modules before the starting point are copied from \code{workflow}.
#'
#' @param workflow A zoonWorkflow object from a previous zoon analysis
#' @param from Which modules should be run. If NULL (default), run from the
#'  first NULL output (i.e. where the workflow broke), or from the first
#'  module if no output is NULL. Otherwise takes a single integer between
#'  1 and 5 and runs from that module (1 = occurrence, 2 = covariate,
#'  3 = process, 4 = model, 5 = output).
#'
#' @return A zoonWorkflow object: a list with the results of each module, a
#'  copy of the call used to execute the workflow, the session info and the
#'  versions of the modules used.
#'
#' @export
#' @name RerunWorkflow
#' @importFrom utils sessionInfo
#' @examples \dontrun{
#' w <- workflow(UKAnophelesPlumbeus,
#'               UKAir,
#'               Background(n = 70), 
#'               LogisticRegression,
#'               PrintMap)
#'
#' RerunWorkflow(w)
#'}



RerunWorkflow <- function(workflow, from = NULL) {

  stopifnot(inherits(workflow, 'zoonWorkflow'))

  if (is.null(from)) {
    # No starting point given: find the first NULL module output and run
    # from there. The first five elements of the workflow object are the
    # module outputs. If nothing is NULL, rerun everything.
    NullModules <- vapply(workflow[1:5], is.null, logical(1))
    if (any(NullModules)) {
      from <- which.max(NullModules)
    } else {
      from <- 1
    }
  } else {
    # 'from' is used in scalar comparisons below, so it must be exactly one
    # value in 1:5. The length check also rejects zero-length input, which
    # would otherwise pass the %in% test vacuously.
    stopifnot(length(from) == 1, from %in% 1:5)
  }

  # get the arguments from the call used to run this workflow
  callArgs <- SplitCall(workflow$call)

  # These are strings at first but we need to change them
  # to calls as is expected in the main zoon workflow function
  occSub <- StringToCall(callArgs['occurrence'])
  covSub <- StringToCall(callArgs['covariate'])
  proSub <- StringToCall(callArgs['process'])
  modSub <- StringToCall(callArgs['model'])
  outSub <- StringToCall(callArgs['output'])

  forceReproducible <- as.logical(callArgs['forceReproducible'])

  # save the local environment as it needs to be passed to various functions.
  # NOTE(review): helpers receiving 'e' may look up locals by name, so the
  # local variable names in this function should not be changed lightly.
  e <- environment()

  # capture the session info to return in workflow object
  session.info <- sessionInfo()

  # Check all modules are of same list structure
  occurrence.module <- CheckModList(occSub)
  covariate.module <- CheckModList(covSub)
  process.module <- CheckModList(proSub)
  model.module <- CheckModList(modSub)
  output.module <- CheckModList(outSub)

  # At most one module type that is not chained may hold more than one
  # module.
  allModules <- list(occurrence.module, covariate.module,
                     process.module, model.module, output.module)
  isChain <- vapply(allModules,
                    function(x) identical(attr(x, 'chain'), TRUE),
                    logical(1))
  NoOfModules <- vapply(allModules, length, integer(1))
  if (sum(NoOfModules[!isChain] > 1) > 1) {
    stop('Only one module type can be a list of multiple modules.')
  }

  # Get the modules (functions) from github.
  # Save name of functions as well as load functions into global namespace.
  # Will probably want to make this so it checks namespace first.
  occurrenceName <- LapplyGetModule(occurrence.module, forceReproducible)
  covariateName <- LapplyGetModule(covariate.module, forceReproducible)
  processName <- LapplyGetModule(process.module, forceReproducible)
  # Check for val type lon lat covs
  modelName <- LapplyGetModule(model.module, forceReproducible)
  # Test for predict method
  outputName <- LapplyGetModule(output.module, forceReproducible)

  # Build module version list.
  # sapply is kept here on purpose: the shape of the result depends on
  # whether every module entry carries a version.
  moduleVersions <- list(occurrence = sapply(occurrenceName,
                                             function(x) c(module = x$func,
                                                           version = x$version)),
                         covariate = sapply(covariateName,
                                            function(x) c(module = x$func,
                                                          version = x$version)),
                         process = sapply(processName,
                                          function(x) c(module = x$func,
                                                        version = x$version)),
                         model = sapply(modelName,
                                        function(x) c(module = x$func,
                                                      version = x$version)),
                         output = sapply(outputName,
                                         function(x) c(module = x$func,
                                                       version = x$version)))

  # Different to workflow(), we have an if statement before each module is
  #   run to check the 'from' argument. Modules before 'from' are not run;
  #   their output is taken from the workflow object that was passed in.

  # Run the modules. (these functions are in DoModuleFunctions.R)
  # But we have to check for chained modules and deal with them
  # And work out which module has been given as a list, and lapply over that.

  # Each module is in tryCatch.
  # If a module breaks we want to save the progress so far and let the user
  # know which module broke.

  # Set up the object to return; module outputs are filled in as they
  # become available.
  output <- list(occurrence.output = NULL,
                 covariate.output = NULL,
                 process.output = NULL,
                 model.output = NULL,
                 report = NULL,
                 call = workflow$call,
                 call.list = workflow$call.list,
                 session.info = session.info,
                 module.versions = moduleVersions)

  class(output) <- 'zoonWorkflow'

  # whether exiting on error, or successful completion, return this
  on.exit(return (output))

  # First the data collection modules
  # Actually tryCatch here only tells user which module broke, nothing to save.
  if (from <= 1) {
    tryCatch({
      occurrence.output <- lapply(occurrenceName, FUN = DoOccurrenceModule, e)
      # Then bind together if the occurrence modules were chained
      if (identical(attr(occurrence.module, 'chain'), TRUE)){
        occurrence.output <- list(do.call(rbind, occurrence.output))
        attr(occurrence.output[[1]], 'call_path') <- list(
          occurrence = paste0('Chain(',
                              paste(lapply(occurrenceName,
                                           function(x) x$module),
                                    collapse = ', '),
                              ')'))
      }
      output$occurrence.output <- occurrence.output
    },
      error = function(cond){
        ErrorModule(cond, 1, e)
      }
    )
  } else {
    occurrence.output <- workflow$occurrence.output
    output$occurrence.output <- occurrence.output
  }

  if (from <= 2) {
    tryCatch({
      covariate.output <- lapply(covariateName, FUN = DoCovariateModule, e)
      # Stack the covariates together if the covariate modules were chained
      if (identical(attr(covariate.module, 'chain'), TRUE)){
        covariate.output <- list(do.call(raster::stack, covariate.output))
        attr(covariate.output[[1]], 'call_path') <- list(
          covariate = paste0('Chain(',
                             paste(lapply(covariateName,
                                          function(x) x$module),
                                   collapse = ', '),
                             ')'))
      }
      output$covariate.output <- covariate.output
    },
      error = function(cond){
        ErrorModule(cond, 2, e)
      }
    )
  } else {
    covariate.output <- workflow$covariate.output
    output$covariate.output <- covariate.output
  }


  # Simply combine data into basic df shape
  # This shape is then input and output of all process modules.
  # Also makes it easy to implement a NULL process
  # Note this step runs whatever the value of 'from', and sits outside
  # the tryCatch blocks.

  if (length(covariateName) > 1) {
    data <- lapply(covariate.output,
                   function(x) ExtractAndCombData(occurrence.output[[1]], x))
  } else {
    data <- lapply(occurrence.output,
                   function(x) ExtractAndCombData(x, covariate.output[[1]]))
  }


  # Process module
  if (from <= 3) {
    tryCatch({
      process.output <- DoProcessModules(process.module, processName, data, e)
      output$process.output <- process.output
    },
      error = function(cond){
        ErrorModule(cond, 3, e)
      }
    )
  } else {
    process.output <- workflow$process.output
    output$process.output <- process.output
  }


  # Model module
  if (from <= 4) {
    tryCatch({
      model.output <- DoModelModules(model.module, modelName, process.output, e)
      output$model.output <- model.output
    },
      error = function(cond){
        ErrorModule(cond, 4, e)
      }
    )
  } else {
    model.output <- workflow$model.output
    output$model.output <- model.output
  }

  # Output module
  # If output isn't chained, might have to lapply over
  #   output, covariate or process
  # If output is chained, either covariate or process only.
  #  Within this need to chain output

  # 'from' has been validated to lie in 1:5, so this branch is always taken;
  # the else branch is kept for symmetry with the other modules.
  if (from <= 5) {
    tryCatch({
      output.output <- DoOutputModules(output.module, outputName,
                         process.module, process.output, model.output, e)
      output$report <- output.output
    },
      error = function(cond){
        ErrorModule(cond, 5, e)
      }
    )
  } else {
    output.output <- workflow$report
    output$report <- output.output
  }
}
# Boodogs/zoon-clone documentation built on May 6, 2019, 7:59 a.m.