R/hbv-pso-run.R

Defines functions hbv_pso_run_single hbv_pso_run

Documented in hbv_pso_run hbv_pso_run_single

#' (Batch-) Run Particle Swarm Optimizations of the HBV Model
#'
#' Run Particle Swarm Optimizations of the HBV Model using [`hydroPSO`]
#' and [`TUWmodel`] by importing configurations from R files. Supports
#' specifiying multiple configuration files to run optimizations in batches.
#'
#' @details Runs Particle Swarm Optimizations of HBV for given parameter sets
#'   defined in simple R scripts. Timeseries input data (prec, airt, ep, obs,
#'   area, elevation_zones) can be automatically imported from csv or HBV-light
#'   format.
#'
#'   Each optimization run is given an identification string concatenated from
#'   the name of the input data directory (i.e. the location of the
#'   configuration file), the name of the gof variable and an optional suffix.
#'   This identifier is used as directory name for the output files, in plot
#'   titles and file names, and in the result list returned by `hbv_pso_run`.
#'
#'
#' @section Parameter definition: Parameters for the different optimization runs
#'   are defined by simple variable assignment, where the variable name
#'   corresponds to the parameter as used in [hbv_pso()]. They are defined in
#'   simple R files passed to `hbv_pso_run` (see `configpath`). In order for the
#'   configuration files to be picked up when passing directory paths to
#'   `hbv_pso_run`, the configuration file names must start with "run".
#'
#'   Parameters can also be set by passing them as additional arguments to
#'   `hbv_pso_run`, which will overwrite corresponding values defined in
#'   configuration file(s).
#'   Be aware that when using this option, any lists passed are not merged but
#'   completely replace the values set in the configfile.
#'
#'   In addition to parameters described in [hbv_pso()], the following optional
#'   parameters unique to `hbv_pso_run` can also be set in configuration files:
#'   `suffix`, `gof.name`,`from_validation`, `to_validation` (see above).
#'
#' @section Input data definition: For each optimization run, the following
#'   variables have to be defined and contain valid values as defined in
#'   [hbv_pso()]: prec, airt, ep, obs, area. The input data
#'   can be assinged in the following ways (in descending order of precedence):
#'
#'   1. *assignment inside the configfile*: Add code which assigns the input
#`      data to the corresponding variable name, e.g. `prec <-
#`      read.zoo(fpath, custom_options)
#'   2. *text files inside input data directories*:
#'       The files must be named as the input time series (with either
#'       .csv, .txt or no suffix, e.g. `prec.csv`) and be located in the same
#'       directory as the configfile. They must produce a valid zoo object with
#'       index when read by [read.zoo] with default arguments
#'   3. *HBV-light input files*:
#'       Analogue to the text files above, input data can also be imported from
#'       files as used by HBV-light ("ptq.text" and "ep.txt")
#'
#' If a given input data set is not defined in the configuration file,
#' `hbv_pso_run` will try to load it from a corresponding file in the input data
#' directory - first from a csv file, and then from the corrseponding HBV-light
#' file (e.g. PTQ.txt for prec, airt or obs).
#'
#' @param configpath character vector of full path and/or file names. Any file
#'   given is sourced and an `hbv_pso` optimization run is started with the
#'   imported arguments. Any directory given is searched for R files starting
#'   with "run", and an optimization run is started for each found in the same
#'   way as for file paths.
#' @param recursive logical, should config files be searched in subdirectories?
#'   If true, `hbv_pso_run` will recurse once into any directory given in
#'   `configpath` before searching for configuration files

#' @param suffix String, will be appended to the optimization run identifier
#' @param gof.name String, Used as part of the optimization run identifier.
#' Shorthand for setting plotting$gof.name. If gof.name is not set directly,
#' the value from plotting$gof.name is used.
#' @param from_validation Date or date string in default format, start of
#' validation run with best parameter set found during optimization
#' @param to_validation Date or date string in default format, end of validation
#' period
#' @param ... Parameters passed to hbv_pso, will overwrite those set in in the
#'   configuration files.
#'
#' @return A list of the following items:
#'
#' 1. `summary` data
#'   frame with id, gof, gof_valid, from, to, from_valid and to_valid for each
#'   optimization run
#' 2. `results` list with hydro_pso output from all
#'   optimization runs, named by id
#'
#' In addition, the output files from each optimization run are written inside a
#' directory which is located in the same directory as the configuration file,
#' and named by the identifier of the respective optimization run.
#'
#' @md
#' @export
#'
#' @examples
hbv_pso_run <-
  function(configpath, recursive = FALSE, suffix = NULL, gof.name = NULL,
           from_validation = NULL, to_validation = NULL, ...) {
  # TODO: abstract common tasks (e.g. ts loading) into util function
  # TODO: plotting list<->TRUE conversion done in multiple places?
  start_time <- Sys.time()
  isdir <- sapply(configpath,function(x) file.info(x)$isdir)
  dirs <- configpath[isdir]
  if (recursive && length(dirs)>0) {
    dirs <- list.dirs(dirs,recursive=FALSE, full.names=TRUE)
  }
  configs <- list.files(dirs, full.names = TRUE,
                        pattern = "(?i)^run.*\\.R$")
  all_runs <-lapply(c(configs,configpath[!isdir]),hbv_pso_run_single,...)
  # build summary dataframe and remove from results
  batch_summary <- do.call(rbind,lapply(all_runs, `[[`, "summary"))
  batch_summary_par<- do.call(rbind,lapply(all_runs, `[[`, "summary_par"))
  all_runs = lapply(all_runs,`[[`,"results")
  # flatten result list
  results <- unlist(all_runs,recursive = FALSE)
  run_time <- difftime(Sys.time(), start_time, units="secs")
  print(batch_summary)
  message("Execution time (hours:minutes:seconds): ",format(.POSIXct(run_time, tz="UTZ"), "%H:%M:%S"))
  return(list(summary=batch_summary,summary_par = batch_summary_par, results=results))
}

#' Title
#'
#' @param configfile
#' @param ...
#'
#' @return
#' @keywords internal
#'
#' @examples
hbv_pso_run_single <- function(configfile,...){
  # TODO: find a away to allow setting nested parameters through ...,
  #       e.g. maxit inside hydroGOF_args$control? list merges?
  # TODO: error handling if read.zoo fails/produces unexpected results?
  # TODO: test everything with non-ts inputs

  print(paste("Running",configfile))
  config_env <- new.env()
  source(configfile, local = config_env, chdir = TRUE)
  list2env(list(...), envir = config_env)

  # easier handling of plotting arg: list if we should plot, FALSE otherwise
  plotting <- config_env$plotting
  if (is.null(plotting)) {
    plotting <- FALSE
  } else if (isTRUE(plotting)) {
    plotting <- list()
  }

  # ensure objective function defaults are correct if FUN_gof is NULL
  if (is.null(config_env$FUN_gof)) {
    config_env$FUN_gof <- hydroGOF::NSE
    config_env$FUN_gof_args <- NULL
    config_env$gof.name <- "NSE"
  }

  # make disable_tr default to FALSE
  if (is.null(config_env$disable_tr)) {
    config_env$disable_tr <- FALSE
  }

  # if defined, gof.name sets plotting$gof.name. if not, get it from plotting
  gof.name <- config_env$gof.name
  if (is.list(plotting)) {
    if (is.null(gof.name)) {
      gof.name <- plotting$gof.name
    } else {
      plotting$gof.name <- gof.name
    }
  }

  suffix <- config_env$suffix

  config_dir <- dirname(configfile)
  id <- paste(c(basename(config_dir), gof.name, suffix), collapse = "_")
  if (is.null(config_env$outpath)) {
    output_path <- file.path(config_dir, "hbv", id)
  } else {
    output_path <- file.path(config_env$outpath, "hbv", id)
  }


  # set sim-obs plot titles and file names to id
  sim_obs_fname <- paste0(id,".png")
  if(is.list(plotting)) {
    plot_args <- list(modelout.best.png.fname=sim_obs_fname, main = id)
    if(is.list(plotting)) {
      plotting <- c(plotting,plot_args[!(names(plot_args) %in% names(plotting))])
    } else {
      plotting <- plot_args
    }
  }

  var_names <- c("prec","airt","ep","obs","area")
  # check for missing time series
  missing_vars <- var_names[sapply(var_names, function(x) is.null(config_env[[x]]))]
  if (length(missing_vars) > 0) {
    # read in missing data from csv files
    vars_from_csv <- sapply(missing_vars, function(x, config_dir) {
      fp <- list.files(config_dir, full.names = TRUE,
                       pattern = paste0("(?i)", x, "(\\.csv|\\.txt)?$"))[1]
      if (!is.na(fp)) {
        return(zoo::read.zoo(fp))
      }
      else {
        return(NULL)
      }
    }, simplify=FALSE, USE.NAMES=TRUE, config_dir)

    list2env(vars_from_csv,envir=config_env)
    # read in missing data from hbv-light files
    missing_vars <- sapply(var_names, function(x) is.null(config_env[[x]]))
    if (any(missing_vars)) {
        ts_from_hbv_light <- do.call(parse_hbv_light,c(hbv_light_dir=config_dir,as.list(missing_vars)))
        list2env(ts_from_hbv_light,envir=config_env)
    }
    missing_vars <- var_names[sapply(var_names, function(x) is.null(config_env[[x]]))]
    #TODO: area is optional!
    if (length(missing_vars) > 0)
      stop("Could not find the following input data for " ,id, ": ",paste(missing_vars,collapse=","))
  }

  param <- config_env$param

  # make tr constant if only ts is used
  if (config_env$disable_tr) {
      param[4,2] <- param[3,2]
      param[3,] <- -99
  }

  # skip optimization for parameters where min==max
  par_skip_optim <- param[,1] == param[,2]
  par_fixed <- replace(param,!par_skip_optim,NA)[,1]
  param <- param[!par_skip_optim,]


  # collecting arguments for hbv_pso
  hbv_pso_args <- list(
    prec = config_env$prec,
    airt = config_env$airt,
    ep = config_env$ep,
    area = config_env$area,
    param = param,
    disable_tr = config_env$disable_tr,
    par_fixed = par_fixed,
    obs = config_env$obs,
    from = config_env$from,
    to = config_env$to,
    warmup = config_env$warmup,
    incon = config_env$incon,
    outpath = output_path,
    hydroPSO_args = config_env$hydroPSO_args,
    FUN_gof = config_env$FUN_gof,
    FUN_gof_args = config_env$FUN_gof_args,
    plotting = plotting)

  # remove arguments with value NULL
  hbv_pso_args <- hbv_pso_args[!sapply(hbv_pso_args, is.null)]

  optimized <- do.call(hbv_pso,hbv_pso_args)

  if (!is.null(config_env$from_validation)) {
    hbv_pso_args$outpath <- file.path(output_path,"validation")
    hbv_pso_args$param <- optimized$pso_out$par
    hbv_pso_args$to <- config_env$to_validation
    hbv_pso_args$from <- config_env$from_validation
    hbv_pso_args$warmup <- config_env$warmup_validation

    if(is.list(plotting)) {
      sim_obs_fname <- file.path(output_path, paste0(id,"_ggof-validation",".png"))
      hbv_pso_args$plotting <- list(png.fname=sim_obs_fname, main = paste0(id,"-validation"))
    }

    optimized_validation <- do.call(hbv_pso,hbv_pso_args)
  } else {
    optimized_validation <- NULL
  }

  summary_base <- data.frame(
    id = paste(c(basename(config_dir),suffix),collapse = "-"),
    gof_name = ifelse(is.null(gof.name),NA, gof.name),
    gof = round(optimized$gof,3),
    stringsAsFactors = FALSE
  )
  summary_gof <- data.frame(
    gof_validation = ifelse(
      is.null(optimized_validation),NA,round(optimized_validation$gof,3)),
    from = ifelse(is.null(config_env$warmup),config_env$from, config_env$warmup),
    to = ifelse(is.null(config_env$to),NA, config_env$to),
    from_validation = ifelse(
      is.null(config_env$from_validation),NA, config_env$from_validation),
    to_validation = ifelse(is.null(config_env$to_validation),NA, config_env$to_validation),
    stringsAsFactors = FALSE
  )

  res <- list(results=list(),
              summary=cbind(summary_base,summary_gof),
              summary_par = cbind(summary_base,t(optimized$pso_out$par)))
  res$results[[id]] <- optimized
  res$results[[paste0(id,"_validation")]] <- optimized_validation
  return(res)
}
jthurner/hbvPSO documentation built on Oct. 15, 2020, midnight