R/read_forecast.R

#' Read forecast data from multiple files
#'
#' \code{read_forecast} generates file names, based on the arguments given,
#' reads data from them, and optionally performs a transformation on those data.
#' By default the function returns nothing due to the large volumes of data that
#' may be read from the files. Optionally, the read data can be returned to the
#' calling environment, and / or written to files.
#'
#' @param dttm A vector of date time strings to read. Can be in YYYYMMDD,
#'   YYYYMMDDhh, YYYYMMDDhhmm, or YYYYMMDDhhmmss format. Can be numeric or
#'   character. \code{\link[harpCore]{seq_dttm}} can be used to generate a
#'   vector of equally spaced date-time strings.
#' @param fcst_model The name of the forecast model(s) to read. Can be expressed
#'   as a character vector if more than one model is wanted, or as a named list
#'   of character vectors for a multimodel ensemble.
#' @param parameter The name of the forecast parameter(s) to read from the
#'   files. Should either be harp parameter names (see
#'   \code{\link{show_param_defs}}), or in the case of netcdf files can be the
#'   name of the parameters in the files. If reading from vfld files, set to
#'   NULL to read all parameters.
#' @param lead_time The lead times to read in. If a numeric vector is passed,
#'   the lead times are assumed to be in hours. Otherwise a character vector may
#'   be passed with a letter after each value to denote the time units: d =
#'   days, h = hours, m = minutes, s = seconds.
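#'   For example, \code{lead_time = c("30m", "3h", "1d")} requests lead times
#'   of 30 minutes, 3 hours and 1 day.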
#' @param members For ensemble forecasts, a numeric vector giving the member
#'   numbers to read. If more than one forecast model is to be read in, the
#'   members may be given as a single vector, in which case they are recycled
#'   for each forecast model, or as a named list, with the forecast models (as
#'   given in \code{fcst_model}) as the names. For multimodel ensembles this
#'   would be a named list of named lists. If file names do not include the
#'   ensemble member, i.e. all members are in the same file, setting
#'   \code{members} to NULL will read all members from the files.
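#'   For example, with a multimodel ensemble defined as \code{fcst_model =
#'   list(my_eps = c("model_a", "model_b"))} (hypothetical names), the members
#'   could be given as \code{members = list(my_eps = list(model_a = seq(0, 5),
#'   model_b = c(0, 3)))}.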
#' @param members_out If the members are to be renumbered on output, the new member
#'   numbers are given in \code{members_out}. Should have the same structure as
#'   \code{members}.
#' @param lags A named list of members of an ensemble forecast model that are
#'   lagged and the amount by which they are lagged. The list names are the
#'   names of those forecast models, as given in \code{fcst_model} that have
#'   lagged members, and the lags are given as vectors that are the same length
#'   as the members vector. If the lags are numeric, it is assumed that they are
#'   in hours, but the units may be specified with a letter after each value
#'   where d = days, h = hours, m = minutes and s = seconds. \code{lags} is
#'   primarily used to generate the correct file names for lagged members - for
#'   example a lag of 1 hour will generate a file name with a date-time 1 hour
#'   earlier than the date-time given in \code{dttm} and a lead time 1 hour
#'   longer.
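#'   For example, \code{lags = list(my_eps = c("0h", "0h", "30m", "30m"))},
#'   for a hypothetical ensemble \code{my_eps} with four members, lags the
#'   third and fourth members by 30 minutes.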
#' @param vertical_coordinate For upper air data to be read the vertical
#'   coordinate in the files must be given. By default, this is "pressure", but
#'   may also be "height" or "model" for model levels. If reading from vfld
#'   files, set to NA to only read surface parameters.
#' @param file_path The parent path to all forecast data. All file names are
#'   generated to be under the \code{file_path} directory. The default is the
#'   current working directory.
#' @param file_format The format of files to read. harpIO includes functions to
#'   read 'vfld', 'netcdf', 'grib' and 'fa' format files. If set to NULL, an
#'   attempt will be made to guess the format of the files. However, you may
#'   write your own function named \code{read_<file_format>} and
#'   \code{read_forecast} will attempt to use it instead. See the vignette on
#'   writing read functions for more information.
#' @param file_template A template for the file names. For available built in
#'   templates see \code{\link{show_file_templates}}. If anything else is
#'   passed, it is returned unmodified, or with substitutions made for dynamic
#'   values. Available substitutions are \{YYYY\} for year, \{MM\} for 2 digit
#'   month with leading zero, \{M\} for month with no leading zero, and
#'   similarly \{DD\} or \{D\} for day, \{HH\} or \{H\} for hour, \{mm\} or
#'   \{m\} for minute. Also \{LDTx\} for lead time and \{MBRx\} for ensemble
#'   member, where x is the length of the string including leading zeros. Note
#'   that the full path to the file will always be file_path/template.
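#'   For example, the template
#'   \code{"\{YYYY\}\{MM\}\{DD\}\{HH\}/mbr\{MBR3\}/fc\{LDT2\}.grib"} would
#'   generate \code{"2019021706/mbr003/fc12.grib"} for a forecast initialized
#'   at 06 UTC on 17 February 2019 for member 3 and a lead time of 12 hours.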
#' @param file_format_opts A list of options specific to the file format. For
#'   netcdf this can be generated by \code{\link{netcdf_opts}} and for grib by
#'   \code{\link{grib_opts}}.
#' @param transformation The transformation to apply to the data once read in.
#'   "none" will return the data in its original form, "interpolate" will
#'   interpolate to points at latitudes and longitudes supplied in
#'   \code{transformation_opts}, "regrid" will regrid the data to a new domain
#'   given in \code{transformation_opts}, "xsection" will interpolate to a
#'   vertical cross section between two points given in
#'   \code{transformation_opts}, and "subgrid" will extract a subset of the
#'   grid given in \code{transformation_opts}.
#' @param transformation_opts Options for the transformation. For
#'   \code{transformation = "interpolate"} these can be generated by
#'   \code{\link{interpolate_opts}}, for \code{transformation = "regrid"} these
#'   can be generated by \code{\link{regrid_opts}}, and for
#'   \code{transformation = "xsection"} these can be generated by
#'   \code{\link{xsection_opts}}.
#' @param param_defs A list of parameter definitions that includes the file
#'   format to be read. By default the built in list \code{\link{harp_params}}
#'   is used. Modifications and additions to this list can be made using
#'   \code{\link{modify_param_def}} and \code{\link{add_param_def}}
#'   respectively.
#' @param output_file_opts Options for output files. \code{read_forecast} can
#'   output data from \code{transformation = "interpolate"} as sqlite files.
#'   The options for the sqlite files can be set with
#'   \code{\link{sqlite_opts}}. Most importantly, the path argument in
#'   \code{\link{sqlite_opts}} must not be NULL for data to be output to
#'   sqlite files.
#' @param return_data By default \code{read_forecast} does not return any data,
#'   since many GB of data could be read in. Set to TRUE to return the data
#'   that were read in to the calling environment.
#' @param merge_lags Logical. Whether to merge the lagged members with the
#'   unlagged members when \code{return_data = TRUE}. The default is TRUE,
#'   whereby the forecast time and lead time of the lagged members are
#'   adjusted to fit with the unlagged members.
#' @param show_progress Some files may contain a lot of data. Set to TRUE to
#'   show progress when reading these files.
#' @param stop_on_fail Logical. Set to TRUE to make execution stop if there are
#'   problems reading a file. Missing files are always skipped regardless of
#'   this setting. The default value is FALSE.
#' @param is_forecast Logical. When TRUE (the default), data are read on the
#'   basis of the forecast initialization time (from \code{dttm}) and
#'   \code{lead_time}. When FALSE, the date-times from \code{dttm} are used to
#'   choose which data to read and \code{lead_time} is ignored. This is useful
#'   for analysis data where many dates
#'   are in the same file. \code{\link{read_analysis}} also provides this
#'   functionality.
#' @param start_date,end_date,by `r lifecycle::badge("deprecated")` The use of
#'   `start_date`, `end_date` and `by` is no longer supported. `dttm` together
#'   with \code{\link[harpCore]{seq_dttm}} should be used to generate equally
#'   spaced date-times.
#'
#' @return When \code{return_data = TRUE}, a harp_fcst object.
#' @export
#'
#' @examples
#' if (requireNamespace("harpData", quietly = TRUE)) {
#'
#'   # Read all parameters from vfld files for a deterministic model
#'   read_forecast(
#'     dttm        = harpCore::seq_dttm(2019021700, 2019021718, "6h"),
#'     fcst_model  = "AROME_Arctic_prod",
#'     file_path   = system.file("vfld", package = "harpData"),
#'     return_data = TRUE
#'   )
#'
#'   # Ensure height corrections to 2m temperature are done and keep the
#'   # uncorrected data
#'   read_forecast(
#'     dttm                = harpCore::seq_dttm(2019021700, 2019021718, "6h"),
#'     fcst_model          = "AROME_Arctic_prod",
#'     file_path           = system.file("vfld", package = "harpData"),
#'     transformation_opts = interpolate_opts(
#'       correct_t2m    = TRUE,
#'       keep_model_t2m = TRUE
#'     ),
#'     return_data = TRUE
#'   )
#'
#'   # Read 10m wind speed from the MEPS_prod ensemble
#'   read_forecast(
#'     dttm          = harpCore::seq_dttm(2019021700, 2019021718, "6h"),
#'     fcst_model    = "MEPS_prod",
#'     parameter     = "S10m",
#'     lead_time     = seq(0, 12, 3),
#'     members       = seq(0, 10),
#'     file_path     = system.file("vfld", package = "harpData"),
#'     file_template = "vfld_eps",
#'     return_data   = TRUE
#'   )
#'
#'   # Read vertical profiles of temperature and dewpoint temperature
#'   read_forecast(
#'     dttm          = harpCore::seq_dttm(2019021700, 2019021718, "6h"),
#'     fcst_model    = "MEPS_prod",
#'     parameter     = c("T", "Td"),
#'     lead_time     = seq(0, 12, 3),
#'     members       = seq(0, 10),
#'     file_path     = system.file("vfld", package = "harpData"),
#'     file_template = "vfld_eps",
#'     return_data   = TRUE
#'   )
#'
#'   # Read ensemble data from MEPS_prod and lagged ensemble data from
#'   # CMEPS_prod
#'   read_forecast(
#'     dttm          = harpCore::seq_dttm(2019021700, 2019021718, "6h"),
#'     fcst_model    = c("MEPS_prod", "CMEPS_prod"),
#'     parameter     = c("T", "Td"),
#'     lead_time     = seq(0, 12, 3),
#'     members       = list(
#'       MEPS_prod = seq(0, 10),
#'       CMEPS_prod = c(0, 1, 3, 4, 5, 6)
#'     ),
#'     lags          = list(CMEPS_prod = c(0, 0, 2, 2, 1, 1)),
#'     file_path     = system.file("vfld", package = "harpData"),
#'     file_template = "vfld_eps",
#'     return_data   = TRUE
#'   )
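#'
#'   # Write point data to sqlite files by setting a path in
#'   # output_file_opts - here a temporary directory. vfld files contain
#'   # point data, so no transformation is needed.
#'   read_forecast(
#'     dttm             = harpCore::seq_dttm(2019021700, 2019021718, "6h"),
#'     fcst_model       = "AROME_Arctic_prod",
#'     file_path        = system.file("vfld", package = "harpData"),
#'     output_file_opts = sqlite_opts(path = tempdir())
#'   )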
#' }
read_forecast <- function(
  dttm,
  fcst_model,
  parameter,
  lead_time            = seq(0, 48, 3),
  members              = NULL,
  members_out          = members,
  lags                 = NULL,
  vertical_coordinate  = c("pressure", "model", "height", NA),
  file_path            = getwd(),
  file_format          = NULL,
  file_template        = "vfld",
  file_format_opts     = list(),
  transformation       = c("none", "interpolate", "regrid", "xsection", "subgrid"),
  transformation_opts  = NULL,
  param_defs           = get("harp_params"),
  output_file_opts     = sqlite_opts(),
  return_data          = FALSE,
  merge_lags           = TRUE,
  show_progress        = TRUE,
  stop_on_fail         = FALSE,
  is_forecast          = TRUE,
  start_date           = NULL,
  end_date             = NULL,
  by                   = "6h"

){

  if (missing(dttm)) {
    if (any(sapply(list(start_date, end_date, by), is.null))) {
      stop(
        "If `dttm` is not passed, `start_date`, `end_date` ",
        "and `by` must be passed."
      )
    }
    lifecycle::deprecate_warn(
      "0.1.0",
      I(paste(
        "The use of `start_date`, `end_date`, and `by`",
        "arguments of `read_forecast()`"
      )),
      "read_forecast(dttm)"
    )
    dttm <- harpCore::seq_dttm(start_date, end_date, by)
  }

  vertical_coordinate <- match.arg(vertical_coordinate)
  transformation      <- match.arg(transformation)

  if (missing(parameter)) parameter <- NULL

  if (!is_forecast) {
    lead_time <- 0
  }

  check_param_defs(param_defs)

  # Get a data frame of arguments in preparation for file name generation

  args_df <- process_read_forecast_args(
    fcst_model,
    file_path     = file_path,
    file_format   = file_format,
    file_template = file_template,
    members_in    = members,
    members_out   = members_out,
    lags          = lags
  )

  # Set up transformation - if no clim_file is passed, transformation_opts are checked
  # and nothing else is done.

  if (transformation == "none") {
    transformation_opts <- c(
      transformation_opts[names(transformation_opts) != "keep_raw_data"],
      list(keep_raw_data = TRUE)
    )
  } else {
    transformation_opts <- setup_transformation(transformation, transformation_opts)
  }

  if (return_data) {
    function_output <- list()
    list_counter    <- 0
  }

  failure_message <- list("There were problems reading:")
  failure_count   <- 0

  for (fcst_dttm in dttm) {

    if (return_data) list_counter <- list_counter + 1

    # Generate the file names
    files_df <- dplyr::group_by(
      args_df,
      .data[["fcst_model"]],
      .data[["sub_model"]],
      .data[["file_path"]],
      .data[["file_template"]],
      .data[["file_format"]]
    ) %>%
      tidyr::nest()
    files_df <- purrr::map_dfr(
      seq_len(nrow(files_df)),
      ~ do.call(
        generate_filenames,
        c(
          as.list(within(files_df[.x, ], rm("data"))),
          list(
            data           = within(files_df[.x, ][["data"]][[1]], rm("members_out")),
            lead_time      = lead_time,
            parameter      = parameter,
            filenames_only = FALSE,
            file_date      = fcst_dttm
          )
        )
      )
    )
    if (!is_forecast) {
      colnames(files_df)[colnames(files_df) == "fcdate"] <- "validdate"
    }
    files_df <- suppressMessages(dplyr::inner_join(files_df, args_df))

    # Nest by file name and remove rows with missing files
    data_df  <- tidyr::nest(dplyr::group_by(files_df, .data[["file_name"]]))

    # Do not check external sources
    on_disk_files <- grep(
      "https://|http://", data_df[["file_name"]], invert = TRUE, value = TRUE
    )

    missing_files <- on_disk_files[!file.exists(on_disk_files)]

    if (length(missing_files) > 0) {
      std_warn_length <- getOption("warning.length")
      options(warning.length = 8170)
      warning(
        cli::col_br_red("\n***\n"),
        "Files not found for ", fcst_dttm, ". Missing files:\n",
        paste(missing_files, collapse = "\n"),
        cli::col_br_red("\n***"),
        call.      = FALSE,
        immediate. = TRUE
      )
      options(warning.length = std_warn_length)
    }

    data_df <- dplyr::filter(data_df, !.data[["file_name"]] %in% missing_files)
    if (nrow(data_df) < 1) {
      warning("No files found for ", fcst_dttm, ".", call. = FALSE, immediate. = TRUE)
      if (return_data) function_output[[list_counter]] <- NULL
      next()
    }

    # If there was no clim_file and a transformation is to be done use the first file
    # as the clim_file.

    transformation_opts <- weights_from_fcst_file(
      data_df[["file_name"]][1],
      data_df[["data"]][[1]][["file_format"]][1],
      file_format_opts,
      transformation,
      transformation_opts,
      parameter
    )

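    # Wrap read_grid with purrr::possibly() so that a failure to read a file
    # returns FALSE instead of throwing an error; failed reads are collected
    # and reported below.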
    safe_read_grid <- purrr::possibly(
      ~read_grid(
        file_name           = .x,
        parameter           = .y[["parameter"]],
        is_forecast         = is_forecast,
        dttm                = fcst_dttm,
        file_format         = unique(.y[["file_format"]]),
        file_format_opts    = file_format_opts,
        vertical_coordinate = vertical_coordinate,
        lead_time           = .y[["lead_time"]],
        members             = .y[["members"]],
        transformation      = transformation,
        transformation_opts = transformation_opts,
        param_defs          = param_defs,
        show_progress       = show_progress,
        data_frame          = TRUE,
        readable_times      = FALSE
      ),
      FALSE
    )

    # Read the required data from the files
    data_df <- dplyr::mutate(
      data_df,
      forecast_data = purrr::map2(
        .data[["file_name"]],
        .data[["data"]],
        safe_read_grid
      )
    )

    failures <- which(sapply(data_df[["forecast_data"]], function(x) !inherits(x, "data.frame")))

    if (length(failures) > 0) {
      failure_count <- failure_count + 1
      failure_message[[failure_count + 1]] <- paste(
        data_df[["file_name"]][failures], collapse = "\n "
      )
      if (stop_on_fail) {
        stop(
          Reduce(function(x, y) paste(x, y, sep = "\n "), failure_message),
          call. = FALSE
        )
      }
      data_df <- data_df[-failures, ]
    }

    if (nrow(data_df) > 0) {
      transformation_opts <- attr(data_df[["forecast_data"]][[1]], "transformation_opts")
      attr(data_df, "transformation_opts") <- NULL
    }

    if (nrow(data_df) < 1) {
      warning("No data found for ", fcst_dttm, ".", call. = FALSE, immediate. = TRUE)
      if (return_data) function_output[[list_counter]] <- NULL
      next()
    }

    # If ensemble members were found in the file but not asked for, ensure that the
    # members column is removed from the metadata data frame

    data_df[["data"]] <- mapply(
      function(x, y) {
        if (all(is.na(x[["members"]])) && is.element("members", colnames(y))) {
          x[colnames(x) != "members"]
        } else {
          x
        }
      },
      data_df[["data"]],
      data_df[["forecast_data"]],
      SIMPLIFY = FALSE
    )

    # If ensemble members were asked for, but not found in the file data (i.e.
    # they are only in the file name), ensure that the members column is
    # removed from the forecast_data data frame

    data_df[["forecast_data"]] <- mapply(
      function(x, y) {
        if (all(is.na(x[["members"]])) && is.element("members", colnames(y))) {
          x[colnames(x) != "members"]
        } else {
          x
        }
      },
      data_df[["forecast_data"]],
      data_df[["data"]],
      SIMPLIFY = FALSE
    )

    # Join the data to the metadata
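    # not_lgl() is used below to exclude logical columns from the metadata
    # before joining, so that they are not used as join keys.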
    not_lgl <- function(x) !is.logical(x)

    # If parameter was supplied as a harp_parameter we need to join on the basename
    data_df[["data"]] <- lapply(
      data_df[["data"]],
      function(x) {
        if (is.element("parameter", colnames(x))) {
          x[["parameter"]] <- sapply(
            x[["parameter"]],
            function(y) ifelse(inherits(y, "harp_parameter"), y[["basename"]], y)
          )
        }
        x
      }
    )

    data_df <- purrr::map2(
      data_df[["data"]],
      data_df[["forecast_data"]],
      ~ dplyr::inner_join(
        dplyr::select_if(.x, not_lgl), .y,
        by = intersect(colnames(dplyr::select_if(.x, not_lgl)), colnames(.y))
      )
    )

    data_df <- dplyr::bind_rows(data_df)

    # If no members were specified but ensemble members were read,
    # make the data frame consistent

    member_rows <- which(!is.na(data_df[["members"]]) & is.na(data_df[["members_out"]]))
    data_df[["members_out"]][member_rows] <- data_df[["members"]][member_rows]

    member_rows <- which(!is.na(data_df[["members"]]) & is.na(data_df[["sub_model"]]))
    data_df[["sub_model"]][member_rows] <- data_df[["fcst_model"]][member_rows]

    # Modify the members to mbrXXX format
    data_df <- mbr_to_char(data_df, c("members", "members_out"))

    # Do 2m temperature correction if data are station data.
    if (
      is.element("station_data", colnames(data_df)) &&
        is.element("t2m", unique(tolower(data_df[["parameter"]])))
    ) {
      data_df <- correct_t2m(data_df, transformation_opts)
      transformation_opts <- data_df[["opts"]]
      data_df             <- data_df[["data_df"]]
    }

    # Add valid_dttm column
    data_df <- data_df[!grepl("file", colnames(data_df))]
    if (
      !is.element("valid_dttm", colnames(data_df)) &&
      is.element("fcst_dttm", colnames(data_df)) &&
      is.element("lead_time", colnames(data_df))
    ) {
      data_df[["valid_dttm"]] <- data_df[["fcst_dttm"]] +
        harpCore::to_seconds(data_df[["lead_time"]])
    }

    if (return_data) function_output[[list_counter]] <- data_df

    # If a file path is given in output_file_opts then write out the data - only
    # applies to data interpolated to stations.
    if (!is.null(output_file_opts) && !is.null(output_file_opts[["path"]])) {

      if (is.element("station_data", colnames(data_df))) {

        # Ensure data frame contains data that were asked for, even if they were not found
        meta_df <- args_df
        meta_df[["fcst_dttm"]]    <- suppressMessages(
          harpCore::as_unixtime(fcst_dttm)
        )
        meta_df[["lead_time"]] <- list(lead_time)
        meta_df[["parameter"]] <- list(parameter)

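        # tidyr changed the unnest() interface in version 1.0.0 - support both
        # the old (.drop) and the new calling conventions.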
        unnest_func <- function(df, col) {
          if (tidyr_new_interface()) {
            df <- tidyr::unnest(df, tidyr::one_of(col))
          } else {
            df <- tidyr::unnest(df, .data[[col]], .drop = FALSE)
          }
        }

        meta_df <- unnest_func(meta_df, "lead_time")
        if (all(vapply(meta_df[["parameter"]], is.null, logical(1)))) {
          meta_df[["parameter"]] <- NULL
        } else {
          meta_df <- unnest_func(meta_df, "parameter")
        }
        meta_df <- meta_df[intersect(
          colnames(meta_df),
          c(
            "fcst_model", "sub_model", "lags", "members",
            "members_out", "fcst_dttm", "lead_time", "parameter"
          )
        )]

        # Make meta data consistent with lags
        if (is.element("lags", colnames(meta_df))) {
          meta_df[["fcst_dttm"]] <- meta_df[["fcst_dttm"]] -
            harpCore::to_seconds(meta_df[["lags"]])
          meta_df[["lead_time"]] <- meta_df[["lead_time"]] +
            harpCore::to_seconds(meta_df[["lags"]]) / 3600
        }

        data_df <- suppressMessages(dplyr::full_join(
          data_df,
          mbr_to_char(
            meta_df,
            c("members", "members_out")
          )
        ))

        write_forecast(data_df, output_file_opts)

      } else {

        warning(
          "You need `transformation = \"interpolate\"` to write out data.",
          call. = FALSE
        )

      }

    }

  } # End loop over dttm

  if (failure_count > 0) {
    warning(Reduce(function(x, y) paste(x, y, sep = "\n "), failure_message), call. = FALSE)
  }

  if (return_data) {

    if (length(function_output) < 1) {
      warning("No data to return.", call. = FALSE, immediate. = TRUE)
      return(invisible(NULL))
    }

    function_output <- dplyr::bind_rows(function_output)

    if (is.element("lags", colnames(function_output))) {
      if (all(harpCore::to_seconds(unique(function_output[["lags"]])) == 0)) {
        function_output <- dplyr::select(function_output, -.data[["lags"]])
      }
    }

    if (is.element("lags", colnames(function_output)) && merge_lags) {
      function_output <- dplyr::group_by(function_output,
        .data[["fcst_dttm"]],
        .data[["lead_time"]],
        .data[["lags"]]
      ) %>%
        tidyr::nest()

      lag_seconds <- sapply(function_output[["lags"]], char_to_time, "lags")
      lag_units   <- gsub("[[:digit:]]", "", function_output[["lags"]])
      if (is.element("fcst_dttm", colnames(function_output))) {
        function_output[["fcst_dttm"]] <- function_output[["fcst_dttm"]] + lag_seconds
      }
      if (is.element("lead_time", colnames(function_output))) {
        function_output[["lead_time"]] <- function_output[["lead_time"]] -
          lag_seconds / sapply(lag_units, units_multiplier)
      }

      function_output <- tidyr::unnest(function_output, .data[["data"]])
      function_output <- dplyr::ungroup(function_output)

    }

    if (is.element("fcst_dttm", colnames(function_output))) {
      function_output[["fcst_dttm"]]  <- harpCore::unixtime_to_dttm(
        function_output[["fcst_dttm"]]
      )
      function_output[["fcst_cycle"]] <- format(function_output[["fcst_dttm"]], "%H")
    }

    if (is.element("valid_dttm", colnames(function_output))) {
      function_output[["valid_dttm"]] <- harpCore::unixtime_to_dttm(
        function_output[["valid_dttm"]]
      )
    }


    if (!is_forecast) {
      function_output <- dplyr::select(
        function_output,
        -dplyr::matches("^lead[[:graph:]]*time$"),
        -dplyr::any_of(c("fcst_dttm", "fcst_cycle"))
      )
    }

    function_output <- split(function_output, function_output[["fcst_model"]])
    function_output <- lapply(function_output, spread_df)

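    # Add harp classes to list columns (geolist for geofields, harp_xs_list
    # for cross sections) and to the data frame itself. Currently unused -
    # see the commented-out call below.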
    add_harp_class <- function(df, opts) {

      df <- dplyr::ungroup(df)

      list_cols <- which(sapply(df, typeof) == "list")

      for (df_col in list_cols) {
        if (all(sapply(df[[df_col]], meteogrid::is.geofield))) {
          if (!inherits(df[[df_col]], "geolist")) {
            df[[df_col]] <- harpCore::geolist(df[[df_col]])
          }
        }
        if (all(sapply(df[[df_col]], inherits, "harp_xs"))) {
          if (!inherits(df[[df_col]], "harp_xs_list")) {
            df[[df_col]] <- structure(
              df[[df_col]], class = c("harp_xs_list", class(df[[df_col]]))
            )
          }
        }

      }

      if (any(sapply(df, function(x) inherits(x, "geolist")))) {
        if (!inherits(df, "harp_spatial_fcst")) {
          class(df) <- c("harp_spatial_fcst", class(df))
        }
      }

      if (any(sapply(df, function(x) inherits(x, "harp_xs_list")))) {
        if (!inherits(df, "harp_xs_df")) {
          class(df) <- c("harp_xs_df", class(df))
        }
        if (is.null(attr(df, "transformation_opts"))) {
          attr(df, "transformation_opts") <- opts
        }
      }

      df

    }

    #function_output <- lapply(function_output, add_harp_class, transformation_opts)
    function_output <- harpCore::as_harp_list(
      mapply(
        function(x, y) {
          dplyr::relocate(
            dplyr::mutate(
              harpCore::as_harp_df(x),
              fcst_model = y
            ),
            dplyr::all_of("fcst_model")
          )
        },
        function_output,
        names(function_output),
        SIMPLIFY = FALSE
      )
    )

    if (length(function_output) == 1) {
      return(function_output[[1]])
    }

    function_output
    #structure(
    #  function_output,
    #  class = "harp_fcst"
    #)

  } else {

    message("return_data = FALSE - no data returned.")

  }

}

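# Convert numeric member numbers in the given columns to zero-padded "mbrXXX"
# strings, leaving NA entries (e.g. deterministic models) as NA. For example,
# mbr_to_char(data.frame(members = c(0, 1, NA)), "members") gives
# members = c("mbr000", "mbr001", NA).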
mbr_to_char <- function(df, cols) {
  for (col in cols) {
    if (is.element(col, colnames(df))) {
      df[[col]][!is.na(df[[col]])] <- paste0(
        "mbr",
        formatC(df[[col]][!is.na(df[[col]])], width = 3, flag = "0")
      )
    }
  }
  df
}

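# Spread a long data frame to wide format: one forecast column per ensemble
# member (suffixed with the lag for lagged members), or a single
# "<fcst_model>_det" column for a deterministic model.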
spread_df <- function(df) {

  data_col <- grep("data$", colnames(df), value = TRUE)
  if (length(data_col) > 1) {
    return(df)
  }

  if (is.element("members_out", colnames(df))) {

    df[["members_out"]] <- paste(df[["sub_model"]], df[["members_out"]], sep = "_")
    df[["members_out"]] <- gsub("_NA$", "_det", df[["members_out"]])
    lag_rows            <- which(as.numeric(gsub("[[:alpha:]]", "", df[["lags"]])) != 0)

    df[["members_out"]][lag_rows] <- paste(
      df[["members_out"]][lag_rows], df[["lags"]][lag_rows], sep = "_lag"
    )

    df <- df[
      !colnames(df) %in% c(
        "fcst_model", "sub_model", "members", "lags", "model_elevation"
      )
    ]
    df <- tidyr::pivot_wider(
      df, names_from = "members_out", values_from = dplyr::all_of(data_col)
    )

  } else {

    df[["fcst_model"]] <- paste0(df[["fcst_model"]], "_det")
    df                 <- df[!colnames(df) %in% c("lags", "model_elevation")]

    df <- tidyr::spread(df, key = .data[["fcst_model"]], value = .data[[data_col]])

  }

  harpCore::as_harp_df(df)

}