#' Read gridded data in various formats
#'
#' \code{read_grid} is a generic function to read gridded data from a file. A
#' transformation can optionally be applied to those data.
#'
#' @param file_name A character string with the (full) file name.
#' @param parameter The parameter to be read.
#' @param is_forecast Logical. If TRUE (the default), data are filtered based on
#' the forecast initialization time and the lead time. If FALSE, the data are
#' filtered on the valid time.
#' @param dttm A vector of date time strings of the form YYYYMMDD, YYYYMMDDhh,
#' YYYYMMDDhhmm, or YYYYMMDDhhmmss. For forecast data these would be the
#' forecast initialization times. \code{\link[harpCore]{seq_dttm}} can be used
#' to generate a vector of equally spaced date-time strings.
#' @param file_format The file format. Possible values include grib, netcdf, FA,
#' hdf5... Whatever the value is, it is supposed to correspond to a function
#' "read_XXX" that can deal with the format. If not specified, the format can
#' often be guessed correctly from file extension or the first few bytes.
#' @param file_format_opts Options for the file format as generated by one of
#' the options functions, e.g. \code{\link{netcdf_opts}}.
#' @param vertical_coordinate The vertical coordinate for upper air data. May be
#' "pressure" for pressure levels, "model" for model levels or "height" for
#' height levels.
#' @param lead_time The lead times to read from a forecast file. If set to NULL,
#' all lead times will be read from the file.
#' @param members For ensemble data, the ensemble members to read from the file.
#' If set to NULL all members will be read.
#' @param transformation The transformation to apply to the gridded data. Can be
#' "none", "interpolate", "regrid" or "xsection".
#' @param transformation_opts Options for the transformation as generated by
#' \code{\link{interpolate_opts}}, \code{\link{regrid_opts}} or
#' \code{\link{xsection_opts}}.
#' @param param_defs A list of parameter definitions that includes the file
#' format to be read. By default the built in list \code{\link{harp_params}}
#' is used. Modifications and additions to this list can be made using
#' \code{\link{modify_param_def}} and \code{\link{add_param_def}}
#' respectively.
#' @param show_progress Show progress when reading large amounts of data.
#' @param data_frame Logical. For compatibility with current version of
#' harpSpatial, make sure only a geofield is returned rather than a data frame
#' when set to FALSE.
#' @param readable_times When \code{data_frame = TRUE}, whether to convert times
#' in unix format to a data-time format. The default is TRUE.
#' @param spread_members When \code{data_frame = TRUE}, whether to spread the
#' members into a column for each member.
#' @param ... All arguments passed to the specified reader function.
#'
#' @return A geofield or (possibly) a plain matrix.
#' @export
#'
#' @examples
#' if (requireNamespace("Rgrib2", quietly = TRUE) & requireNamespace("harpData", quietly = TRUE)) {
#' read_grid(
#' system.file(
#' "grib/AROME_Arctic/2018/07/10/00/fc2018071000+000grib_fp",
#' package = "harpData"
#' ),
#' parameter = "T2m"
#' )
#' read_grid(
#' system.file(
#' "grib/AROME_Arctic/2018/07/10/00/fc2018071000+000grib_fp",
#' package = "harpData"
#' ),
#' parameter = "S10m"
#' )
#' read_grid(
#' system.file(
#' "grib/AROME_Arctic/2018/07/10/00/fc2018071000+000grib_fp",
#' package = "harpData"
#' ),
#' parameter = "tcc"
#' )
#' }
read_grid <- function(
file_name,
parameter,
is_forecast = TRUE,
dttm = NULL,
file_format = NULL,
file_format_opts = list(),
vertical_coordinate = c("pressure", "model", "height", NA),
lead_time = NULL,
members = NULL,
transformation = c("none", "interpolate", "regrid", "xsection", "subgrid"),
transformation_opts = list(),
param_defs = get("harp_params"),
show_progress = TRUE,
data_frame = FALSE,
readable_times = TRUE,
spread_members = FALSE,
...
) {
# Set progress bar to false for batch running
if (!interactive()) show_progress <- FALSE
if (missing(parameter)) parameter <- NULL
if (!is.null(dttm)) {
dttm <- str_datetime_to_unixtime(dttm)
}
if (any(is.na(vertical_coordinate))) vertical_coordinate <- as.character(vertical_coordinate)
vertical_coordinate <- match.arg(vertical_coordinate)
transformation <- match.arg(transformation)
transformation_opts <- check_opts(transformation, transformation_opts)
file_format <- check_for_na(file_format)
if (is.null(file_format)) {
file_format <- guess_format(file_name)
}
if (length(file_format) > 1) {
stop(
"More than one 'file_format' passed: '", paste(file_format, collpase = "','"), "'.",
call. = FALSE
)
}
if (is.na(file_format)) {
stop("Please provide explicit file format for '", file_name, "'.", call. = FALSE)
}
# Make sure default interpolate_opts are set for vfld files if none are passed
if (file_format == "vfld" && !is.element("correct_t2m", names(transformation_opts))) {
transformation_opts <- interpolate_opts(stations = harpCore::station_list)
}
read_func <- try(get(paste0("read_", file_format)), silent = TRUE)
if (inherits(read_func, "try-error")) {
stop(
"Cannot find function 'read_", file_format, "' to read ", file_format, " files.",
call. = FALSE
)
}
members <- unique(check_for_na(members))
lead_time <- unique(check_for_na(lead_time))
if (is.list(parameter) && length(parameter) == 1) {
parameter <- parameter[[1]]
}
if (!inherits(parameter, "harp_parameter")) {
parameter <- unique(check_for_na(parameter))
}
message("Reading ", file_name)
gridded_data <- read_func(
file_name = file_name,
parameter = parameter,
is_forecast = is_forecast,
date_times = dttm,
lead_time = lead_time,
members = members,
vertical_coordinate = vertical_coordinate,
transformation = transformation,
transformation_opts = transformation_opts,
format_opts = file_format_opts,
param_defs = param_defs,
show_progress = show_progress
)
if (is.element("transformation_opts", names(attributes(gridded_data)))) {
transformation_opts <- attr(gridded_data, "transformation_opts")
}
if (!is_forecast) {
gridded_data <- dplyr::select(
gridded_data,
-.data[["lead_time"]],
-.data[["fcdate"]]
)
}
data_col <- switch(
transformation,
"none" = "gridded_data",
"regrid" = "regridded_data",
"xsection" = "xsection_data",
"subgrid" = "subgrid_data",
NA
)
if (
transformation == "none" &&
is.element("station_data", colnames(gridded_data))
) {
data_col <- NA_character_
}
if (readable_times) {
if (!is.null(gridded_data[["fcdate"]])) {
gridded_data[["fcdate"]] <- unix2datetime(gridded_data[["fcdate"]])
}
if (!is.null(gridded_data[["validdate"]])) {
gridded_data[["validdate"]] <- unix2datetime(gridded_data[["validdate"]])
}
}
if (spread_members && is.element("members", colnames(gridded_data))) {
gridded_data[["members"]] <- paste0(
"fcst_mbr",
formatC(gridded_data[["members"]], width = 3, flag = "0")
)
gridded_data <- tidyr::spread(gridded_data, key = "members", value = data_col)
}
list_cols <- which(sapply(gridded_data, typeof) == "list")
for (df_col in list_cols) {
if (all(sapply(gridded_data[[df_col]], meteogrid::is.geofield))) {
gridded_data[[df_col]] <- harpCore::geolist(gridded_data[[df_col]])
}
}
if (any(sapply(gridded_data, function(x) inherits(x, "geolist")))) {
class(gridded_data) <- c("harp_spatial_fcst", class(gridded_data))
}
if (transformation == "xsection") {
all_attrs <- unique(lapply(gridded_data[[data_col]], attributes))
if (length(all_attrs) == 1) {
gridded_data <- tidyr::unnest(gridded_data, .data[[data_col]]) %>%
dplyr::group_by_at(
dplyr::vars(-.data[["level"]], -.data[["distance"]], -.data[["value"]])
) %>%
tidyr::nest(
!!rlang::sym(data_col) := c(
.data[["level"]], .data[["distance"]], .data[["value"]]
)
) %>%
dplyr::ungroup()
if (transformation_opts[["levels_ascending"]]) {
levels_direction <- get("c")
} else {
levels_direction <- get("rev")
}
gridded_data[[data_col]] <- purrr::map(
gridded_data[[data_col]],
~dplyr::mutate(
dplyr::arrange(.x, .data[["level"]]),
level_number = as.numeric(factor(levels_direction(.data[["level"]])))
)
)
gridded_data[[data_col]] <- lapply(
gridded_data[[data_col]],
function(x) structure(x, class = c("harp_xs", class(x)))
)
class(gridded_data[[data_col]]) <- c("harp_xs_list", class(gridded_data[[data_col]]))
class(gridded_data) <- c("harp_xs_df", class(gridded_data))
}
if (length(all_attrs) > 1) {
stop("Different x sections found in the same data", call. = FALSE)
}
}
if (!data_frame) {
if (is.na(data_col)) {
#message("For transformation = '", transformation, "' data must be returned as a data frame.")
return(gridded_data)
}
if (nrow(gridded_data) == 1) {
gridded_data <- gridded_data[[data_col]][[1]]
} else {
gridded_data <- gridded_data[[data_col]]
if (transformation == "xsection") {
class(gridded_data) <- union("xsection_list", class(gridded_data))
} else {
class(gridded_data) <- union("geolist", class(gridded_data))
}
}
} else {
attr(gridded_data, "transformation_opts") <- transformation_opts
gridded_data <- dplyr::rename_with(
gridded_data,
~gsub("date", "_dttm", .x),
dplyr::matches("fcdate|validdate")
)
gridded_data <- dplyr::rename_with(
gridded_data,
~gsub("fc", "fcst", .x),
dplyr::matches("^fc_dttm$")
)
gridded_data <- dplyr::rename_with(
gridded_data,
~gsub("leadtime", "lead_time", .x),
dplyr::matches("^leadtime$")
)
}
gridded_data
}
#' @rdname read_grid
#' @export
read_grid_interpolate <- function(file_name, parameter, file_format = NULL, ...) {
if (is.null(file_format)) file_format <- guess_format(file_name)
if (is.na(file_format)) stop("Please provide explicit file format for ", file_name, call. = FALSE)
reader <- get(paste0("read_", file_format, "_interpolate"))
reader(file_name, parameter = parameter, ...)
}
# Try to guess the binary format of a gridded data file.
# @param file_name The file name
# @return A character string with the format, or NA.
guess_format <- function(file_name) {
if (!file.exists(file_name)) stop("File not found.", call. = FALSE)
# first we try the file extension:
## TODO: tar? csv, txt...?
## for now, we assume tar -> fatar (it's the only one we know, anyway)
ext <- tools::file_ext(file_name)
ff <- switch(tolower(ext),
"grib" = ,
"grb" = "grib",
"nc" = ,
"nc4" = ,
"ncf" = "netcdf",
"h5" = ,
"hdf" = ,
"hdf5" = "hdf5",
"tar" = "fatar",
NA_character_)
## try reading the first few bytes of the file
if (is.na(ff)) {
df <- file(file_name, open = "rb")
ttt <- readBin(df, "raw", n = 4 * 8)
close(df)
if (rawToChar(ttt[1:4]) == "GRIB") return("grib")
if (rawToChar(ttt[1:4]) == "BUFR") return("bufr")
if (rawToChar(ttt[2:4]) == "HDF") return("hdf5")
## FA files have a header of 22 8-byte integers
## 2 and 4 should always have the same value
header <- readBin(ttt, "integer", size = 8, n = 22, endian = "big")
if (as.numeric(header[2]) == 16 && as.numeric(header[4]) == 22) return("fa")
first_line <- try(scan(file_name, nlines = 1, quiet = TRUE))
if (!inherits(first_line, "try-error")) {
if (is.numeric(first_line) && length(first_line) > 1 & length(first_line) < 4) {
if (nchar(first_line[1]) < 8) {
return("vfld")
}
if (length(first_line) == 2) {
try_date <- try(
str_datetime_to_unixtime(
paste0(
first_line[1],
formatC(as.integer(first_line[2]), width = 6, flag = "0")
)
),
silent = TRUE
)
if (!inherits(try_date, "try-error")) {
return("obsoul")
}
}
}
}
}
return(ff)
}
check_for_na <- function(x) {
x <- stats::na.omit(x)
if (length(x) < 1) {
x <- NULL
}
as.vector(x)
}
check_opts <- function(trans, trans_opts) {
if (length(trans_opts) < 1) {
if (trans == "none") {
trans_opts[["keep_raw_data"]] <- TRUE
return(trans_opts)
}
if (trans == "interpolate") {
message(
"transformation_opts not passed for transformation = 'interpolate'.\n",
"Setting to default interpolate_opts()."
)
return(interpolate_opts())
}
stop("transformation_opts must be passed for transformation = '", trans, "'.", call. = FALSE)
}
trans_opts
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.