# nolint start
#' Process raw data wrapper function
#' @description
#' This function processes raw data files that have
#' been downloaded by [`download_data`]. `process_covariates` and
#' the underlying source-specific processing functions have been designed to
#' operate on the raw data files. To avoid errors, \strong{do not edit the raw
#' data files before passing to `process_covariates`}.
#' @param covariate character(1). Covariate type.
#' @param path character(1). Directory or file path to raw data
#' depending on `covariate` value.
#' @param ... Arguments passed to each raw data processing function.
#' @seealso
#' * \code{\link{process_modis_swath}}: "modis_swath"
#' * \code{\link{process_modis_merge}}: "modis_merge"
#' * \code{\link{process_blackmarble}}: "blackmarble"
#' * \code{\link{process_koppen_geiger}}: "koppen-geiger", "koeppen-geiger", "koppen"
#' * \code{\link{process_ecoregion}}: "ecoregion", "ecoregions"
#' * \code{\link{process_nlcd}}: "nlcd", "NLCD"
#' * \code{\link{process_tri}}: "tri", "TRI"
#' * \code{\link{process_nei}}: "nei", "NEI"
#' * \code{\link{process_geos}}: "geos", "GEOS"
#' * \code{\link{process_gmted}}: "gmted", "GMTED"
#' * \code{\link{process_aqs}}: "aqs", "AQS"
#' * \code{\link{process_hms}}: "hms", "smoke", "HMS"
#' * \code{\link{process_narr}}: "narr", "NARR"
#' * \code{\link{process_groads}}: "sedac_groads", "roads", "groads"
#' * \code{\link{process_population}}: "sedac_population", "population"
#' * \code{\link{process_merra2}}: "merra", "merra2", "MERRA2"
#' * \code{\link{process_gridmet}}: "gridmet", "gridMET"
#' * \code{\link{process_terraclimate}}: "terraclimate", "TerraClimate"
#' * \code{\link{process_huc}}: "huc", "HUC"
#' * \code{\link{process_cropscape}}: "cropscape", "cdl"
#' * \code{\link{process_prism}}: "prism", "PRISM"
#' @return `SpatVector`, `SpatRaster`, `sf`, or `character` depending on
#' covariate type and selections.
#' @author Insang Song
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' process_covariates(
#' covariate = "narr",
#' date = c("2018-01-01", "2018-01-10"),
#' variable = "weasd",
#' path = system.file("extdata", "examples", "narr", "weasd")
#' )
#' }
#' @export
# nolint end
process_covariates <-
function(
covariate = c("modis_swath", "modis_merge",
"koppen-geiger",
"blackmarble",
"koeppen-geiger", "koppen", "koeppen",
"geos", "dummies", "gmted",
"hms", "smoke",
"sedac_population", "population",
"sedac_groads", "groads", "roads",
"nlcd", "tri", "narr", "nei",
"ecoregions", "ecoregion",
"merra", "merra2", "gridmet", "terraclimate",
"huc", "cropscape", "cdl", "prism"),
path = NULL,
...
) {
covariate <- tolower(covariate)
covariate <- match.arg(covariate)
if (startsWith(covariate, "ko")) {
covariate <- "koppen"
}
# select function to run
what_to_run <- switch(covariate,
modis_merge = process_modis_merge,
modis_swath = process_modis_swath,
blackmarble = process_blackmarble,
ecoregion = process_ecoregion,
ecoregions = process_ecoregion,
koppen = process_koppen_geiger,
narr = process_narr,
nlcd = process_nlcd,
smoke = process_hms,
hms = process_hms,
sedac_groads = process_groads,
roads = process_groads,
groads = process_groads,
sedac_population = process_population,
population = process_population,
nei = process_nei,
tri = process_tri,
    aqs = process_aqs,
    geos = process_geos,
gmted = process_gmted,
merra = process_merra2,
merra2 = process_merra2,
gridmet = process_gridmet,
terraclimate = process_terraclimate,
huc = process_huc,
cropscape = process_cropscape,
cdl = process_cropscape,
prism = process_prism
)
res_covariate <-
tryCatch({
what_to_run(
path = path,
...
)
}, error = function(e) {
stop(
paste0(
e,
"\n",
paste0(deparse(args(what_to_run)), collapse = "\n"),
"\n",
"Please refer to the argument list and ",
"the error message above to rectify the error.\n"
)
)
})
return(res_covariate)
}
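# Illustrative dispatch sketch (hypothetical paths; wrapped in `if (FALSE)` so
# nothing runs when this file is sourced). `covariate` is lower-cased before
# matching, so "HMS" and "hms" both dispatch to process_hms().
if (FALSE) {
  process_covariates(
    covariate = "HMS",
    date = c("2018-12-30", "2019-01-01"),
    path = "./data/hms"
  )
}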
# nolint start
#' Process MODIS sub-datasets
#' @description
#' Selects the sub-dataset name filter for supported MODIS sinusoidal grid
#' products. Four presets are available. A non-`NULL` `custom_sel` value
#' supersedes the preset associated with `product`.
#' @param product character(1). Product code.
#' @param custom_sel character(1). Custom filter.
#' If this value is not `NULL`, the preset filter is
#' overridden.
#' @param ... Placeholders.
#' @note
#' Preset product codes and associated variables include
#' * "MOD11A1" - Land surface temperature (LST)
#' * "MOD13A2" - Normalized Difference Vegetation Index (NDVI)
#' * "MOD09GA" - Surface reflectance, and
#' * "MCD19A2" - Aerosol optical depth (AOD).
#'
#' For a full list of available
#' MODIS product codes, see the "Short Name" column at
#' [NASA LP DAAC Search Data Catalog](https://lpdaac.usgs.gov/product_search/?collections=Combined+MODIS&collections=Terra+MODIS&collections=Aqua+MODIS&view=list).
#' When utilizing a product code from this "Short Name" column, \strong{do
#' not include} the version number following the period. For example, if "Short
#' Name" = MCD12C1.006, then `product = "MCD12C1"`.
# nolint end
#' @author Insang Song
#' @return A character object containing a regular expression
#' for sub-dataset filtering. Details of regular expressions in R can be
#' found in [regexp].
#' @seealso [calculate_modis]
#' @examples
#' process_modis_sds(product = "MOD09GA")
#' @export
# previously modis_prefilter_sds
process_modis_sds <-
function(
product = c("MOD11A1", "MOD13A2", "MOD09GA", "MCD19A2"),
custom_sel = NULL,
...
) {
if (!is.null(custom_sel)) {
modis_sds <- custom_sel
} else {
product <- match.arg(product)
modis_sds <-
switch(product,
MOD11A1 = "(LST_)",
MOD13A2 = "(NDVI)",
MOD09GA = "(sur_refl_b0)",
MCD19A2 = "(Optical_Depth)"
)
if (product == "MCD19A2") {
message(
sprintf(
"For MCD19A2, use %s for 5km resolution sub-datasets.\n",
"(cos|RelAZ|Angle)"
)
)
}
}
return(modis_sds)
}
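# A minimal check of the preset-versus-custom behavior documented above;
# needs no MODIS files on disk and runs nothing when this file is sourced.
if (FALSE) {
  process_modis_sds(product = "MOD11A1")
  # [1] "(LST_)"
  process_modis_sds(custom_sel = "(cos|RelAZ|Angle)")
  # [1] "(cos|RelAZ|Angle)"  (the custom filter overrides any preset)
}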
#' Process MODIS layers
#' @description
#' Aggregate layers in a sub-dataset of sinusoidal MODIS products.
#' Some MODIS products consist of multi-layer sub-datasets; this function
#' aggregates them into a single-layer `SpatRaster`, applying `fun_agg`
#' at overlapping cells.
#' @param path character(1). Full path to MODIS HDF4/HDF5 file.
#' Direct sub-dataset access is supported, for example,
#' HDF4_EOS:EOS_GRID:\{filename\}:\{base_grid_information\}:\{sub-dataset\}
#' @param subdataset character(1). Exact or regular expression filter of
#' sub-dataset. See [process_modis_sds] for details.
#' @param fun_agg character(1). Function name to aggregate layers.
#' Should be acceptable to [terra::tapp].
#' @param ... Placeholders.
#' @return a `SpatRaster` object
#' @author Insang Song
#' @seealso [terra::tapp], [terra::rast], [terra::describe]
#' @note HDF values are read as-is, without scaling.
#' Users should consult the MODIS product documentation to apply the proper
#' scaling factor for post-hoc adjustment. If users have no preliminary
#' information about the MODIS sub-datasets, consider running
#' `terra::describe(__filename__, sds = TRUE)` to navigate the full
#' list of sub-datasets in the input file, then consult the MODIS product
#' documentation.
#' @importFrom terra describe
#' @importFrom terra rast
#' @importFrom terra nlyr
#' @importFrom terra tapp
#' @importFrom terra is.rotated
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' mod09ga_flatten <- process_flatten_sds(
#' path =
#' list.files("./data", pattern = "MOD09GA.", full.names = TRUE)[1],
#' subdataset = process_modis_sds("MOD09GA"),
#' fun_agg = "mean"
#' )
#' }
#' @export
# previously modis_aggregate_sds
process_flatten_sds <-
function(
path = NULL,
subdataset = NULL,
fun_agg = "mean",
...
) {
# if curvilinear, halt
status_curv <-
suppressWarnings(terra::is.rotated(terra::rast(path)))
if (any(status_curv)) {
stop("The raster is curvilinear. Please rectify or warp
the input then flatten it manually.")
}
# describe provides subdataset information
if (!any(grepl(":", path))) {
# we use var to get detailed information in subdatasets
sds_desc <- terra::describe(path, sds = TRUE)
index_sds <- grep(subdataset, sds_desc$var)
sds_desc <- sds_desc[index_sds, c("name", "var", "nlyr")]
# raw is TRUE to ignore scaling factor.
sds_read <- terra::rast(path, subds = index_sds, raw = TRUE)
sds_nsds <- nrow(sds_desc)
sds_nlyr <- sds_desc$nlyr
sds_varn <- sds_desc$var
} else {
sds_read <- terra::rast(path, raw = TRUE)
sds_nsds <- 1L
sds_nlyr <- terra::nlyr(sds_read)
sds_varn <- names(sds_read)
}
if (all(sds_nlyr == 1L)) {
sds_agg <- sds_read
} else {
sds_aggindex <- rep(seq_len(sds_nsds), times = sds_nlyr)
# if there are multiple layers in a subdataset,
# aggregate overlapping pixel values
sds_agg <-
terra::tapp(sds_read,
index = sds_aggindex,
fun = fun_agg,
na.rm = TRUE)
}
# restore names
names(sds_agg) <- sds_varn
gc()
return(sds_agg)
}
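# Sketch of the sub-dataset inspection workflow suggested in the @note above.
# The HDF file path is a hypothetical placeholder; `terra::describe(sds = TRUE)`
# returns the `var` and `nlyr` fields this function uses to select layers.
if (FALSE) {
  sds_desc <- terra::describe("./data/MOD09GA.A2024001.h11v05.hdf", sds = TRUE)
  sds_desc[, c("name", "var", "nlyr")]
  # indices of sub-datasets matching the MOD09GA preset filter
  grep(process_modis_sds("MOD09GA"), sds_desc$var)
}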
# nolint start
#' Process MODIS .hdf files
#' @description
#' Get mosaicked or merged raster from multiple MODIS hdf files.
#' @param path character. Full list of hdf file paths.
#' preferably a recursive search result from [`base::list.files`].
#' @param date character(1). Date to query. Should be in
#' `"YYYY-MM-DD"` format.
#' @param subdataset character(1). subdataset names to extract.
#' Should conform to regular expression. See [`base::regex`] for details.
#' Default is `NULL`, which will result in errors. Users should specify
#' which subdatasets will be imported.
#' @param fun_agg Function name or custom function to aggregate overlapping
#' cell values. See `fun` description in [`terra::tapp`] for details.
#' @param ... For internal use.
#' @note Curvilinear products (i.e., swaths) will not be accepted.
#' MODIS products downloaded by functions in `amadeus`,
#' [MODISTools](https://cran.r-project.org/package=MODISTools),
#' and [luna](https://github.com/rspatial/luna) are accepted.
#' @seealso [`download_data`]
#' @author Insang Song
#' @return a `SpatRaster` object
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' mod09ga_merge <- process_modis_merge(
#' path =
#' list.files("./data", pattern = "MOD09GA.", full.names = TRUE),
#' date = "2024-01-01",
#' subdataset = "sur_refl_b01_1",
#' fun_agg = "mean"
#' )
#' }
#' @export
# nolint end
# previously modis_get_vrt
process_modis_merge <- function(
path = NULL,
date = NULL,
subdataset = NULL,
fun_agg = "mean",
...) {
if (!is.character(path)) {
stop("Argument path should be a list of hdf files (character).\n")
}
if (!(is.character(fun_agg) || is.function(fun_agg))) {
stop("Argument fun_agg should be a function or name of a function
that is accepted in terra::tapp.\n")
}
# date format check
is_date_proper(instr = date)
# interpret date
today <- as.character(date)
dayjul <- strftime(today, "%Y%j")
ftarget <- grep(sprintf("A%s", dayjul), path, value = TRUE)
# get layer information
layer_target <-
lapply(ftarget,
function(x) {
process_flatten_sds(
x,
subdataset = subdataset,
fun_agg = fun_agg
)
})
# Merge multiple rasters into one
# do.call(f, l) is equivalent to f(l[[1]], ... , l[[length(l)]])
if (length(path) > 1) {
result_merged <- do.call(terra::merge, layer_target)
gc()
} else {
result_merged <- layer_target[[1]]
}
return(result_merged)
}
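# The date-to-filename matching used above, in isolation: MODIS file names
# carry an "A{YYYY}{DDD}" acquisition tag, so the calendar date is converted
# to year plus Julian day before grep-ping. The file names below are made up.
if (FALSE) {
  dayjul <- strftime(as.Date("2024-01-01"), "%Y%j")  # "2024001"
  fake_paths <- c(
    "MOD09GA.A2024001.h11v05.061.hdf",
    "MOD09GA.A2024002.h11v05.061.hdf"
  )
  grep(sprintf("A%s", dayjul), fake_paths, value = TRUE)
  # [1] "MOD09GA.A2024001.h11v05.061.hdf"
}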
# nolint start
#' Process Black Marble corners
#' @description
#' Tile corner generator for Black Marble products. Black Marble products
#' are in HDF5 format and are read without georeference by typical R
#' geospatial packages. This function generates a `data.frame` of corner
#' coordinates for assignment.
#' @param hrange integer(2). Both should be in 0-35.
#' @param vrange integer(2). Both should be in 0-17.
#' @return `data.frame` with xmin, xmax, ymin, and ymax fields
#' @author Insang Song
#' @references
#' - [Wang, Z. (2022). Black Marble User Guide (Version 1.3). NASA.](https://ladsweb.modaps.eosdis.nasa.gov/api/v2/content/archives/Document%20Archive/Science%20Data%20Product%20Documentation/VIIRS_Black_Marble_UG_v1.3_Sep_2022.pdf)
#' @examples
#' process_blackmarble_corners(hrange = c(1, 2), vrange = c(1, 2))
#' @export
# nolint end
process_blackmarble_corners <-
function(
hrange = c(5, 11),
vrange = c(3, 6)
) {
# should be in range
if (!all(hrange %in% seq(0, 35)) || !all(vrange %in% seq(0, 17))) {
stop("hrange or vrange are out of range.")
}
# in case range is put in reverse order
hrange <- sort(hrange)
vrange <- sort(vrange)
hseq <- seq(hrange[1], hrange[2])
vseq <- seq(vrange[1], vrange[2])
tile_df <-
expand.grid(
vaddr = sprintf("v%02d", vseq),
haddr = sprintf("h%02d", hseq)
)
hrangec <- -180 + (hseq * 10L)
vrangec <- 90 - (vseq * 10L)
hlen <- abs(diff(hrange)) + 1
vlen <- abs(diff(vrange)) + 1
tile_df$tile <- paste0(tile_df$haddr, tile_df$vaddr)
tile_df <- data.frame(tile = tile_df$tile)
tile_df$xmin <- rep(hrangec, each = vlen)
tile_df$xmax <- tile_df$xmin + 10L
tile_df$ymin <- rep(vrangec, hlen) - 10L
tile_df$ymax <- tile_df$ymin + 10L
return(tile_df)
}
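# The tile-to-extent arithmetic above, in brief: each Black Marble tile spans
# 10 degrees, with h00 starting at -180 longitude and v00 at +90 latitude.
# Runs with no data on disk.
if (FALSE) {
  corners <- process_blackmarble_corners(hrange = c(5, 6), vrange = c(4, 5))
  corners[1, ]
  #   tile: "h05v04", xmin: -130, xmax: -120, ymin: 40, ymax: 50
  # i.e., xmin = -180 + h * 10 and ymax = 90 - v * 10
}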
# nolint start
#' Assign VIIRS Black Marble products corner coordinates to retrieve a merged raster
#' @description This function returns a `SpatRaster` object built from
#' georeferenced h5 files of the Black Marble product. Assigning corner
#' coordinates is necessary as the original h5 data do not include
#' such information.
#' @param path character. Full paths of h5 files.
#' @param date character(1). Date to query.
#' @param tile_df data.frame. Contains four corner coordinates in fields named
#' `c("xmin", "xmax", "ymin", "ymax")`.
#' See [`process_blackmarble_corners`] to generate a valid object for this argument.
#' @param subdataset integer(1). Subdataset number to process.
#' Default is 3L.
#' @param crs character(1). terra::crs compatible CRS.
#' Default is `"EPSG:4326"`
#' @param ... For internal use.
#' @return a `SpatRaster` object
#' @author Insang Song
#' @seealso
#' * [`terra::describe`]
#' * [`terra::merge`]
#' * [`process_blackmarble_corners`]
#' @references
#' - [Wang, Z. (2022). Black Marble User Guide (Version 1.3). NASA.](https://ladsweb.modaps.eosdis.nasa.gov/api/v2/content/archives/Document%20Archive/Science%20Data%20Product%20Documentation/VIIRS_Black_Marble_UG_v1.3_Sep_2022.pdf)
#' @importFrom terra rast
#' @importFrom terra ext
#' @importFrom terra crs
#' @importFrom terra merge
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' vnp46a2 <- process_blackmarble(
#' path =
#' list.files("./data", pattern = "VNP46A2.", full.names = TRUE),
#' date = "2024-01-01",
#' tile_df =
#' process_blackmarble_corners(hrange = c(8, 10), vrange = c(4, 5)),
#' subdataset = 3L,
#' crs = "EPSG:4326"
#' )
#' }
#' @export
# previously modis_preprocess_vnp46
# nolint end
process_blackmarble <- function(
path = NULL,
date = NULL,
tile_df = process_blackmarble_corners(),
subdataset = 3L,
crs = "EPSG:4326",
...
) {
is_date_proper(instr = date)
# interpret date from paths
datejul <- strftime(as.Date(date), format = "%Y%j")
stdtile <- tile_df$tile
filepaths_today <- grep(sprintf("A%s", datejul), path, value = TRUE)
# today's filenames
filepaths_today <-
grep(
paste0(
"(", paste(stdtile, collapse = "|"), ")"
),
filepaths_today, value = TRUE
)
filepaths_today_tiles <-
regmatches(filepaths_today,
regexpr("h([0-2][0-9]|[3][0-6])v([0-1][0-9])", filepaths_today))
vnp_today <- unname(split(filepaths_today, filepaths_today))
filepaths_today_tiles_list <-
unname(split(filepaths_today_tiles, filepaths_today_tiles))
# for filenames,
# assign corner coordinates then merge
# Subdataset 3 is BRDF-corrected nighttime light
vnp_assigned <-
mapply(function(vnp, tile_in) {
vnp_ <- terra::rast(vnp, subds = subdataset)
tile_ext <- tile_df[tile_df$tile == tile_in, -1]
terra::crs(vnp_) <- terra::crs(crs)
terra::ext(vnp_) <- unlist(tile_ext)
return(vnp_)
}, vnp_today, filepaths_today_tiles_list, SIMPLIFY = FALSE)
if (length(filepaths_today) > 1) {
vnp_all <- do.call(terra::merge, vnp_assigned)
} else {
vnp_all <- vnp_assigned[[1]]
}
vnp_all[vnp_all == 65535L] <- NaN
vnp_all[is.nan(vnp_all)] <- NA
return(vnp_all)
}
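# The georeferencing step above, shown on a toy raster: Black Marble h5
# layers arrive without extent or CRS, so both are assigned from the tile
# corner table. Uses an in-memory raster only; no h5 files are needed.
if (FALSE) {
  toy <- terra::rast(nrows = 2400, ncols = 2400, vals = 0)
  terra::crs(toy) <- terra::crs("EPSG:4326")
  terra::ext(toy) <- c(-100, -90, 40, 50)  # corners of tile h08v04
  toy
}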
#' Warp MODIS swath data into rectilinear grid raster
#' @description Swath data is a type of MODIS data
#' in which curvilinear points are stored with varying resolution depending
#' on the relative position of the sensor axis. As this type of data
#' typically does not work well with planar spatial data, users
#' should warp or rectify it into a rectilinear raster.
#' The main procedure is done with [`stars::st_warp`], where users can
#' customize the threshold for filling potential gaps that appear where
#' the target resolution is finer than the local resolution of the
#' curvilinear grid points.
#' @param path File path of MODIS swath with exact sub-dataset specification.
#' @param cellsize numeric(1). Cell size (spatial resolution) of
#' output rectilinear grid raster.
#' @param threshold numeric(1). Maximum distance used to fill gaps, if any occur.
#' @param crs integer(1)/character(1). Coordinate system definition.
#' Should be compatible with EPSG codes or WKT2.
#' See [`terra::crs`] and [`sf::st_crs`] / [EPSG](https://epsg.io/)
#' @param ... For internal use.
#' @note This function handles one file at a time.
#' @return a `stars` object
#' @author Insang Song
#' @seealso [`terra::rectify`]
#' @importFrom stars st_warp
#' @importFrom stars read_stars
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' mod06l2_warp <- process_modis_warp(
#' path = paste0(
#' "HDF4_EOS:EOS_SWATH:",
#' list.files(
#' "./data/mod06l2",
#' full.names = TRUE,
#' pattern = ".hdf"
#' )[1],
#' ":mod06:Cloud_Fraction"
#' ),
#' cellsize = 0.1,
#' threshold = 0.4,
#' crs = 4326
#' )
#' }
#' @export
# previously modis_warp_stars
process_modis_warp <-
function(
path = NULL,
cellsize = 0.1,
threshold = cellsize * 4,
crs = 4326,
...
) {
options(sf_use_s2 = FALSE)
ras <- stars::read_stars(path)
rtd <-
stars::st_warp(
ras,
crs = crs,
segments = 500,
cellsize = cellsize,
threshold = threshold
)
return(rtd)
}
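# A self-contained `st_warp` illustration using the sample raster shipped
# with the stars package (a regular grid rather than a true MODIS swath,
# but the warp call has the same shape as above).
if (FALSE) {
  tif <- system.file("tif/L7_ETMs.tif", package = "stars")
  ras <- stars::read_stars(tif)
  stars::st_warp(ras, crs = sf::st_crs("EPSG:4326"), cellsize = 0.005)
}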
# nolint start
#' Mosaic MODIS swaths
#' @description This function will return a `SpatRaster` object with
#' values of selected subdatasets. Swath data comprise curvilinear
#' grids, which require warping/rectifying the original curvilinear grids
#' into rectilinear grids. The function internally warps each input
#' then mosaics the warped images into one large `SpatRaster` object.
#' Users need to select a subdataset to process. The full path looks like
#' `"HDF4_EOS:EOS_SWATH:{file_path}:mod06:{subdataset}"`, where `file_path` is
#' the full path to the hdf file.
#' @param path character. Full paths of hdf files.
#' @param date character(1). Date to query.
#' @param subdataset character. Subdatasets to process.
#' __Unlike other preprocessing functions, this argument should specify
#' the exact subdataset name.__ For example, when using MOD06_L2 product,
#' one may specify `c("Cloud_Fraction", "Cloud_Optical_Thickness")`,
#' etc. The subdataset names can be found in `terra::describe()` output.
#' @param suffix character(1). Should be formatted `:{product}:`,
#' e.g., `:mod06:`
#' @param resolution numeric(1). Resolution of output raster.
#' Unit is degree (decimal degree in WGS84).
#' @param ... For internal use.
#' @seealso
#' * [`process_modis_warp()`], [`stars::read_stars()`], [`stars::st_warp()`]
#' * [GDAL HDF4 driver documentation](https://gdal.org/en/latest/drivers/raster/hdf4.html)
#' * [`terra::describe()`]: to list the full subdataset list with `sds = TRUE`
#' * [`terra::sprc()`], [`terra::rast()`]
#' @return
#' * a `SpatRaster` object (crs = `"EPSG:4326"`): if `path` is a single file with
#' full specification of subdataset.
#' * a `SpatRaster` object (crs = `"EPSG:4326"`): if `path` is a list of files. In this case, the returned object will have the maximal extent of multiple warped layers
#' @author Insang Song
#' @importFrom terra rast
#' @importFrom terra crop
#' @importFrom terra ext
#' @importFrom terra mosaic
#' @importFrom stars st_mosaic
#' @importFrom terra values
#' @importFrom terra sprc
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' mod06l2_swath <- process_modis_swath(
#' path = list.files(
#' "./data/mod06l2",
#' full.names = TRUE,
#' pattern = ".hdf"
#' ),
#' date = "2024-01-01",
#' subdataset = "Cloud_Fraction",
#' suffix = ":mod06:",
#' resolution = 0.05
#' )
#' }
#' @export
# nolint end
# previously modis_mosaic_mod06
process_modis_swath <-
function(
path = NULL,
date = NULL,
subdataset = NULL,
suffix = ":mod06:",
resolution = 0.05,
...
) {
# check date format
is_date_proper(instr = date)
header <- "HDF4_EOS:EOS_SWATH:"
ras_mod06 <- vector("list", length = length(subdataset))
datejul <- strftime(date, format = "%Y%j")
## FIXME: this part may result in underperformance.
## Find a way to optimize this part.
paths_today <- grep(sprintf("A%s", datejul), path, value = TRUE)
# if two or more paths are put in,
# these are read into a list then mosaicked
for (element in seq_along(subdataset)) {
target_text <-
sprintf("%s%s%s%s", header, paths_today, suffix, subdataset[element])
# rectified stars objects to SpatRaster
mod06_element <- split(target_text, target_text) |>
lapply(process_modis_warp, cellsize = resolution)
# Remove all NA layers to avoid erroneous values
mod06_element_nas <-
sapply(
mod06_element,
function(x) {
xvals <- x[[subdataset[element]]]
all(is.na(xvals)) || all(is.nan(xvals))
}
)
mod06_element <-
mod06_element[!mod06_element_nas & !is.null(mod06_element_nas)]
# prepare a fail-safe alternative return
# It will be used again later.
alt <- terra::rast(
xmin = -128,
xmax = -64,
ymin = 20,
ymax = 52,
resolution = resolution
)
# initialize values with NA
alt[] <- NA
alt_dim <- dim(alt)
alt[1, 1] <- 0
alt[1, alt_dim[2]] <- 0
alt[alt_dim[1], 1] <- 0
alt[alt_dim[1], alt_dim[2]] <- 0
terra::crs(alt) <- "EPSG:4326"
if (is.null(mod06_element) || length(mod06_element) == 0) {
message("All layers are NA or NaN.")
mod06_element_mosaic <- terra::deepcopy(alt)
} else {
# mosaick the warped SpatRasters into one
mod06_element_mosaic <-
do.call(stars::st_mosaic, mod06_element) |>
terra::rast()
# assigning variable name
mod06_element_mosaic <-
terra::crop(mod06_element_mosaic, terra::ext(alt))
}
names(mod06_element_mosaic) <- subdataset[element]
ras_mod06[[element]] <- mod06_element_mosaic
}
# SpatRasterCollection can accommodate multiple SpatRasters
# with different extents (most flexible kind)
mod06_sprc <- terra::sprc(ras_mod06)
# post-hoc: stack multiple layers with different extent
# into one SpatRaster
# 1. mosaic all layers into one
mod06_mosaic <- terra::mosaic(mod06_sprc, fun = "median")
# 2. Assign NAs to prepare "etching"; NA will result in NaNs
terra::values(mod06_mosaic) <- NA
# 3. Looping main "etching"; each element is put first
mod06_etched <-
sapply(mod06_sprc, terra::mosaic, y = mod06_mosaic, fun = "first")
# 4. stack
mod06_return <- do.call(c, mod06_etched)
return(mod06_return)
}
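# The extent-harmonizing "etch" used above, on two toy layers with different
# extents: mosaic the SpatRasterCollection for the union grid, blank it with
# NA, then mosaic each member back with fun = "first" so every layer shares
# one geometry and can be stacked. In-memory rasters only.
if (FALSE) {
  r1 <- terra::rast(xmin = 0, xmax = 2, ymin = 0, ymax = 2,
                    resolution = 1, vals = 1)
  r2 <- terra::rast(xmin = 1, xmax = 3, ymin = 1, ymax = 3,
                    resolution = 1, vals = 2)
  coll <- terra::sprc(r1, r2)
  canvas <- terra::mosaic(coll, fun = "median")
  terra::values(canvas) <- NA
  etched <- sapply(coll, terra::mosaic, y = canvas, fun = "first")
  do.call(c, etched)  # one multi-layer SpatRaster on the shared grid
}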
#' Process climate classification data
#' @description
#' The \code{process_koppen_geiger()} function imports and cleans raw climate
#' classification data, returning a single `SpatRaster` object.
#' @param path character(1). Path to the Koppen-Geiger
#' climate zone raster file.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @return a `SpatRaster` object
#' @author Insang Song
#' @importFrom terra rast
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' kg <- process_koppen_geiger(
#' path = "./data/koppen_geiger_data.tif"
#' )
#' }
#' @export
process_koppen_geiger <-
function(
path = NULL,
extent = NULL,
...
) {
# import data
kg_rast <- terra::rast(path, win = extent)
# identify time period
period <- strsplit(
names(kg_rast),
"_"
)[[1]][4]
if (period == "present") {
terra::metags(kg_rast) <- c(year = "1980 - 2016")
} else {
terra::metags(kg_rast) <- c(year = "2071 - 2100")
}
return(kg_rast)
}
#' Process land cover data
#' @description
#' The \code{process_nlcd()} function imports and cleans raw land cover data,
#' returning a single `SpatRaster` object for the selected `year`.
#' @param path character(1). Directory containing NLCD data files.
#' @param year numeric(1). Year of the NLCD data to use.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @return a `SpatRaster` object
#' @author Eva Marques, Insang Song
#' @importFrom utils read.csv
#' @importFrom tools file_path_sans_ext
#' @importFrom terra rast metags
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' nlcd <- process_nlcd(
#' path = "./data/",
#' year = 2021
#' )
#' }
#' @export
process_nlcd <-
function(
path = NULL,
year = 2021,
extent = NULL,
...
) {
# check inputs
if (!is.character(path) || is.null(path)) {
stop("path is not a character.")
}
if (!dir.exists(path)) {
stop("path does not exist.")
}
if (!is.numeric(year)) {
stop("year is not a numeric.")
}
# open nlcd file corresponding to the year
nlcd_file <-
list.files(
path,
pattern = paste0("nlcd_", year, "_.*.(tif|img)$"),
full.names = TRUE
)
# check if name without extension is duplicated
nlcd_file_base <- basename(nlcd_file)
nlcd_file_base <- tools::file_path_sans_ext(nlcd_file_base)
if (any(duplicated(nlcd_file_base))) {
stop("Duplicated NLCD files are detected. Please remove duplicates.")
}
if (length(nlcd_file) == 0) {
stop("NLCD data not available for this year.")
}
nlcd <- terra::rast(nlcd_file, win = extent)
terra::metags(nlcd) <- c(year = year)
return(nlcd)
}
#' Process ecoregion data
#' @description
#' The [`process_ecoregion`] function imports and cleans raw ecoregion
#' data, returning a `SpatVector` object.
#' @param path character(1). Path to Ecoregion Shapefiles
#' @param extent numeric(4) or SpatExtent giving the extent of the output.
#' If `NULL` (default), the entire dataset is returned.
#' @param ... Placeholders.
#' @note The function fixes Tukey's bridge in Portland, ME.
#' This fix ensures that the EPA air quality monitoring sites
#' are located within an ecoregion.
#' @author Insang Song
#' @return a `SpatVector` object
#' @importFrom terra vect
#' @importFrom sf st_read st_crs st_as_sfc st_transform st_intersects st_union
#' @importFrom data.table year
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' ecoregion <- process_ecoregion(
#' path = "./data/epa_ecoregion.gpkg"
#' )
#' }
#' @export
process_ecoregion <-
function(
path = NULL,
extent = NULL,
...
) {
ecoreg <- sf::st_read(path)
# fix Tukey's bridge in Portland, ME
# nolint start
poly_tukey <-
"POLYGON ((-70.258 43.68, -70.2555 43.68, -70.255 43.6733, -70.2576 43.6732, -70.258 43.68))"
poly_tukey <- sf::st_as_sfc(poly_tukey, crs = "EPSG:4326")
poly_tukey <- sf::st_transform(poly_tukey, sf::st_crs(ecoreg))
# nolint end
ecoreg <- ecoreg[, grepl("^(L2_KEY|L3_KEY)", names(ecoreg))]
ecoreg_edit_idx <- sf::st_intersects(ecoreg, poly_tukey, sparse = FALSE)
ecoreg_edit_idx <- vapply(ecoreg_edit_idx, function(x) any(x), logical(1))
if (!all(ecoreg_edit_idx == 0)) {
ecoreg_else <- ecoreg[!ecoreg_edit_idx, ]
ecoreg_edit <- sf::st_union(ecoreg[ecoreg_edit_idx, ], poly_tukey)
ecoreg <- rbind(ecoreg_else, ecoreg_edit)
}
ecoreg$time <- paste0(
"1997 - ", data.table::year(Sys.time())
)
ecoreg <- terra::vect(ecoreg)
if (!is.null(extent)) {
ecoreg_crop <- terra::crop(ecoreg, extent)
return(ecoreg_crop)
} else {
return(ecoreg)
}
}
# nolint start
#' Process toxic release data
#' @description
#' This function imports and cleans raw toxic release data,
#' returning a single `SpatVector` (points) object for the selected `year`.
#' @param path character(1). Path to the directory with TRI CSV files
#' @param year integer(1). Single year to select.
#' @param variables integer. Column index of TRI data.
#' @param extent numeric(4) or SpatExtent giving the extent of the output.
#' If `NULL` (default), the entire dataset is returned.
#' @param ... Placeholders.
#' @author Insang Song, Mariana Kassien
#' @return a `SpatVector` object (points) for the selected `year`;
#' `year` is stored in a field named `"year"`.
#' @note Visit [TRI Data and Tools](https://www.epa.gov/toxics-release-inventory-tri-program/tri-toolbox)
#' to view the available years and variables.
#' @references
#' https://www.epa.gov/toxics-release-inventory-tri-program/tri-toolbox
#' @importFrom terra vect
#' @importFrom terra crs
#' @importFrom terra nearby
#' @importFrom methods is
#' @importFrom data.table .SD
#' @importFrom utils read.csv
#' @importFrom data.table rbindlist
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr across
#' @importFrom dplyr ends_with
#' @importFrom dplyr all_of
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr summarize
#' @importFrom tidyr pivot_wider
#' @importFrom stats setNames
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' tri <- process_tri(
#' path = "./data",
#' year = 2020,
#' variables = c(1, 13, 12, 14, 20, 34, 36, 47, 48, 49)
#' )
#' }
# nolint end
#' @export
process_tri <- function(
path = NULL,
year = 2018,
variables = c(1, 13, 12, 14, 20, 34, 36, 47, 48, 49),
extent = NULL,
...
) {
csvs_tri_from <-
list.files(path = path, pattern = "*.csv$", full.names = TRUE)
csvs_tri <- lapply(csvs_tri_from, read.csv)
col_sel <- variables
csvs_tri <- data.table::rbindlist(csvs_tri)
dt_tri <- csvs_tri[, col_sel, with = FALSE]
# column name readjustment
tri_cns <- colnames(dt_tri)
tri_cns <- sub(".*?\\.\\.", "", tri_cns)
tri_cns <- sub("^[^A-Za-z]*", "", tri_cns)
tri_cns <- gsub("\\.", "_", tri_cns)
dt_tri <- stats::setNames(dt_tri, tri_cns)
dt_tri <- dt_tri[dt_tri$YEAR == year, ]
# depending on the way the chemicals are summarized
# Unit is kilogram
# nolint start
YEAR <- NULL
LONGITUDE <- NULL
LATITUDE <- NULL
TRI_CHEMICAL_COMPOUND_ID <- NULL
dt_tri_x <-
dt_tri |>
dplyr::mutate(
dplyr::across(
dplyr::ends_with("_AIR"),
~ifelse(UNIT_OF_MEASURE == "Pounds", . * (453.592 / 1e3), . / 1e3)
)
) |>
dplyr::group_by(YEAR, LONGITUDE, LATITUDE, TRI_CHEMICAL_COMPOUND_ID) |>
dplyr::summarize(
dplyr::across(
dplyr::ends_with("_AIR"),
~sum(., na.rm = TRUE)
)
) |>
dplyr::ungroup() |>
tidyr::pivot_wider(
values_from = c("FUGITIVE_AIR", "STACK_AIR"),
names_from = "TRI_CHEMICAL_COMPOUND_ID",
names_sep = "_"
) |>
    dplyr::filter(!is.na(LONGITUDE) & !is.na(LATITUDE))
names(dt_tri_x) <- sub(" ", "_", names(dt_tri_x))
spvect_tri <-
terra::vect(dt_tri_x,
geom = c("LONGITUDE", "LATITUDE"),
crs = "EPSG:4269", # all are NAD83
keepgeom = TRUE)
attr(spvect_tri, "tri_year") <- year
  if (!is.null(extent)) {
tri_final <- apply_extent(spvect_tri, extent)
return(tri_final)
} else {
return(spvect_tri)
}
}
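# The unit handling above, in isolation: TRI air-release values arrive in
# pounds or grams depending on UNIT_OF_MEASURE, and both are converted to
# kilograms (1 lb = 453.592 g).
if (FALSE) {
  value <- c(1000, 1000)
  unit <- c("Pounds", "Grams")
  ifelse(unit == "Pounds", value * (453.592 / 1e3), value / 1e3)
  # [1] 453.592   1.000  (kilograms)
}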
# nolint start
#' Process road emissions data
#' @description
#' The \code{process_nei()} function imports and cleans raw road emissions data,
#' returning a single `SpatVector` object. NEI data comprise multiple csv files
#' in which emissions of 50+ pollutants are recorded at the county level. From
#' the raw data files, this function joins a combined table of NEI data and
#' county boundaries, then performs a spatial join to target locations.
#' @param path character(1). Directory with NEI csv files.
#' @param county `SpatVector`/`sf`. County boundaries.
#' @param year integer(1). Year to use. Currently only 2017 or 2020
#' is accepted.
#' @param ... Placeholders.
#' @return a `SpatVector` object
#' @author Insang Song
#' @note Base files for `county` argument can be downloaded directly from
#' [U.S. Census Bureau](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
#' or by using `tigris` package. This function does not reproject census boundaries.
#' Users should be aware of the coordinate system of census boundary data for
#' other analyses.
#' @importFrom terra vect
#' @importFrom terra crs
#' @importFrom methods is
#' @importFrom data.table .SD
#' @importFrom data.table fread
#' @importFrom data.table rbindlist
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' nei <- process_nei(
#' path = "./data",
#' county = system.file("gpkg/nc.gpkg", package = "sf"),
#' year = 2017
#' )
#' }
#' @export
# nolint end
process_nei <- function(
path = NULL,
county = NULL,
year = c(2017, 2020),
...
) {
if (is.null(county)) {
stop("county argument is required.")
}
if (!methods::is(county, "SpatVector")) {
county <- try(terra::vect(county))
if (inherits(county, "try-error")) {
stop("county is unable to be converted to SpatVector.\n")
}
}
if (!"GEOID" %in% names(county)) {
stop("county should include a field named \"GEOID\"")
}
if (!as.character(year) %in% c("2017", "2020")) {
stop("year should be one of 2017 or 2020.\n")
}
# Concatenate NEI csv files
csvs_nei <-
list.files(
path = path,
pattern = "*.csv$",
recursive = TRUE,
full.names = TRUE
)
csvs_nei <- grep(year, csvs_nei, value = TRUE)
if (is.null(csvs_nei) || length(csvs_nei) == 0) {
stop("No files found for the year. The file names should include the year")
}
csvs_nei <- lapply(csvs_nei, data.table::fread)
csvs_nei <- data.table::rbindlist(csvs_nei)
# column name readjustment
target_nm <- c("fips code", "total emissions", "emissions uom")
# not grep-ping at once for flexibility
target_cns <- sapply(target_nm, function(x) grep(x, colnames(csvs_nei)))
colnames(csvs_nei)[target_cns] <-
c("geoid", "emissions_total", "unit_measurement")
# unify unit of measurement
  # TON here is the short ton, which is 2,000 lbs.
csvs_nei$emissions_total_ton <-
ifelse(
csvs_nei$unit_measurement == "TON",
csvs_nei$emissions_total,
csvs_nei$emissions_total / 2000
)
emissions_total_ton <- NULL
geoid <- NULL
# yearabbr <- substr(year, 3, 4)
csvs_nei$geoid <- sprintf("%05d", as.integer(csvs_nei$geoid))
csvs_nei <-
csvs_nei[, list(
TRF_NEINP_0_00000 = sum(emissions_total_ton, na.rm = TRUE)
),
by = geoid]
csvs_nei$time <- as.integer(year)
# read county vector
cnty_geoid_guess <- grep("GEOID", names(county))
names(county)[cnty_geoid_guess] <- "geoid"
county$geoid <- sprintf("%05d", as.integer(county$geoid))
cnty_vect <- merge(county, as.data.frame(csvs_nei), by = "geoid")
cnty_vect <- cnty_vect[, c("geoid", "time", "TRF_NEINP_0_00000")]
return(cnty_vect)
}
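# The unit normalization above, in isolation: NEI rows report emissions in
# short tons ("TON", 2,000 lbs) or pounds, and pound values are divided by
# 2,000 so that county totals sum in short tons.
if (FALSE) {
  emissions <- c(1, 2000)
  unit <- c("TON", "LB")
  ifelse(unit == "TON", emissions, emissions / 2000)
  # [1] 1 1  (both equal one short ton)
}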
# nolint start
#' Process U.S. EPA AQS daily CSV data
#' @description
#' The \code{process_aqs()} function cleans and imports raw air quality
#' monitoring sites from pre-generated daily CSV files, returning a single
#' `SpatVector` or `sf` object.
#' `date` is used to filter the raw data read from csv files.
#' Filtered rows are then processed according to `mode` argument.
#' Some sites report multiple measurements per day with and without
#' [exceptional events](https://www.epa.gov/sites/default/files/2016-10/documents/exceptional_events.pdf);
#' the internal procedure of this function keeps "Included" rows if there
#' are multiple event types per site-time.
#' @param path character(1). Directory path to daily measurement data.
#' @param date character(1 or 2). Date (1) or start and end dates (2).
#' Should be in `"YYYY-MM-DD"` format and sorted.
#' @param mode character(1). One of
#' * "date-location" (all dates * all locations)
#' * "available-data" (date-location pairs with available data)
#' * "location" (unique locations).
#' @param data_field character(1). Data field to extract.
#' @param return_format character(1). `"terra"` or `"sf"` or `"data.table"`.
#' @param extent numeric(4). Spatial extent of the resulting object.
#' The order should be `c(xmin, xmax, ymin, ymax)`.
#' The coordinate system should be WGS84 (EPSG:4326).
#' @param ... Placeholders.
#' @seealso
#' * [`download_aqs()`]
#' * [EPA, n.d., _AQS Parameter Codes_](
#' https://aqs.epa.gov/aqsweb/documents/codetables/parameters.csv)
#' @return a `SpatVector`, `sf`, or `data.table` object depending on `return_format`.
#' @importFrom data.table as.data.table
#' @importFrom utils read.csv
#' @importFrom terra vect project
#' @importFrom sf st_as_sf
#' @importFrom dplyr group_by ungroup filter mutate select distinct
#' @note Choose `date` and `mode` values with caution.
#' The function may return a massive data.table depending on the time range,
#' resulting in a long processing time or even a crash if data is too large
#' for your computing environment to process.
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' aqs <- process_aqs(
#' path = "./data/aqs_daily_example.csv",
#' date = c("2022-12-01", "2023-01-31"),
#' mode = "full",
#' return_format = "terra"
#' )
#' }
#' @export
process_aqs <-
function(
path = NULL,
date = c("2018-01-01", "2022-12-31"),
mode = c("date-location", "available-data", "location"),
data_field = "Arithmetic.Mean",
return_format = c("terra", "sf", "data.table"),
extent = NULL,
...
) {
mode <- match.arg(mode)
return_format <- match.arg(return_format)
if (!is.null(date)) {
date <- try(as.Date(date))
if (inherits(date, "try-error")) {
stop("date has invalid format(s). Please check the values.")
}
if (length(date) != 2) {
if (length(date) == 1) {
date <- c(date, date)
} else {
stop("date should be a character vector of length 1 or 2.")
}
}
} else {
stop("date should be defined.")
}
if (length(path) == 1 && dir.exists(path)) {
path <- list.files(
path = path,
pattern = "*.csv$",
full.names = TRUE
)
}
if (length(path) == 0) {
stop("path does not contain csv files.")
}
pathfiles <- lapply(path, read.csv)
sites <- data.table::rbindlist(pathfiles, fill = TRUE)
## get unique sites
sites$site_id <-
sprintf("%02d%03d%04d%05d",
as.integer(sites$State.Code),
as.integer(sites$County.Code),
as.integer(sites$Site.Num),
as.integer(sites$Parameter.Code))
site_id <- NULL
Datum <- NULL
POC <- NULL
Date.Local <- NULL
Sample.Duration <- NULL
date_start <- as.Date(date[1])
date_end <- as.Date(date[2])
date_sequence <- seq(date_start, date_end, "day")
date_sequence <- as.character(date_sequence)
# select relevant fields only
sites <- sites |>
dplyr::as_tibble() |>
dplyr::filter(as.character(Date.Local) %in% date_sequence) |>
dplyr::filter(startsWith(Sample.Duration, "24")) |>
dplyr::group_by(site_id) |>
dplyr::filter(POC == min(POC)) |>
dplyr::mutate(time = Date.Local) |>
dplyr::ungroup()
col_sel <- c("site_id", "Longitude", "Latitude", "Datum")
if (mode != "available-data") {
sites_v <- unique(sites[, col_sel])
} else {
col_sel <- append(col_sel, "Event.Type")
col_sel <- append(col_sel, "time")
col_sel <- append(col_sel, data_field)
sites_v <- sites |>
dplyr::select(dplyr::all_of(col_sel)) |>
dplyr::distinct()
# excluding site-time with multiple event types
# sites_vdup will be "subtracted" from the original sites_v
sites_vdup <- sites_v |>
dplyr::group_by(site_id, time) |>
dplyr::filter(dplyr::n() > 1) |>
dplyr::filter(!!dplyr::sym("Event.Type") == "Excluded") |>
dplyr::ungroup()
sites_v <-
dplyr::anti_join(
sites_v, sites_vdup,
by = c("site_id", "time", "Event.Type")
)
}
names(sites_v)[2:3] <- c("lon", "lat")
sites_v <- data.table::as.data.table(sites_v)
# subset mainland
sites_v <- sites_v[!grepl("^(02|15|72|78|6|80)", site_id), ]
# NAD83 to WGS84
sites_v_nad <-
sites_v[sites_v$Datum == "NAD83", ]
sites_v_nad <-
terra::vect(
sites_v_nad,
keepgeom = TRUE,
crs = "EPSG:4269"
)
sites_v_nad <- terra::project(sites_v_nad, "EPSG:4326")
# postprocessing: combine WGS84 and new WGS84 records
sites_v_nad <- as.data.frame(sites_v_nad)
sites_v_wgs <- sites_v[sites_v$Datum == "WGS84"]
final_sites <- data.table::rbindlist(
list(sites_v_wgs, sites_v_nad), fill = TRUE)
final_sites <-
final_sites[, grep("Datum", names(final_sites), invert = TRUE), with = FALSE]
if (mode == "date-location") {
final_sites <-
split(date_sequence, date_sequence) |>
lapply(function(x) {
fs_time <- final_sites
fs_time$time <- x
return(fs_time)
})
final_sites <- data.table::rbindlist(final_sites, fill = TRUE)
}
if (mode == "available-data") {
final_sites <- unique(final_sites)
}
final_sites <-
switch(
return_format,
terra =
terra::vect(
final_sites,
keepgeom = TRUE,
crs = "EPSG:4326"
),
sf =
sf::st_as_sf(
final_sites,
remove = FALSE,
dim = "XY",
coords = c("lon", "lat"),
crs = "EPSG:4326"
),
data.table = final_sites
)
if (!is.null(extent)) {
if (return_format == "data.table") {
warning("Extent is not applicable for data.table. Returning data.table...\n")
return(final_sites)
}
final_sites <- apply_extent(final_sites, extent)
}
return(final_sites)
}
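# The site identifier scheme above, in isolation: state (2 digits), county
# (3), site (4), and parameter (5) codes are zero-padded and concatenated
# into one 14-character key used for grouping and the mainland filter.
if (FALSE) {
  sprintf(
    "%02d%03d%04d%05d",
    as.integer("37"),    # State.Code
    as.integer("63"),    # County.Code
    as.integer("15"),    # Site.Num
    as.integer("88101")  # Parameter.Code
  )
  # [1] "37063001588101"
}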
#' Process population density data
#' @description
#' The \code{process_population()} function imports and cleans raw
#' population density data, returning a single `SpatRaster` object.
#' @param path character(1). Path to GeoTIFF (.tif) or netCDF (.nc) file.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @author Mitchell Manware
#' @return a `SpatRaster` object
#' @importFrom terra rast
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' pop <- process_population(
#' path = "./data/sedac_population_example.tif"
#' )
#' }
#' @export
# nolint end
process_population <- function(
path = NULL,
extent = NULL,
...) {
if (substr(path, nchar(path) - 2, nchar(path)) == ".nc") {
message(
paste0(
"netCDF functionality for SEDAC data is under construction.\n"
)
)
return()
}
#### check for variable
check_for_null_parameters(mget(ls()))
#### import data
data <- terra::rast(path, win = extent)
#### identify names
names_raw <- names(data)
#### create new names
for (r in seq_along(names_raw)) {
    split1 <- strsplit(
      names_raw[r],
      "_rev11_"
    )[[1]][[2]]
split2 <- strsplit(
split1,
"_"
)[[1]]
names(data[[r]]) <- paste0(
"gpw_v4_population_",
split2[1],
"_",
split2[2],
"_",
split2[3]
)
message(paste0(
"Cleaning ",
process_sedac_codes(
paste0(
split2[2],
"_",
split2[3]
),
invert = TRUE
),
" population data for year ",
split2[1],
"...\n"
))
#### year
terra::metags(data) <- c(year = split2[1])
}
return(data)
}
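# The layer-name parsing above, in isolation, on a made-up GPWv4-style
# layer name: the piece after "_rev11_" supplies the year, the unit
# concept, and the resolution used to rename layers and set metadata.
if (FALSE) {
  nm <- "gpw_v4_population_density_rev11_2020_30_sec"
  split1 <- strsplit(nm, "_rev11_")[[1]][[2]]
  strsplit(split1, "_")[[1]]
  # [1] "2020" "30"  "sec"
}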
# nolint start
#' Process roads data
#' @description
#' The \code{process_groads()} function imports and cleans raw road data,
#' returning a single `SpatVector` object.
#' @param path character(1). Path to geodatabase or shapefiles.
#' @param extent numeric(4) or SpatExtent giving the extent of the output.
#' If `NULL` (default), the entire dataset is loaded.
#' @param ... Placeholders.
#' @note U.S. context. The returned `SpatVector` object contains a
#' `$description` column to represent the temporal range covered by the
#' dataset. For more information, see <https://earthdata.nasa.gov/data/catalog/sedac-ciesin-sedac-groads-v1-1.00>.
#' @author Insang Song
#' @return a `SpatVector` object
#' @importFrom terra vect
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' groads <- process_groads(
#' path = "./data/groads_example.shp"
#' )
#' }
#' @export
# nolint end
process_groads <- function(
path = NULL,
extent = NULL,
...) {
#### check for variable
check_for_null_parameters(mget(ls()))
if (!grepl("(shp|gdb)$", path)) {
stop("Input is not in expected format.\n")
}
#### import data
data <- terra::vect(path, extent = extent)
#### time period
data$description <- "1980 - 2010"
return(data)
}
# nolint start
#' Process wildfire smoke data
#' @description
#' The \code{process_hms()} function imports and cleans raw wildfire smoke
#' plume coverage data, returning a single `SpatVector` object.
#' @param date character(1 or 2). Date (1) or start and end dates (2).
#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01").
#' @param path character(1). Directory with downloaded NOAA HMS data files.
#' @param extent numeric(4) or SpatExtent giving the extent of the output.
#' If `NULL` (default), the entire data is returned.
#' @param ... Placeholders.
#' @note
#' \code{process_hms()} will return a character object if there are no wildfire
#' smoke plumes present for the selected dates and density. The returned
#' character will contain the density value and the sequence of dates
#' for which no wildfire smoke plumes were detected (see "Examples").
#' If multiple density polygons overlap, the function will return the
#' highest density value.
#' @examples
#' hms <- process_hms(
#' date = c("2018-12-30", "2019-01-01"),
#' path = "../tests/testdata/hms/"
#' )
# nolint end
#' @author Mitchell Manware
#' @return a `SpatVector` or character object
#' @importFrom terra vect
#' @importFrom terra aggregate
#' @importFrom terra subset
#' @importFrom stats na.omit
#' @export
process_hms <- function(
date = "2018-01-01",
path = NULL,
extent = NULL,
...) {
#### directory setup
path <- download_sanitize_path(path)
#### check for variable
check_for_null_parameters(mget(ls()))
#### check dates
if (length(date) == 1) date <- c(date, date)
stopifnot(length(date) == 2)
date <- date[order(as.Date(date))]
#### identify file paths
paths <- list.files(
path,
pattern = "hms_smoke*.*.shp$",
full.names = TRUE
)
paths <- paths[grep(
".shp",
paths
)]
#### identify dates based on user input
dates_of_interest <- generate_date_sequence(
date[1],
date[2],
sub_hyphen = TRUE
)
#### dates of interest with hyphen for return in 0 polygon case
dates_no_polygon <- generate_date_sequence(
date[1],
date[2],
sub_hyphen = FALSE
)
#### subset file paths to only dates of interest
data_paths <- unique(
grep(
paste(
dates_of_interest,
collapse = "|"
),
paths,
value = TRUE
)
)
#### process data
data_return <- terra::vect()
for (d in seq_along(data_paths)) {
data_date <- terra::vect(data_paths[d], extent = extent)
data_date_p <- terra::project(
data_date,
"EPSG:4326"
)
#### subset to density of interest
data_density <- data_date_p
    #### absent polygons (i.e., December 31, 2018)
if (nrow(data_density) == 0) {
      message(paste0(
        "Smoke plume polygons absent for date ",
        as.Date(
          dates_of_interest[d],
          format = "%Y%m%d"
        ),
        ". Returning empty SpatVector.\n"
      ))
data_missing <- data_density
data_missing$Density <- ""
data_missing$Date <- ""
data_return <- rbind(data_return, data_missing)
} else {
date <- as.Date(
substr(data_density$Start[1], 1, 7),
format = "%Y%j"
)
message(paste0(
"Cleaning smoke data for date ",
date[1],
"..."
))
#### zero buffer to avoid self intersection
data_0_buffer <- terra::buffer(
data_density,
width = 0
)
# add dummy polygons
poly_dummy <-
"POLYGON((-180 -86,-179.99 -86,-179.99 -85.99,-180 -85.99,-180 -86))"
poly_dummy <- terra::vect(poly_dummy)
poly_dummy <- rbind(poly_dummy, poly_dummy, poly_dummy)
poly_dummy <- terra::set.crs(poly_dummy, terra::crs(data_0_buffer))
poly_dummy$Density <- c("Heavy", "Medium", "Light")
poly_dummy$.hmsdummy <- 1
data_0_buffer_a <- rbind(data_0_buffer, poly_dummy)
data_0_buffer_a$.hmsdummy <-
ifelse(
is.nan(data_0_buffer_a$.hmsdummy),
NA,
data_0_buffer_a$.hmsdummy
)
#### aggregate polygons
data_aggregate <- terra::aggregate(
data_0_buffer_a,
by = "Density",
dissolve = TRUE
)
# Density index sorting to Heavy-Medium-Light
sort_index <- data_aggregate$Density
sort_index <-
match(
c("Heavy", "Medium", "Light"),
sort_index
)
sort_index <- stats::na.omit(sort_index)
data_aggregate <- data_aggregate[sort_index, ]
# union polygons. Heavy-Medium-Light are 1, 2, 3, respectively.
data_aggregate <- terra::union(data_aggregate)
data_aggregate$indx <-
as.numeric(
paste0(
data_aggregate$id_1,
data_aggregate$id_2,
data_aggregate$id_3
)
)
# cut integers into three classes, assign labels
data_aggregate$Density <-
cut(
data_aggregate$indx,
c(0, 10, 100, 1000),
labels = c("Light", "Medium", "Heavy"),
right = FALSE
)
# reaggregate to dissolve polygons
data_aggregate <- terra::aggregate(data_aggregate, by = "Density")
data_aggregate$Date <- date
#### factorize
#### select "Density" and "Date"
data_aggregate <- data_aggregate[, c("Density", "Date")]
#### merge with other data
data_return <- rbind(data_return, data_aggregate)
reext <- terra::ext(c(-180, 180, -65, 85))
data_return <- terra::crop(data_return, reext)
}
}
#### if no polygons
if (nrow(data_return) == 0) {
message(paste0(
"Smoke plume polygons absent from ",
as.Date(
dates_of_interest[1],
format = "%Y%m%d"
),
" to ",
as.Date(
dates_of_interest[length(dates_of_interest)],
format = "%Y%m%d"
),
". Returning vector of dates.\n"
))
no_polygon_return <- c(as.character(dates_no_polygon))
return(no_polygon_return)
} else if (nrow(data_return) > 0) {
message(paste0(
"Returning daily smoke data from ",
as.Date(
dates_of_interest[1],
format = "%Y%m%d"
),
" to ",
as.Date(
dates_of_interest[length(dates_of_interest)],
format = "%Y%m%d"
),
".\n"
))
#### return SpatVector
return(data_return)
}
}
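# The density re-labeling above, in isolation: after terra::union(), each
# polygon carries id_1/id_2/id_3 membership flags for the Heavy, Medium,
# and Light layers. Pasting the flags yields integers such as 100 or 110,
# and cut() maps them back to the heaviest overlapping Density label.
if (FALSE) {
  indx <- as.numeric(c("100", "110", "10", "11", "1"))
  cut(
    indx,
    c(0, 10, 100, 1000),
    labels = c("Light", "Medium", "Heavy"),
    right = FALSE
  )
  # [1] Heavy  Heavy  Medium Medium Light
}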
#' Process elevation data
#' @description
#' The \code{process_gmted()} function imports and cleans raw elevation data,
#' returning a single `SpatRaster` object.
#' @param variable character(2). Vector containing the GMTED statistic first
#' and the resolution second (example: `variable = c("Breakline Emphasis",
#' "7.5 arc-seconds")`).
#' * Statistic options: "Breakline Emphasis", "Systematic Subsample",
#' "Median Statistic", "Minimum Statistic", "Mean Statistic",
#' "Maximum Statistic", "Standard Deviation Statistic"
#' * Resolution options: "30 arc-seconds", "15 arc-seconds", "7.5 arc-seconds"
#' @param path character(1). Directory with downloaded GMTED "*_grd"
#' folder containing .adf files.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @author Mitchell Manware
#' @note
#' The `SpatRaster` layer name indicates the selected variable, resolution,
#' and year of release (2010).
#' @return a `SpatRaster` object
#' @importFrom terra rast
#' @importFrom terra varnames
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' gmted <- process_gmted(
#' variable = c("Breakline Emphasis", "7.5 arc-seconds"),
#' path = "./data/be75_grd"
#' )
#' }
#' @export
process_gmted <- function(
variable = NULL,
path = NULL,
extent = NULL,
...) {
#### directory setup
path <- download_sanitize_path(path)
#### check for variable
check_for_null_parameters(mget(ls()))
#### check for length of variable
if (!(length(variable) == 2)) {
stop(
paste0(
"Please provide a vector with the statistic and resolution.\n"
)
)
}
#### identify statistic and resolution
statistic <- variable[1]
statistic_code <- process_gmted_codes(
statistic,
statistic = TRUE,
invert = FALSE
)
resolution <- variable[2]
resolution_code <- process_gmted_codes(
resolution,
resolution = TRUE,
invert = FALSE
)
message(paste0(
"Cleaning ",
statistic,
" data at ",
resolution,
" resolution.\n"
))
#### identify file path
paths <- list.files(
path,
full.names = TRUE,
recursive = TRUE
)
#### select only the folder containing data
data_paths <- unique(
grep(
paste0(
statistic_code,
resolution_code,
"_grd",
collapse = "|"
),
paths,
value = TRUE
)
)
data_path <- data_paths[grep("(_grd$|w001001.adf)", data_paths)]
#### import data
data <- terra::rast(data_path, win = extent)
#### layer name
names(data) <- paste0(
"elevation_",
gsub(
"_grd",
"",
names(data)
),
"_2010"
)
#### varnames
terra::varnames(data) <- paste0(
"Elevation: ",
statistic,
" (",
resolution,
")"
)
#### year
terra::metags(data) <-
c(year = 2010L)
return(data)
}
# nolint start
#' Process meteorological data
#' @description
#' The \code{process_narr()} function imports and cleans raw meteorological
#' data, returning a single `SpatRaster` object.
#' @param date character(1 or 2). Date (1) or start and end dates (2).
#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01").
#' @param variable character(1). Variable name acronym. See [List of Variables in NARR Files](https://ftp.cpc.ncep.noaa.gov/NARR/fixed/merged_land_AWIP32corrected.pdf)
#' for variable names and acronym codes.
#' @param path character(1). Directory with downloaded netCDF (.nc) files.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @note
#' Layer names of the returned `SpatRaster` object contain the variable acronym,
#' pressure level, and date.
#' @author Mitchell Manware
#' @return a `SpatRaster` object
#' @importFrom terra rast
#' @importFrom terra time
#' @importFrom terra subset
#' @importFrom stringi stri_pad
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' process_narr(
#' date = c("2018-01-01", "2018-01-10"),
#' variable = "weasd",
#' path = "./tests/testdata/narr/weasd"
#' )
#' }
#' @export
# nolint end
process_narr <- function(
date = "2023-09-01",
variable = NULL,
path = NULL,
extent = NULL,
...) {
#### directory setup
path <- download_sanitize_path(path)
#### check for variable
check_for_null_parameters(mget(ls()))
#### check dates
if (length(date) == 1) date <- c(date, date)
stopifnot(length(date) == 2)
date <- date[order(as.Date(date))]
#### identify file paths
data_paths <- list.files(
path,
pattern = variable,
recursive = TRUE,
full.names = TRUE
)
data_paths <- grep(
sprintf("%s*.*.nc", variable),
data_paths,
value = TRUE
)
#### define date sequence
date_sequence <- generate_date_sequence(
date[1],
date[2],
sub_hyphen = TRUE
)
  #### number of characters in the file name's date component
  #### ("YYYYMM.nc" = 9 for monthly files; "YYYY.nc" = 7 for yearly files)
  path_nchar <- nchar(
    strsplit(
      data_paths[1],
      paste0(
        variable,
        "."
      )
    )[[1]][3]
  )
  if (path_nchar == 9) {
    #### define year-months of interest
    ym_of_interest <- unique(
      substr(
        date_sequence,
        1,
        6
      )
    )
  } else if (path_nchar == 7) {
    #### define years of interest
    ym_of_interest <- unique(
      substr(
        date_sequence,
        1,
        4
      )
    )
  }
#### subset file paths to only dates of interest
data_paths_ym <- unique(
grep(
paste(
ym_of_interest,
collapse = "|"
),
data_paths,
value = TRUE
)
)
#### initiate for loop
data_full <- terra::rast()
for (p in seq_along(data_paths_ym)) {
#### import data
data_year <- terra::rast(data_paths_ym[p], win = extent)
#### check for mono or pressure levels
if (grepl("level", names(data_year)[1])) {
#### pressure levels data
message(paste0("Detected pressure levels data...\n"))
message(paste0(
"Cleaning ",
variable,
" data for ",
month.name[
as.numeric(
substr(
gsub(
"-",
"",
terra::time(data_year)[1]
),
5,
6
)
)
],
", ",
substr(
gsub(
"-",
"",
terra::time(data_year)[1]
),
1,
4
),
"...\n"
))
days <- sapply(
strsplit(
names(data_year),
"_"
),
function(x) x[3]
)
terra::time(data_year) <- as.Date(
paste0(
substr(
terra::time(data_year),
1,
4
),
substr(
gsub(
"-",
"",
terra::time(data_year)
),
5,
6
),
days
),
format = "%Y%m%d"
)
names(data_year) <- paste0(
variable,
"_",
sapply(
strsplit(
names(data_year),
"_"
),
function(x) x[2]
),
"_",
gsub(
"-",
"",
terra::time(data_year)
)
)
} else {
#### mono level data
message(paste0("Detected monolevel data...\n"))
message(paste0(
"Cleaning ",
variable,
" data for ",
substr(
gsub(
"-",
"",
terra::time(data_year)[1]
),
1,
4
),
"...\n"
))
names(data_year) <- paste0(
variable,
"_",
gsub(
"-",
"",
terra::time(data_year)
)
)
}
data_full <- c(
data_full,
data_year,
warn = FALSE
)
}
#### subset years to dates of interest
data_return <- terra::subset(
data_full,
which(
substr(
names(data_full),
nchar(names(data_full)) - 7,
nchar(names(data_full))
) %in% date_sequence
)
)
message(paste0(
"Returning daily ",
variable,
" data from ",
as.Date(
date_sequence[1],
format = "%Y%m%d"
),
" to ",
as.Date(
date_sequence[length(date_sequence)],
format = "%Y%m%d"
),
".\n"
))
#### return SpatRaster
return(data_return)
}
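# The name-based date filter above, in isolation: cleaned NARR layer names
# end in a YYYYMMDD stamp, and the last eight characters are matched
# against the requested date sequence.
if (FALSE) {
  lyr <- c("weasd_20180101", "weasd_20180111")
  keep <- substr(lyr, nchar(lyr) - 7, nchar(lyr)) %in% c("20180101", "20180102")
  lyr[keep]
  # [1] "weasd_20180101"
}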
#' Process atmospheric composition data
#' @description
#' The \code{process_geos()} function imports and cleans raw atmospheric
#' composition data, returning a single `SpatRaster` object.
#' @param date character(1 or 2). Date (1) or start and end dates (2).
#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01").
#' @param variable character(1). GEOS-CF variable name(s).
#' @param path character(1). Directory with downloaded netCDF (.nc4) files.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @note
#' Layer names of the returned `SpatRaster` object contain the variable,
#' pressure level, date, and hour.
#' @author Mitchell Manware
#' @return a `SpatRaster` object
#' @importFrom terra rast
#' @importFrom terra time
#' @importFrom terra varnames
#' @importFrom terra crs
#' @importFrom terra subset
#' @importFrom terra nlyr
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' geos <- process_geos(
#' date = c("2024-01-01", "2024-01-10"),
#' variable = "O3",
#' path = "./data/aqc_tavg_1hr_g1440x721_v1"
#' )
#' }
#' @export
process_geos <-
function(date = c("2018-01-01", "2018-01-10"),
variable = NULL,
path = NULL,
extent = NULL,
...) {
#### directory setup
path <- download_sanitize_path(path)
#### check for variable
check_for_null_parameters(mget(ls()))
#### check dates
if (length(date) == 1) date <- c(date, date)
stopifnot(length(date) == 2)
date <- date[order(as.Date(date))]
#### identify file paths
paths <- list.files(
path,
pattern = "GEOS-CF.v01.rpl",
full.names = TRUE,
recursive = TRUE
)
paths <- grep(".nc4$", paths, value = TRUE)
#### identify dates based on user input
dates_of_interest <- generate_date_sequence(
date[1],
date[2],
sub_hyphen = TRUE
)
#### subset file paths to only dates of interest
data_paths <- unique(
grep(
paste(
dates_of_interest,
collapse = "|"
),
paths,
value = TRUE
)
)
#### identify collection
collection <- process_collection(
data_paths[1],
source = "geos",
collection = TRUE
)
message(
paste0(
"Identified collection ",
collection,
".\n"
)
)
#### initiate for loop
data_return <- terra::rast()
for (p in seq_along(data_paths)) {
#### import .nc4 data
      data_raw <- terra::rast(data_paths[p], win = extent)
data_datetime <- process_collection(
data_paths[p],
source = "geos",
datetime = TRUE
)
message(paste0(
"Cleaning ",
variable,
" data for ",
ISOdate(
year = substr(data_datetime, 1, 4),
month = substr(data_datetime, 5, 6),
day = substr(data_datetime, 7, 8),
hour = substr(
data_datetime,
9,
10
),
min = substr(
data_datetime,
11,
12
),
sec = 00,
tz = "UTC"
),
"...\n"
))
#### subset to user-selected variable
data_variable <- terra::subset(
data_raw,
subset = grep(
variable,
names(data_raw)
)
)
#### define variable time
terra::time(data_variable) <- rep(
ISOdate(
year = substr(data_datetime, 1, 4),
month = substr(data_datetime, 5, 6),
day = substr(data_datetime, 7, 8),
hour = substr(
data_datetime,
9,
10
),
min = substr(
data_datetime,
11,
12
),
sec = 00,
tz = "UTC"
),
terra::nlyr(data_variable)
)
#### define variable name based on date and time
names(data_variable) <- paste0(
names(data_variable),
"_",
gsub(
":", "",
gsub(
"-", "",
gsub(" ", "_", terra::time(data_variable))
)
)
)
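      #### as.character() on a POSIXct value at exactly midnight drops the
      #### clock portion, so no "_HHMMSS" suffix was appended above;
      #### append an explicit "_000000" to keep layer names consistent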
if (substr(data_datetime, 9, 12) == "0000") {
names(data_variable) <- paste0(
names(data_variable),
"_000000"
)
}
#### combine data with same date
data_return <- c(
data_return,
data_variable,
warn = FALSE
)
}
#### set coordinate reference system
terra::crs(data_return) <- "EPSG:4326"
message(paste0(
"Returning hourly ",
variable,
" data from ",
as.Date(
dates_of_interest[1],
format = "%Y%m%d"
),
" to ",
as.Date(
dates_of_interest[length(dates_of_interest)],
format = "%Y%m%d"
),
".\n"
))
#### return SpatRaster
return(data_return)
}
#' Process meteorological and atmospheric data
#' @description
#' The \code{process_merra2()} function imports and cleans raw
#' meteorological and atmospheric data, returning a single `SpatRaster`
#' object.
#' @param date character(1 or 2). Date (1) or start and end dates (2).
#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01").
#' @param variable character(1). MERRA2 variable name(s).
#' @param path character(1). Directory with downloaded netCDF (.nc4) files.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @note
#' Layer names of the returned `SpatRaster` object contain the variable,
#' pressure level, date, and hour. Pressure level values utilized for layer
#' names are taken directly from raw data and are not edited to retain
#' pressure level information.
#' @author Mitchell Manware
#' @return a `SpatRaster` object
#' @importFrom terra rast
#' @importFrom terra time
#' @importFrom terra names
#' @importFrom terra crs
#' @importFrom terra subset
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' merra2 <- process_merra2(
#' date = c("2024-01-01", "2024-01-10"),
#' variable = "CPT",
#' path = "./data/inst1_2d_int_Nx"
#' )
#' }
#' @export
process_merra2 <-
function(date = c("2018-01-01", "2018-01-10"),
variable = NULL,
path = NULL,
extent = NULL,
...) {
#### directory setup
path <- download_sanitize_path(path)
#### check for variable
check_for_null_parameters(mget(ls()))
#### check dates
if (length(date) == 1) date <- c(date, date)
stopifnot(length(date) == 2)
date <- date[order(as.Date(date))]
#### identify file paths
paths <- list.files(
path,
pattern = "MERRA2_400",
full.names = TRUE
)
paths <- paths[grep(
".nc4",
paths
)]
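  #### NOTE: MERRA-2 file names carry a production-stream number
  #### (100, 200, 300, or 400 depending on the period); this pattern
  #### assumes recent-era files of the form "MERRA2_400.*"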
#### identify dates based on user input
dates_of_interest <- generate_date_sequence(
date[1],
date[2],
sub_hyphen = TRUE
)
#### subset file paths to only dates of interest
data_paths <- unique(
grep(
paste(
dates_of_interest,
collapse = "|"
),
paths,
value = TRUE
)
)
#### identify collection
collection <- process_collection(
data_paths[1],
source = "merra2",
collection = TRUE
)
message(
paste0(
"Identified collection ",
collection,
".\n"
)
)
#### initiate for loop
data_return <- terra::rast()
for (p in seq_along(data_paths)) {
#### import .nc4 data
data_raw <- terra::rast(data_paths[p], win = extent)
data_date <- process_collection(
data_paths[p],
source = "merra2",
date = TRUE
)
message(
paste0(
"Cleaning ",
variable,
" data for ",
as.Date(
data_date,
format = "%Y%m%d"
),
"...\n"
)
)
#### subset to user-selected variable
data_variable <- terra::subset(
data_raw,
subset = grep(
variable,
names(data_raw)
)
)
#### identify time step
times <- process_merra2_time(
collection = collection,
from = data_variable
)
#### identify unique pressure levels
levels <-
unique(
grep(
"lev=",
unlist(
strsplit(names(data_variable), "_")
),
value = TRUE
)
)
#### empty `levels` if 2 dimensional data
if (length(levels) == 0) {
levels <- ""
}
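    #### merge() on two bare vectors coerces them to single-column
    #### data.frames with no shared columns, yielding their Cartesian
    #### product: one row per pressure level and time step combination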
#### merge levels and times
leveltimes <- merge(levels, times)
#### set layer names
names(data_variable) <- gsub(
"__",
"_",
paste0(
variable,
"_",
leveltimes[, 1],
"_",
data_date,
"_",
leveltimes[, 2]
)
)
#### set layer times
terra::time(data_variable) <- ISOdatetime(
year = substr(data_date, 1, 4),
month = substr(data_date, 5, 6),
day = substr(data_date, 7, 8),
hour = substr(leveltimes[, 2], 1, 2),
min = substr(leveltimes[, 2], 3, 4),
sec = substr(leveltimes[, 2], 5, 6),
tz = "UTC"
)
data_return <- c(
data_return,
data_variable,
warn = FALSE
)
}
terra::crs(data_return) <- "EPSG:4267"
message(paste0(
"Returning hourly ",
variable,
" data from ",
as.Date(
dates_of_interest[1],
format = "%Y%m%d"
),
" to ",
as.Date(
dates_of_interest[length(dates_of_interest)],
format = "%Y%m%d"
),
".\n"
))
#### return SpatRaster
return(data_return)
}
# nolint start
#' Process gridMET data
#' @description
#' The \code{process_gridmet()} function imports and cleans raw gridded surface meteorological
#' data, returning a single `SpatRaster` object.
#' @param date character(1 or 2). Date (1) or start and end dates (2).
#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01").
#' @param variable character(1). Variable name or acronym code. See [gridMET Generate Wget File](https://www.climatologylab.org/wget-gridmet.html)
#' for variable names and acronym codes. (Note: variable "Burning Index" has code "bi" and variable
#' "Energy Release Component" has code "erc").
#' @param path character(1). Directory with downloaded netCDF (.nc) files.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @note
#' Layer names of the returned `SpatRaster` object contain the variable acronym,
#' and date.
#' @author Mitchell Manware
#' @return a `SpatRaster` object
#' @importFrom terra rast
#' @importFrom terra time
#' @importFrom terra subset
#' @importFrom terra varnames
#' @importFrom terra longnames
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' gridmet <- process_gridmet(
#' date = c("2023-01-01", "2023-01-10"),
#' variable = "Precipitation",
#' path = "./data/pr"
#' )
#' }
#' @export
# nolint end
process_gridmet <- function(
date = c("2023-09-01", "2023-09-10"),
variable = NULL,
path = NULL,
extent = NULL,
...) {
#### directory setup
path <- download_sanitize_path(path)
#### check dates
if (length(date) == 1) date <- c(date, date)
stopifnot(length(date) == 2)
date <- date[order(as.Date(date))]
#### check for variable
check_for_null_parameters(mget(ls()))
variable_checked <- process_variable_codes(
variables = variable,
source = "gridmet"
)
variable_checked_long <- process_gridmet_codes(
variable_checked,
invert = TRUE
)
#### identify file paths
data_paths <- list.files(
path,
pattern = variable_checked,
full.names = TRUE
)
data_paths <- data_paths[grep(
".nc",
data_paths
)]
#### define date sequence
date_sequence <- generate_date_sequence(
date[1],
date[2],
sub_hyphen = TRUE
)
#### years of interest
yoi <- unique(
substr(
date_sequence,
1, 4
)
)
#### subset file paths to only dates of interest
data_paths <- unique(
grep(
paste(
yoi,
collapse = "|"
),
data_paths,
value = TRUE
)
)
#### initiate for loop
data_full <- terra::rast()
for (p in seq_along(data_paths)) {
#### import data
data_year <- terra::rast(data_paths[p], win = extent)
message(paste0(
"Cleaning daily ",
variable_checked,
" data for year ",
gsub(
".nc",
"",
strsplit(
data_paths[p],
paste0(
variable_checked,
"_"
)
)[[1]][2]
),
"...\n"
))
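    #### gridMET layer names end in "=<n>", where n is days since
    #### 1900-01-01 (the gridMET time origin); subtracting 25567, the
    #### number of days between 1900-01-01 and 1970-01-01, converts the
    #### values to R's Unix epoch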
time_numeric <- sapply(
strsplit(
names(data_year),
"="
),
function(x) as.numeric(x[2]) - 25567
)
    terra::time(data_year) <- as.Date(time_numeric, origin = "1970-01-01")
names(data_year) <- paste0(
variable_checked,
"_",
gsub(
"-",
"",
terra::time(data_year)
)
)
terra::varnames(data_year) <- variable_checked
terra::longnames(data_year) <- variable_checked_long
data_full <- c(
data_full,
data_year,
warn = FALSE
)
}
#### subset years to dates of interest
data_return <- terra::subset(
data_full,
which(
substr(
names(data_full),
nchar(names(data_full)) - 7,
nchar(names(data_full))
) %in% date_sequence
)
)
message(paste0(
"Returning daily ",
variable_checked_long,
" data from ",
as.Date(
date_sequence[1],
format = "%Y%m%d"
),
" to ",
as.Date(
date_sequence[length(date_sequence)],
format = "%Y%m%d"
),
".\n"
))
#### return SpatRaster
return(data_return)
}
# nolint start
#' Process TerraClimate data
#' @description
#' The \code{process_terraclimate()} function imports and cleans climate and water balance
#' data, returning a single `SpatRaster` object.
#' @param date character(1 or 2). Date (1) or start and end dates (2).
#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01").
#' @param variable character(1). Variable name or acronym code. See [TerraClimate Direct Downloads](https://climate.northwestknowledge.net/TERRACLIMATE/index_directDownloads.php)
#' for variable names and acronym codes.
#' @param path character(1). Directory with downloaded netCDF (.nc) files.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @note
#' Layer names of the returned `SpatRaster` object contain the variable acronym, year,
#' and month.
#' @note
#' TerraClimate data has monthly temporal resolution, so the first day of each month
#' is used as a placeholder temporal value.
#' @author Mitchell Manware
#' @return a `SpatRaster` object
#' @importFrom terra rast
#' @importFrom terra time
#' @importFrom terra subset
#' @importFrom terra varnames
#' @importFrom terra longnames
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' terraclimate <- process_terraclimate(
#' date = c("2023-01-01", "2023-01-10"),
#' variable = "Precipitation",
#' path = "./data/ppt"
#' )
#' }
#' @export
# nolint end
process_terraclimate <- function(
date = c("2023-09-01", "2023-09-10"),
variable = NULL,
path = NULL,
extent = NULL,
...) {
#### directory setup
path <- download_sanitize_path(path)
#### check for variable
check_for_null_parameters(mget(ls()))
#### check dates
if (length(date) == 1) date <- c(date, date)
stopifnot(length(date) == 2)
date <- date[order(as.Date(date))]
variable_checked <- process_variable_codes(
variables = variable,
source = "terraclimate"
)
variable_checked_long <- process_terraclimate_codes(
variable_checked,
invert = TRUE
)
#### identify file paths
data_paths <- list.files(
path,
pattern = variable_checked,
full.names = TRUE
)
data_paths <- data_paths[grep(
".nc",
data_paths
)]
#### define date sequence
date_sequence <- generate_date_sequence(
date[1],
date[2],
sub_hyphen = TRUE
)
#### years of interest
yoi <- unique(
substr(
date_sequence,
1, 4
)
)
#### year-months of interest
ymoi <- unique(
substr(
date_sequence,
1, 6
)
)
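  #### NOTE: TerraClimate distributes one netCDF file per year containing
  #### monthly layers, so file paths are filtered by year (yoi) here and
  #### the imported layers are trimmed to year-months (ymoi) afterwards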
#### subset file paths to only dates of interest
data_paths <- unique(
grep(
paste(
yoi,
collapse = "|"
),
data_paths,
value = TRUE
)
)
#### initiate for loop
data_full <- terra::rast()
for (p in seq_along(data_paths)) {
#### import data
data_year <- terra::rast(data_paths[p], win = extent)
message(paste0(
"Cleaning monthly ",
variable_checked_long,
" data for ",
substr(
terra::time(data_year)[1],
1,
4
),
"...\n"
))
names(data_year) <- paste0(
variable_checked,
"_",
substr(
gsub(
"-",
"",
terra::time(data_year)
),
1,
6
)
)
terra::varnames(data_year) <- variable_checked
terra::longnames(data_year) <- variable_checked_long
data_full <- c(
data_full,
data_year,
warn = FALSE
)
}
#### subset years to dates of interest
data_return <- terra::subset(
data_full,
which(
substr(
names(data_full),
nchar(names(data_full)) - 5,
nchar(names(data_full))
) %in% ymoi
)
)
message(paste0(
"Returning monthly ",
variable_checked_long,
" data from ",
month.name[
as.numeric(
substr(
ymoi[1],
5,
6
)
)
],
", ",
substr(
ymoi[1],
1,
4
),
" to ",
month.name[
as.numeric(
substr(
ymoi[length(ymoi)],
5,
6
)
)
],
", ",
substr(
ymoi[length(ymoi)],
1,
4
),
".\n"
))
#### return SpatRaster
return(data_return)
}
#' Retrieve Hydrologic Unit Code (HUC) data
#' @author Insang Song
#' @param path character. Path to the file or the directory containing HUC data.
#' @param layer_name character(1). Layer name in the `path`.
#' @param huc_level character(1). Field name of HUC level.
#' @param huc_header character(1). The upper level HUC code header to extract
#' lower level HUCs.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Arguments passed to `nhdplusTools::get_huc()`
#' @return a `SpatVector` object
#' @seealso [`nhdplusTools::get_huc`]
#' @importFrom terra vect
#' @importFrom terra vector_layers
#' @importFrom rlang inject
#' @importFrom nhdplusTools get_huc
#' @examples
#' ## NOTE: Examples are wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' library(terra)
#' getf <- "WBD_National_GDB.gdb"
#' # check the layer name to read
#' terra::vector_layers(getf)
#' test1 <- process_huc(
#' getf,
#' layer_name = "WBDHU8",
#' huc_level = "huc8"
#' )
#' test2 <- process_huc(
#' getf,
#' layer_name = "WBDHU8",
#' huc_level = "huc8",
#' huc_header = "0301"
#' )
#' test3 <- process_huc(
#' "",
#' layer_name = NULL,
#' huc_level = NULL,
#' huc_header = NULL,
#' id = "030202",
#' type = "huc06"
#' )
#' }
#' @export
process_huc <-
function(
path,
layer_name = NULL,
huc_level = NULL,
huc_header = NULL,
extent = NULL,
...
) {
# exclude the coverage due to write permission related to memoization
#nocov start
if (missing(path) || (!file.exists(path) && !dir.exists(path))) {
hucpoly <- try(
rlang::inject(nhdplusTools::get_huc(!!!list(...)))
)
if (inherits(hucpoly, "try-error")) {
stop("HUC data was not found.")
}
hucpoly <- terra::vect(hucpoly)
}
#nocov end
  if (!missing(path) && (file.exists(path) || dir.exists(path))) {
if (!is.null(huc_header)) {
querybase <-
sprintf("SELECT * FROM %s WHERE %s LIKE '%s%%'",
layer_name, huc_level, huc_header)
} else {
querybase <-
sprintf("SELECT * FROM %s", layer_name)
}
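    # in the huc_header branch above, the doubled "%%" renders a literal
    # "%" in the SQL LIKE clause, so the query keeps HUCs whose code
    # begins with huc_header (e.g. "0301" keeps all huc8 codes starting
    # with "0301")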
if (!layer_name %in% terra::vector_layers(path)) {
stop(
paste0(
"Layer ",
layer_name,
" not found in ",
path
)
)
}
    hucpoly <- try(
      terra::vect(
        path,
        query = querybase,
        extent = extent
      )
    )
    if (inherits(hucpoly, "try-error")) {
      stop("HUC data could not be read from the given path.")
    }
}
return(hucpoly)
}
#' Process CropScape data
#' @description
#' This function imports and cleans raw CropScape data,
#' returning a single `SpatRaster` object.
#' @param path character giving CropScape data path.
#' @param year numeric giving the year of CropScape data used.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @details Reads CropScape file of selected `year`.
#' @return a `SpatRaster` object
#' @author Insang Song
#' @importFrom utils read.csv
#' @importFrom terra rast
#' @importFrom terra metags
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' cropscape <- process_cropscape(
#' path = "./data/cropscape_example.tif",
#' year = 2020
#' )
#' }
#' @export
process_cropscape <-
function(
path = NULL,
year = 2021,
extent = NULL,
...
) {
# check inputs
if (!is.character(path) || is.null(path)) {
stop("path is not a character.")
}
if (!is.numeric(year)) {
stop("year is not a numeric.")
}
# open cdl file corresponding to the year
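  # path may be a directory, in which case it is searched for the
  # requested year's CDL GeoTIFF, or a single file used as-is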
if (dir.exists(path)) {
cdl_file <-
list.files(
path,
pattern = paste0("cdl_30m_*.*", year, "_*.*.tif$"),
full.names = TRUE
)
} else {
cdl_file <- path
}
cdl <- terra::rast(cdl_file, win = extent)
terra::metags(cdl) <- c(year = year)
return(cdl)
}
#' Process PRISM data
#' @description
#' This function imports and cleans raw PRISM data,
#' returning a single `SpatRaster` object.
#' @param path character giving PRISM data path.
#' Both file and directory path are acceptable.
#' @param element character(1). PRISM element name.
#' @param time character(1). PRISM time name.
#' Should be a character string of 2, 4, 6, or 8 characters;
#' "annual" is also acceptable.
#' @param extent numeric(4) or SpatExtent giving the extent of the raster.
#' If `NULL` (default), the entire raster is loaded.
#' @param ... Placeholders.
#' @details Reads time series or 30-year normal PRISM data.
#' @return a `SpatRaster` object with metadata of time and element.
#' @seealso [`terra::rast`], [`terra::metags`]
#' @author Insang Song
#' @importFrom utils read.csv
#' @importFrom terra rast
#' @importFrom terra metags
#' @examples
#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large
#' ## amount of data which is not included in the package.
#' \dontrun{
#' prism <- process_prism(
#' path = "./data/PRISM_ppt_stable_4kmM3_202104_nc.nc",
#' element = "ppt",
#' time = "202104"
#' )
#' }
#' @export
# nolint start
process_prism <-
function(
path = NULL,
element = NULL,
time = NULL,
extent = NULL,
...
) {
# check inputs
  if (is.null(element) || !element %in%
c("ppt", "tmin", "tmax", "tmean", "tdmean",
"vpdmin", "vpdmax",
"solslope", "soltotal", "solclear", "soltrans")) {
stop("element is not a valid PRISM element.")
}
if (!is.character(path) || is.null(path)) {
stop("path is not a character.")
}
  if (is.null(time) || !nchar(time) %in% seq(2, 8, 2)) {
stop("time does not have valid length.")
}
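  # PRISM time strings by convention: "MM" (2 characters; monthly
  # normals), "YYYY" (4; annual), "YYYYMM" (6; monthly), "YYYYMMDD"
  # (8; daily); "annual" (6 characters) denotes annual normals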
if (dir.exists(path)) {
pattern <- "PRISM_%s*.*_%s_*.*(bil|nc|grib2|asc)$"
pattern <- sprintf(pattern, element, time)
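    # e.g. element = "ppt" and time = "202104" match files such as
    # "PRISM_ppt_stable_4kmM3_202104_nc.nc"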
prism_file <-
list.files(
path,
pattern = pattern,
full.names = TRUE
)
} else {
prism_file <- path
}
prism <- terra::rast(prism_file, win = extent)
terra::metags(prism) <- c(time = time, element = element)
return(prism)
}
# nolint end