#' Ingest from global fields
#'
#' Read climate data from files as global fields
#'
#' @param siteinfo A data frame with rows for each site and columns `lon`
#' for longitude, `lat` for latitude, `date_start` and `date_end`
#' specifying required dates.
#' @param source A character used as identifiyer for the type of data source
#' (\code{"watch_wfdei"}, or \code{"cru"}).
#' @param getvars A named list of characters specifying the variable names in
#' the source dataset corresponding to standard names \code{"temp"} for
#' temperature.
#' @param dir A character specifying the directory where data is located.
#' \code{"prec"} for precipitation, \code{"patm"} for atmospheric pressure,
#' \code{"vpd"} for vapour pressure deficit, \code{"netrad"} for net radiation,
#' \code{"swin"} for shortwave incoming radiation.
#' @param timescale A character or vector of characters, specifying the time
#' scale of data used from the respective source (if multiple time scales are
#' available, otherwise is disregarded).
#' @param standardise_units A logical specifying whether units in ingested data
#' are to be standardised following ingestr-standard units.
#' @param layer (Optional) A character string specifying the layer from a
#' shapefile or a raster brick to be read or to be used to identify file
#' name for gsde.
#' @param verbose if \code{TRUE}, additional messages are printed.
#'
#' @return A data frame (tibble) containing the time series of ingested data,
#' nested for each site.
#' @import purrr dplyr
#' @export
#'
#' @examples \dontrun{inputdata <- ingest_bysite()}
#'
ingest_globalfields <- function(
siteinfo,
source,
getvars,
dir,
timescale,
standardise_units = TRUE,
layer = NULL,
verbose = FALSE
){
# CRAN compliance, define state variables
myvar <- temp <- rain <- snow <- sitename <- year <- moy <-
vap <- tmin <- tmax <- prec <- days_in_month <- nhx <- noy <-
lon <- lat <- data <- V1 <- elv <- varnam <- value <- fact <- NULL
if (any(is.na(siteinfo$sitename)) ||
any(is.null(siteinfo$sitename))){
stop("At least one entry for siteinfo$sitename is missing.")
}
if (!(source %in% c("etopo1", "stocker23", "wwf", "gsde", "worldclim"))){
# get a daily (monthly) data frame with all dates for all sites
# (if monthly, day 15 of each month)
df_out <- purrr::map(
as.list(seq(nrow(siteinfo))),
~ingestr::init_dates_dataframe(
lubridate::year(siteinfo$date_start[.]),
lubridate::year(siteinfo$date_end[.]),
noleap = TRUE,
timescale = timescale))
names(df_out) <- siteinfo$sitename
df_out <- df_out %>%
bind_rows(.id = "sitename")
} else {
df_out <- tibble()
}
if (source=="watch_wfdei"){
# Read WATCH-WFDEI data (extracting from NetCDF files for this site)
# vpd based on relative humidity, air temperature, and atmospheric pressure
if ("vpd" %in% getvars){
df_out <- ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "Qair_daily" ) %>%
dplyr::rename(qair = myvar) %>%
dplyr::right_join(df_out, by = c("sitename", "date")) %>%
left_join(
ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "Tair_daily" ) %>%
dplyr::rename(temp = myvar) %>%
dplyr::mutate(temp = temp - 273.15),
by = c("sitename", "date")
) %>%
left_join(
ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "PSurf_daily" ) %>%
dplyr::rename(patm = myvar),
by = c("sitename", "date")
)
}
# precipitation
if ("prec" %in% getvars){
df_out <- ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "Rainf_daily" ) %>%
dplyr::rename( rain = myvar ) %>%
left_join(
ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "Snowf_daily" ) %>%
dplyr::rename( snow = myvar ),
by = c("sitename", "date")
) %>%
dplyr::mutate(prec = (rain + snow) ) %>% # kg/m2/s
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# temperature
if ("temp" %in% getvars && !("temp" %in% names(df_out))){
df_out <- ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "Tair_daily" ) %>%
dplyr::rename(temp = myvar) %>%
dplyr::mutate(temp = temp - 273.15) %>%
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# atmospheric pressure
if ("patm" %in% getvars && !("patm" %in% names(df_out))){
df_out <- ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "PSurf_daily" ) %>%
dplyr::rename(patm = myvar) %>%
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# PPFD
if ("ppfd" %in% getvars){
kfFEC <- 2.04
df_out <- ingest_globalfields_watch_byvar( df_out, siteinfo, dir, "SWdown_daily" ) %>%
dplyr::mutate(ppfd = myvar * kfFEC * 1.0e-6 ) %>% # W m-2 -> mol m-2 s-1
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# remove spurious myvar columns
df_out <- df_out %>%
dplyr::select(-starts_with("myvar"))
if (timescale=="m"){
stop("ingest_globalfields(): aggregating WATCH-WFDEI to monthly not implemented yet.")
}
} else if (source=="wfde5"){
# Read WFDE5 data (extracting from NetCDF files for this site)
# Development Checks
if (timescale != "h"){
rlang::abort("ingest_globalfields(): WFDE5 currently only available for hourly output.")
}
# vpd based on relative humidity, air temperature, and atmospheric pressure
if ("vpd" %in% getvars){
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "Qair" ) %>%
dplyr::rename(qair = myvar) %>%
dplyr::right_join(df_out, by = c("sitename", "date")) %>%
left_join(
ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "Tair" ) %>%
dplyr::rename(temp = myvar) %>%
dplyr::mutate(temp = temp - 273.15),
by = c("sitename", "date")
) %>%
left_join(
ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "PSurf" ) %>%
dplyr::rename(patm = myvar),
by = c("sitename", "date")
)
}
# precipitation
if ("prec" %in% getvars){
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "Rainf" ) %>%
dplyr::rename( rain = myvar ) %>%
left_join(
ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "Snowf" ) %>%
dplyr::rename( snow = myvar ),
by = c("sitename", "date")
) %>%
dplyr::mutate(prec = (rain + snow) ) %>% # kg/m2/s
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# temperature
if ("temp" %in% getvars && !("temp" %in% names(df_out))){
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "Tair" ) %>%
dplyr::rename(temp = myvar) %>%
dplyr::mutate(temp = temp - 273.15) %>%
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# atmospheric pressure
if ("patm" %in% getvars && !("patm" %in% names(df_out))){
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "PSurf" ) %>%
dplyr::rename(patm = myvar) %>%
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# PPFD
if ("ppfd" %in% getvars){
kfFEC <- 2.04
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "SWdown" ) %>%
dplyr::mutate(ppfd = myvar * kfFEC * 1.0e-6 ) %>% # W m-2 -> mol m-2 s-1
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# short-wave irradiation
if ("swin" %in% getvars && !("swin" %in% names(df_out))){
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "SWdown" ) %>%
dplyr::rename(swin = myvar) %>%
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# long-wave irradiation
if ("lwin" %in% getvars && !("lwin" %in% names(df_out))){
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "LWdown" ) %>%
dplyr::rename(lwin = myvar) %>%
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# wind
if ("wind" %in% getvars && !("wind" %in% names(df_out))){
df_out <- ingest_globalfields_wfde5_byvar( df_out, siteinfo, dir, "Wind" ) %>%
dplyr::rename(wind = myvar) %>%
dplyr::right_join(df_out, by = c("sitename", "date"))
}
# remove spurious myvar columns
df_out <- df_out %>%
dplyr::select(-starts_with("myvar"))
if (timescale=="m"){
rlang::abort("ingest_globalfields(): aggregating WATCH-WFDEI to monthly not implemented yet.")
}
} else if (source=="cru"){
# Read CRU monthly data (extracting from NetCDF files for this site)
# create a monthly data frame
mdf <- df_out %>%
dplyr::select(sitename, date) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(sitename, year, moy) %>%
dplyr::distinct()
cruvars <- c()
# temperature (daily mean air)
if ("temp" %in% getvars){
cruvars <- c(cruvars, "temp")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "tmp" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(temp = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
}
# daily minimum temperature
if ("tmin" %in% getvars){
cruvars <- c(cruvars, "tmin")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "tmn" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(tmin = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
}
# daily maximum temperature
if ("tmax" %in% getvars){
cruvars <- c(cruvars, "tmax")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "tmx" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(tmax = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
}
# precipitation
if ("prec" %in% getvars){
cruvars <- c(cruvars, "prec")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "pre" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(prec = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
# also get wet days to generate daily values
cruvars <- c(cruvars, "wetd")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "wet" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(wetd = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
}
# vpd from vapour pressure
if ("vpd" %in% getvars){
cruvars <- c(cruvars, "vap")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "vap" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(vap = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
# also get daily minimum and maximum temperature to convert vapour pressure to vpd
if (!("tmin" %in% names(mdf))){
if (!("tmin" %in% cruvars)) cruvars <- c(cruvars, "tmin")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "tmn" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(tmin = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
}
if (!("tmax" %in% names(mdf))){
if (!("tmax" %in% cruvars)) cruvars <- c(cruvars, "tmax")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "tmx" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(tmax = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
}
}
# cloud cover
if ("ccov" %in% getvars){
cruvars <- c(cruvars, "ccov")
mdf <- ingest_globalfields_cru_byvar(siteinfo, dir, "cld" ) %>%
dplyr::select(sitename, date, myvar) %>%
dplyr::rename(ccov = myvar) %>%
dplyr::mutate(year = lubridate::year(date), moy = lubridate::month(date)) %>%
dplyr::select(-date) %>%
dplyr::right_join(mdf, by = c("sitename", "year", "moy"))
}
if (timescale == "d"){
# expand monthly to daily data
if (length(cruvars)>0){
df_out <- expand_clim_cru_monthly( mdf, cruvars ) %>%
right_join( df_out, by = "date" )
}
if ("vpd" %in% getvars){
# Calculate VPD based on monthly data (vap is in hPa) - important: after downscaling to daily because of non-linearity
df_out <- df_out %>%
rowwise() %>%
mutate(vpd = calc_vpd( eact = 1e2 * vap, tmin = tmin, tmax = tmax ))
}
if ("prec" %in% getvars){
# convert units -> mm/sec
df_out <- df_out %>%
mutate(prec = prec / (60 * 60 * 24)) # mm/d -> mm/sec
}
} else if (timescale == "m"){
if ("vpd" %in% getvars){
# Calculate VPD based on monthly data (vap is in hPa)
mdf <- mdf %>%
rowwise() %>%
mutate(vpd = calc_vpd( eact = 1e2 * vap, tmin = tmin, tmax = tmax ))
}
df_out <- mdf %>%
right_join(df_out %>%
mutate(year = lubridate::year(date), moy = lubridate::month(date)),
by = c("sitename", "year", "moy")) %>%
dplyr::select(-year, -moy)
if ("prec" %in% getvars){
# convert units -> mm/sec
df_out <- df_out %>%
mutate(moy = lubridate::month(date)) %>%
mutate(prec = prec / days_in_month(moy)) %>% # mm/month -> mm/d
mutate(prec = prec / (60 * 60 * 24)) # mm/d -> mm/sec
}
}
} else if (source == "ndep"){
# create a annual data frame
adf <- df_out %>%
dplyr::select(sitename, date) %>%
dplyr::filter(lubridate::yday(date)==1) %>%
dplyr::distinct()
# extract the data for NHx
adf <- ingest_globalfields_ndep_byvar(siteinfo, dir, "nhx") %>%
dplyr::select(sitename, date, nhx) %>%
dplyr::right_join(adf, by = c("sitename", "date"))
# extract the data for NOy
adf <- ingest_globalfields_ndep_byvar(siteinfo, dir, "noy") %>%
dplyr::select(sitename, date, noy) %>%
dplyr::right_join(adf, by = c("sitename", "date"))
if (timescale != "y"){
stop("ingest_globalfields() for source = ndep: come up with solution for non-annual time step")
} else {
df_out <- adf
}
} else if (source == "etopo1"){
filename <- list.files(dir, pattern = "ETOPO1_Bed_g_geotiff.tif")
if (length(filename) > 1) stop("ingest_globalfields(): Found more than 1 file for source 'etopo1'.")
if (length(filename) == 0) stop("ingest_globalfields(): Found no files for source 'etopo1' in the directory provided by argument 'dir'.")
# re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) |>
dplyr::ungroup() |>
dplyr::select(-lon, -lat) |>
tidyr::unnest(data) |>
dplyr::rename(elv = ETOPO1_Bed_g_geotiff) |>
dplyr::select(sitename, elv)
} else if (source == "stocker23"){
filename <- list.files(dir, pattern = "cwdx80_forcing_halfdeg.nc")
if (length(filename) > 1) stop("ingest_globalfields(): Found more than 1 file for source 'stocker23'.")
if (length(filename) == 0) stop("ingest_globalfields(): Found no files for source 'stocker23' in the directory provided by argument 'dir'.")
# re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) |>
dplyr::ungroup() |>
dplyr::select(-lon, -lat) |>
tidyr::unnest(data) |>
dplyr::rename(whc = cwdx80_forcing) |>
dplyr::select(sitename, whc)
} else if (source == "gsde"){
# re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
# top soil layers
filename <- list.files(dir, pattern = paste0(layer, "1.nc"))
if (length(filename) > 1) stop("ingest_globalfields(): Found more than 1 file for source 'gsde'.")
if (length(filename) == 0) stop("ingest_globalfields(): Found no files for source 'gsde' in the directory provided by argument 'dir'.")
df_out_top <- extract_pointdata_allsites( paste0(dir, "/", filename), df_lonlat, get_time = FALSE ) %>%
dplyr::select(-lon, -lat) %>%
tidyr::unnest(data) %>%
tidyr::pivot_longer(cols = starts_with("PBR_depth")) %>%
dplyr::rename(!!layer := value, depth = name) %>%
dplyr::mutate(depth = as.numeric(str_remove(depth, "PBR_depth="))) %>%
dplyr::select(sitename, !!layer, depth)
# bottom soil layers
filename <- list.files(dir, pattern = paste0(layer, "2.nc"))
if (length(filename) > 1) stop("ingest_globalfields(): Found more than 1 file for source 'gsde'.")
if (length(filename) == 0) stop(paste("ingest_globalfields(): Found no files for source 'gsde' in the directory provided by argument 'dir' for layer", layer))
df_out_bottom <- extract_pointdata_allsites( paste0(dir, "/", filename), df_lonlat, get_time = FALSE ) %>%
dplyr::select(-lon, -lat) %>%
tidyr::unnest(data) %>%
tidyr::pivot_longer(cols = starts_with("PBR_depth")) %>%
dplyr::rename(!!layer := value, depth = name) %>%
dplyr::mutate(depth = as.numeric(str_remove(depth, "PBR_depth="))) %>%
dplyr::select(sitename, !!layer, depth)
# combine for layers read from each file
df_out <- bind_rows(df_out_top, df_out_bottom) %>%
group_by(sitename) %>%
tidyr::nest() %>%
mutate(data = purrr::map(data, ~mutate(., layer = 1:8))) %>%
tidyr::unnest(data)
# apply conversion factor
df_conv <- tibble(varnam := c("TC", "OC", "TN", "PHH2O", "PHK", "PHCA", "EXA", "PBR", "POL", "PNZ", "PHO", "PMEH", "TP", "TK"),
fact = c(0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.01, 0.01, 0.01, 0.01, 0.0001, 0.01, 0.0001, 0.01))
df_out <- df_out %>%
mutate(varnam = !!layer) %>%
left_join(df_conv, by = "varnam") %>%
rename(value = !!layer) %>%
# interpret missing values
ungroup() %>%
mutate(value = ifelse(value == -999, NA, value)) %>%
mutate(value = ifelse(varnam %in% c("PHH2O", "PHK", "PHCA") & value == 100,
NA,
value)) %>%
# apply conversion factor
mutate(value = value * fact) %>%
dplyr::select(-fact, -varnam) %>%
rename(!!layer := value)
} else if (source == "wwf"){
df_biome_codes <- tibble(
BIOME = 1:14,
BIOME_NAME = c(
"Tropical & Subtropical Moist Broadleaf Forests",
"Tropical & Subtropical Dry Broadleaf Forests",
"Tropical & Subtropical Coniferous Forests",
"Temperate Broadleaf & Mixed Forests",
"Temperate Conifer Forests",
"Boreal Forests/Taiga",
"Tropical & Subtropical Grasslands, Savannas & Shrublands",
"Temperate Grasslands, Savannas & Shrublands",
"Flooded Grasslands & Savannas",
"Montane Grasslands & Shrublands",
"Tundra",
"Mediterranean Forests, Woodlands & Scrub",
"Deserts & Xeric Shrublands",
"Mangroves")
)
df_out <- extract_pointdata_allsites_shp( dir, dplyr::select(siteinfo, sitename, lon, lat), layer ) %>%
left_join(df_biome_codes, by = "BIOME")
} else if (source == "worldclim"){
# re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
ingest_globalfields_worldclim_byvar <- function(varnam){
vec_filn <- list.files(dir, pattern = paste0(varnam, ".*.tif"))
if (length(vec_filn) > 0){
df_out <- purrr::map2(
as.list(vec_filn),
as.list(stringr::str_remove(vec_filn,
paste0("wc2.1_30s_", varnam, "_")) %>%
stringr::str_remove(".tif")),
~{extract_pointdata_allsites( paste0(dir, "/", .x),
df_lonlat, get_time = FALSE ) %>%
dplyr::select(-lon, -lat) %>%
tidyr::unnest(data) %>%
dplyr::rename(!!paste0(varnam, "_", .y) := V1) %>%
dplyr::select(sitename, !!paste0(varnam, "_", .y))}) %>%
purrr::reduce(left_join, by = "sitename")
} else {
df_out <- tibble()
}
return(df_out)
}
df_out <- purrr::map(as.list(layer),
~ingest_globalfields_worldclim_byvar(.)) %>%
purrr::reduce(left_join, by = c("sitename"))
}
return( df_out )
}
# Extract temperature time series for a set of sites at once (opening
# each file only once).
ingest_globalfields_watch_byvar <- function( ddf, siteinfo, dir, varnam ) {
# define variables
yr <- mo <- filename <- drop_na <- data <- sitename <-
dom <- myvar <- doy <- data_pre <- . <- NULL
dirn <- paste0( dir, "/", varnam)
# loop over all year and months that are required
year_start <- ddf %>%
dplyr::pull(date) %>%
min() %>%
lubridate::year()
year_end <- ddf %>%
dplyr::pull(date) %>%
max() %>%
lubridate::year()
# check if data is required for years before 1979 (when watch wfdei is available)
pre_data <- year_start < 1979
# if pre-1979 data are required, read at least 10 first years to get mean climatology
if (pre_data){
year_start_read <- 1979
year_end_read <- max(1988, year_end)
} else {
year_start_read <- year_start
year_end_read <- year_end
}
# construct data frame holding longitude and latitude info
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
if (varnam %in% c("Rainf_daily", "Snowf_daily")){
addstring <- "_WFDEI_CRU_"
} else {
addstring <- "_WFDEI_"
}
# extract all the data for all the dates (cutting to required dates by site is done in ingest())
allmonths <- 1:12
allyears <- year_start_read:year_end_read
df <- expand.grid(allmonths, allyears) %>%
dplyr::as_tibble() %>%
stats::setNames(c("mo", "yr")) %>%
rowwise() %>%
dplyr::mutate(filename = paste0( dirn, "/", varnam, addstring, sprintf( "%4d", yr ), sprintf( "%02d", mo ), ".nc" )) %>%
ungroup() %>%
dplyr::mutate(data = purrr::map(filename, ~extract_pointdata_allsites(., df_lonlat, get_time = TRUE ) ))
# rearrange to a daily data frame
complement_df <- function(df){
df <- df |>
dplyr::select(dom = tstep, myvar = value)
return(df)
}
ddf <- df %>%
tidyr::unnest(data) %>%
dplyr::mutate(data = purrr::map(data, ~complement_df(.))) %>%
tidyr::unnest(data) %>%
dplyr::select(sitename, mo, yr, dom, myvar) %>%
dplyr::mutate(date = lubridate::ymd(paste0(as.character(yr), "-", sprintf( "%02d", mo), "-", sprintf( "%02d", dom))) ) %>%
dplyr::select(-mo, -yr, -dom)
# create data frame containing all dates, using mean annual cycle (of 1979-1988) for all years before 1979
if (pre_data){
message("Data for years before 1979 requested. Taking mean annual cycle of 10 years (1979-1988) for all years before 1979.")
# get mean seasonal cycle, averaged over 1979:1988
ddf_meandoy <- ddf %>%
dplyr::filter(lubridate::year(date) %in% 1979:1988) %>%
mutate(doy = lubridate::yday(date)) %>%
group_by(sitename, doy) %>%
summarise(myvar = mean(myvar))
# get a data frame with all dates for all sites
ddf_tmp <- purrr::map(
as.list(seq(nrow(siteinfo))),
~ingestr::init_dates_dataframe(
lubridate::year(siteinfo$date_start[.]),
min(1978, lubridate::year(siteinfo$date_end[.])),
noleap = TRUE,
timescale = "d"))
names(ddf_tmp) <- siteinfo$sitename
ddf_pre <- ddf_tmp %>%
bind_rows(.id = "sitename") %>%
drop_na() %>%
mutate(doy = lubridate::yday(date)) %>%
left_join(ddf_meandoy, by = c("sitename", "doy")) %>%
dplyr::select(-doy)
# ddf_pre <- init_dates_dataframe(year_start, min(1978, year_end)) %>%
# mutate(doy = lubridate::yday(date)) %>%
# left_join(ddf_pre, by = "doy") %>%
# dplyr::select(-doy)
# combine the two along rows
ddf <- left_join(
ddf %>%
ungroup() %>%
group_by(sitename) %>%
tidyr::nest(),
ddf_pre %>%
ungroup() %>%
group_by(sitename) %>%
tidyr::nest() %>%
rename(data_pre = data),
by = "sitename") %>%
mutate(data = purrr::map2(data_pre, data, ~bind_rows(.x, .y))) %>%
dplyr::select(-data_pre) %>%
tidyr::unnest(data) %>%
arrange(date) %>% # to make sure
distinct() # out of desperation
}
return( ddf )
}
ingest_globalfields_wfde5_byvar <- function(ddf, siteinfo, dir, varnam) {
yr <- mo <- filename <- . <- data <- sitename <- dom <- hod <-
myvar <- doy <- drop_na <- data_pre <- NULL
dirn <- paste0( dir, "/", varnam)
# loop over all year and months that are required
year_start <- ddf %>%
dplyr::pull(date) %>%
min() %>%
lubridate::year()
year_end <- ddf %>%
dplyr::pull(date) %>%
max() %>%
lubridate::year()
# check if data is required for years before 1980 (when watch wfdei is available)
pre_data <- year_start < 1980
# if pre-1980 data are required, read at least 10 first years to get mean climatology
if (pre_data){
year_start_read <- 1980
year_end_read <- max(1989, year_end)
} else {
year_start_read <- year_start
year_end_read <- year_end
}
# construct data frame holding longitude and latitude info
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
if (varnam %in% c("Rainf", "Snowf")){
addstring <- "_WFDE5_CRU+GPCC_"
} else {
addstring <- "_WFDE5_CRU_"
}
if (varnam %in% c("Tair", "Qair")) {
endstring <- "_v1.0"
} else {
endstring <- "_v1.1"
}
# extract all the data for all the dates (cutting to required dates by site is done in ingest())
alldays <- 1:31
allmonths <- 1:12
allyears <- year_start_read:year_end_read
df <-
expand.grid(allmonths, allyears) %>%
dplyr::as_tibble() %>%
stats::setNames(c("mo", "yr")) %>%
rowwise() %>%
dplyr::mutate(
filename = paste0( dirn, "/", varnam, addstring, sprintf( "%4d", yr ),
sprintf( "%02d", mo ), endstring, ".nc" )) %>%
ungroup() %>%
dplyr::mutate(
data = purrr::map(
filename,
~extract_pointdata_allsites(., df_lonlat, get_time = FALSE ) ))
# rearrange to a daily data frame
complement_df <- function(df){
df <- df %>%
stats::setNames(., c("myvar")) %>%
mutate(row = 1:nrow(.),
hod = rep(0:23, nrow(.)/24),
dom = ceiling(row/24))
return(df)
}
ddf <- df %>%
tidyr::unnest(data) %>%
dplyr::mutate(data = purrr::map(data, ~complement_df(.))) %>%
tidyr::unnest(data) %>%
dplyr::select(sitename, mo, yr, dom, hod, myvar) %>%
dplyr::mutate(
date = lubridate::ymd_h(
paste0(as.character(yr),
"-", sprintf( "%02d", mo),
"-", sprintf( "%02d", dom),
" ", sprintf( "%02d", hod)))
) %>%
dplyr::select(-mo, -yr, -dom, -hod)
# create data frame containing all dates, using mean annual cycle
# (of 1980-1989) for all years before 1980
if (pre_data){
message("
Data for years before 1979 requested.
Taking mean annual cycle of 10 years (1979-1989)
for all years before 1979.")
message("
NCDF file on Euler lacks first seven hours of 1979-01-01.
Thus, only cycle from 1980-1989 is taken.")
# get mean seasonal cycle, averaged over 1980:1989
ddf_mean <- ddf %>%
dplyr::filter(lubridate::year(date) %in% 1980:1989) %>%
mutate(hod = rep(0:23, nrow(.)/24),
doy = lubridate::yday(date)) %>%
group_by(sitename, hod, doy) %>%
summarise(myvar = mean(myvar), .groups = "keep")
# get a data frame with all dates for all sites
ddf_tmp <- purrr::map(
as.list(seq(nrow(siteinfo))),
~ingestr::init_dates_dataframe(
lubridate::year(siteinfo$date_start[.]),
min(1979, lubridate::year(siteinfo$date_end[.])),
noleap = TRUE,
timescale = timescale))
names(ddf_tmp) <- siteinfo$sitename
ddf_pre <- ddf_tmp %>%
bind_rows(.id = "sitename") %>%
drop_na() %>%
mutate(hod = rep(0:23, nrow(.)/24),
doy = lubridate::yday(date)) %>%
left_join(ddf_mean, by = c("sitename", "doy", "hod")) %>%
dplyr::select(-doy, -hod)
# combine the two along rows
ddf <- left_join(
ddf %>%
ungroup() %>%
group_by(sitename) %>%
tidyr::nest(),
ddf_pre %>%
ungroup() %>%
group_by(sitename) %>%
tidyr::nest() %>%
rename(data_pre = data),
by = "sitename") %>%
mutate(data = purrr::map2(data_pre, data, ~bind_rows(.x, .y))) %>%
dplyr::select(-data_pre) %>%
tidyr::unnest(data) %>%
arrange(date) %>% # to make sure
distinct() # out of desperation
}
return( ddf )
}
# Extract N deposition time series for a set of sites at once (opening
# each file only once).
ingest_globalfields_ndep_byvar <- function(siteinfo, dir, varnam){
# define variable
data <- NULL
# construct data frame holding longitude and latitude info
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
# extract the data
filename <- list.files(
dir, paste0("ndep_", varnam, "_lamarque11cc_historical_halfdeg.nc") )
df <- extract_pointdata_allsites(
paste0(dir, filename), df_lonlat, get_time = TRUE) %>%
dplyr::mutate(
data = purrr::map(data, ~stats::setNames(., c(varnam, "year")))
) %>%
dplyr::mutate(
data = purrr::map(
data,
~mutate(., date = lubridate::ymd(paste0(as.character(year), "-01-01")))
)
)
adf <- df %>%
tidyr::unnest(data)
return(adf)
}
# Extract temperature time series for a set of sites at once (opening
# each file only once).
ingest_globalfields_cru_byvar <- function( siteinfo, dir, varnam ){
# define variables
data <- year <- moy <- NULL
# construct data frame holding longitude and latitude info
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)
# extract the data
filename <- list.files( dir, pattern=paste0( varnam, ".dat.nc" ) )
if (length(filename)==0) stop(paste("Aborting. No files found for CRU variable", varnam))
df <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = TRUE ) %>%
dplyr::mutate(data = purrr::map(data, ~stats::setNames(., c("myvar", "date"))))
# rearrange to a monthly data frame. Necesary work-around with date,
# because unnest() seems to have a bug
# when unnesting a dataframe that contains a lubridate ymd objet.
get_month_year <- function(df){
df %>%
mutate(year = lubridate::year(date),
moy = lubridate::month(date))
}
mdf <- df %>%
mutate(data = purrr::map(data, ~get_month_year(.))) %>%
mutate(data = purrr::map(data, ~dplyr::select(., -date))) %>%
tidyr::unnest(data) %>%
rowwise() %>%
mutate(date = lubridate::ymd(paste0(as.character(year),
"-", sprintf( "%02d", moy), "-15"))) %>%
dplyr::select(-year, -moy)
return( mdf )
}
# Interpolates monthly data to daily data using polynomials or linear
# for a single year
expand_clim_cru_monthly <- function( mdf, cruvars ){
ddf <- purrr::map(as.list(unique(mdf$year)),
~expand_clim_cru_monthly_byyr( ., mdf, cruvars ) ) %>%
bind_rows()
return( ddf )
}
# Interpolates monthly data to daily data using polynomials or linear
# for a single year
expand_clim_cru_monthly_byyr <- function( yr, mdf, cruvars ){
# define variables
year <- ccov_int <- NULL
nmonth <- 12
startyr <- mdf$year %>% first()
endyr <- mdf$year %>% last()
yr_pvy <- max(startyr, yr-1)
yr_nxt <- min(endyr, yr+1)
# add first and last year to head and tail of 'mdf'
first <- mdf[1:12,] %>% mutate( year = year - 1)
last <- mdf[(nrow(mdf)-11):nrow(mdf),] %>% mutate( year = year + 1 )
ddf <- init_dates_dataframe( yr, yr )
# air temperature: interpolate using polynomial
if ("temp" %in% cruvars){
mtemp <- dplyr::filter( mdf, year==yr )$temp
mtemp_pvy <- dplyr::filter( mdf, year==yr_pvy )$temp
mtemp_nxt <- dplyr::filter( mdf, year==yr_nxt )$temp
if (length(mtemp_pvy)==0){
mtemp_pvy <- mtemp
}
if (length(mtemp_nxt)==0){
mtemp_nxt <- mtemp
}
ddf <- init_dates_dataframe( yr, yr ) %>%
mutate(
temp = monthly2daily(
mtemp,
"polynom",
mtemp_pvy[nmonth],
mtemp_nxt[1],
leapyear = lubridate::leap_year(yr)
)
) %>%
right_join( ddf, by = c("date") )
}
# daily minimum air temperature: interpolate using polynomial
if ("tmin" %in% cruvars){
mtmin <- dplyr::filter( mdf, year==yr )$tmin
mtmin_pvy <- dplyr::filter( mdf, year==yr_pvy )$tmin
mtmin_nxt <- dplyr::filter( mdf, year==yr_nxt )$tmin
if (length(mtmin_pvy)==0){
mtmin_pvy <- mtmin
}
if (length(mtmin_nxt)==0){
mtmin_nxt <- mtmin
}
ddf <- init_dates_dataframe( yr, yr ) %>%
mutate( tmin = monthly2daily( mtmin, "polynom", mtmin_pvy[nmonth], mtmin_nxt[1], leapyear = lubridate::leap_year(yr) ) ) %>%
right_join( ddf, by = c("date") )
}
# daily minimum air temperature: interpolate using polynomial
if ("tmax" %in% cruvars){
mtmax <- dplyr::filter( mdf, year==yr )$tmax
mtmax_pvy <- dplyr::filter( mdf, year==yr_pvy )$tmax
mtmax_nxt <- dplyr::filter( mdf, year==yr_nxt )$tmax
if (length(mtmax_pvy)==0){
mtmax_pvy <- mtmax
}
if (length(mtmax_nxt)==0){
mtmax_nxt <- mtmax
}
ddf <- init_dates_dataframe( yr, yr ) %>%
mutate( tmax = monthly2daily( mtmax, "polynom", mtmax_pvy[nmonth], mtmax_nxt[1], leapyear = lubridate::leap_year(yr) ) ) %>%
right_join( ddf, by = c("date") )
}
# precipitation: interpolate using weather generator
if ("prec" %in% cruvars){
mprec <- dplyr::filter( mdf, year==yr )$prec
mwetd <- dplyr::filter( mdf, year==yr )$wetd
if (any(!is.na(mprec))&&any(!is.na(mwetd))){
ddf <- init_dates_dataframe( yr, yr ) %>%
mutate( prec = get_daily_prec( mprec, mwetd, leapyear = lubridate::leap_year(yr) ) ) %>%
right_join( ddf, by = c("date") )
}
}
# cloud cover: interpolate using polynomial
if ("ccov" %in% cruvars){
mccov <- dplyr::filter( mdf, year==yr )$ccov
mccov_pvy <- dplyr::filter( mdf, year==yr_pvy )$ccov
mccov_nxt <- dplyr::filter( mdf, year==yr_nxt )$ccov
if (length(mccov_pvy)==0){
mccov_pvy <- mccov
}
if (length(mccov_nxt)==0){
mccov_nxt <- mccov
}
ddf <- init_dates_dataframe( yr, yr ) %>%
mutate( ccov_int = monthly2daily( mccov, "polynom", mccov_pvy[nmonth], mccov_nxt[1], leapyear = lubridate::leap_year(yr) ) ) %>%
# Reduce CCOV to a maximum 100%
mutate( ccov = ifelse( ccov_int > 100, 100, ccov_int ) ) %>%
right_join( ddf, by = c("date") )
}
# VPD: interpolate using polynomial
if ("vap" %in% cruvars){
mvap <- dplyr::filter( mdf, year==yr )$vap
mvap_pvy <- dplyr::filter( mdf, year==yr_pvy )$vap
mvap_nxt <- dplyr::filter( mdf, year==yr_nxt )$vap
if (length(mvap_pvy)==0){
mvap_pvy <- mvap
}
if (length(mvap_nxt)==0){
mvap_nxt <- mvap
}
ddf <- init_dates_dataframe( yr, yr ) %>%
mutate( vap = monthly2daily( mvap, "polynom", mvap_pvy[nmonth], mvap_nxt[1], leapyear = lubridate::leap_year(yr) ) ) %>%
right_join( ddf, by = c("date") )
}
return( ddf )
}
# Finds the closest land cell in the CRU dataset at the same latitude
find_nearest_cruland_by_lat <- function( lon, lat, filn ){
if (!requireNamespace("ncdf4", quietly = TRUE))
stop("Please, install 'ncdf4' package")
nc <- ncdf4::nc_open( filn, readunlim=FALSE )
crufield <- ncdf4::ncvar_get( nc, varid="TMP" )
lon_vec <- ncdf4::ncvar_get( nc, varid="LON" )
lat_vec <- ncdf4::ncvar_get( nc, varid="LAT" )
crufield[crufield==-9999] <- NA
ncdf4::nc_close(nc)
ilon <- which.min( abs( lon_vec - lon ) )
ilat <- which.min( abs( lat_vec - lat ) )
if (!is.na(crufield[ilon,ilat])) {print("WRONG: THIS SHOULD BE NA!!!")}
for (n in seq(2*length(lon_vec))){
ilon_look <- (-1)^(n+1)*round((n+0.1)/2)+ilon
if (ilon_look > length(lon_vec)) {ilon_look <- ilon_look %% length(lon_vec)} # Wrap search around globe in latitudinal direction
if (ilon_look < 1) {ilon_look <- ilon_look + length(lon_vec) }
print(paste("ilon_look",ilon_look))
if (!is.na(crufield[ilon_look,ilat])) {
break
}
}
# if (!is.na(crufield[ilon_look,ilat])) {print("SUCCESSFULLY FOUND DATA")}
return( lon_vec[ ilon_look ] )
}
# Extracts point data for a set of sites given by df_lonlat using
# functions from the raster package.
extract_pointdata_allsites <- function(
filename,
df_lonlat,
get_time = FALSE
) {
# define variables
lon <- lat <- data <- NULL
# load file using the raster library
#print(paste("Creating raster brick from file", filename))
if (!file.exists(filename)) stop(paste0("File not found: ", filename))
# message(paste0("Reading file: ", filename))
# new code with terra library
rasta <- terra::rast(filename)
coords <- dplyr::select(df_lonlat, lon, lat)
points <- terra::vect(coords, geom = c("lon", "lat"), crs = "EPSG:4326")
values <- terra::extract(rasta, points, xy = FALSE, ID = FALSE, method = "bilinear")
if (get_time){
out <- df_lonlat |>
dplyr::select(sitename, lon, lat) |>
bind_cols(
values
) |>
tidyr::pivot_longer(-one_of(c("lon", "lat", "sitename")), names_to = "tstep") |>
tidyr::separate_wider_delim(
tstep,
delim = "=",
names = c("varnam", "tstep")
) |>
dplyr::mutate(
tstep = as.numeric(tstep) + 1,
varnam = stringr::str_remove(varnam, "_tstep")
) |>
dplyr::group_by(sitename, lon, lat) |>
tidyr::nest()
} else {
out <- df_lonlat |>
dplyr::select(sitename, lon, lat) |>
bind_cols(
values
) |>
dplyr::group_by(sitename, lon, lat) |>
tidyr::nest()
}
# # old code with {raster} library:
# rasta <- raster::brick(filename)
# df_lonlat <- raster::extract(
# rasta,
# sp::SpatialPoints(dplyr::select(df_lonlat, lon, lat)), # , proj4string = rasta@crs
# sp = TRUE
# ) %>%
# as_tibble() %>%
# tidyr::nest(data = c(-lon, -lat)) %>%
# right_join(df_lonlat, by = c("lon", "lat")) %>%
# mutate( data = purrr::map(data, ~dplyr::slice(., 1)) ) %>%
# dplyr::mutate(data = purrr::map(data, ~t(.))) %>%
# dplyr::mutate(data = purrr::map(data, ~as_tibble(.)))
#
# if (get_time){
# timevals <- raster::getZ(rasta)
# df_lonlat <- df_lonlat %>%
# mutate( data = purrr::map(data, ~bind_cols(., tibble(date = timevals))))
# }
return(out)
}
# Extracts point data for a set of sites given by df_lonlat for a
# shapefile. df_lonlat requires columns sitename, lon, and lat.
extract_pointdata_allsites_shp <- function(dir, df_lonlat, layer) {
# solves error, see https://stackoverflow.com/questions/75927165/error-in-wk-handle-wk-wkbwkb-s2-geography-writeroriented-oriented-loop-0
sf::sf_use_s2(FALSE)
# Load spatial data using sf
shp <- sf::st_read(dsn = dir, layer = layer)
# Create SpatialPoints object for sites
df_clean <- df_lonlat %>%
ungroup() %>%
dplyr::select(lon, lat) %>%
tidyr::drop_na()
# Create sf points object
pts <- sf::st_as_sf(df_clean, coords = c("lon", "lat"), crs = sf::st_crs(shp))
# Spatial join and data manipulation
df <- sf::st_join(pts, shp) |>
dplyr::select(-geometry) |>
dplyr::bind_cols(df_lonlat)
# Alternative fix:
# define variables
# lon <- lat <- . <- NULL
# sf::sf_use_s2(FALSE)
# create SpatialPoints object for plots
# df_clean <- df_lonlat |>
# ungroup() |>
# tidyr::drop_na(c(lon, lat))
# shp <- sf::st_read(dsn = dir, layer = layer)
# pts <- sf::st_as_sf(
# df_clean |>
# dplyr::select(lon, lat),
# coords = c("lon","lat"),
# crs = sf::st_crs(shp)
# )
# df <- sf::st_join(pts, shp, left = TRUE) |>
# as_tibble() |>
# bind_cols(df_clean, .)
# dplyr::select(-geometry) |>
# right_join(df_lonlat, by = join_by(lon, lat)) |>
# dplyr::select(-lon, -lat)
return(df)
}
# extract_pointdata_allsites_shp <- function( dir, df_lonlat, layer ){
# # define variables
# lon <- lat <- . <- NULL
# shp <- rgdal::readOGR(dsn = dir, layer = layer)
# geo.proj <- sp::proj4string(shp)
# # create SpatialPoints object for sites
# df_clean <- df_lonlat %>%
# ungroup() %>%
# dplyr::select(lon, lat) %>%
# tidyr::drop_na()
# pts <- sp::SpatialPoints(df_clean, proj4string = sp::CRS(geo.proj))
# # creates object that assigns each site index to an ecoregion
# df <- sp::over(pts, shp) %>%
# as_tibble() %>%
# bind_cols(df_clean, .) %>%
# right_join(df_lonlat, by = c("lon", "lat")) %>%
# dplyr::select(-lon, -lat)
# return(df)
# }
#' Implements a weather generator
#'
#' Implements a weather generator to simulate daily precipitation, given the monthly total and the number of days with rain for each month.
#'
#' @param mval_prec A vector of twelve numeric values for monthly values of total precipitation.
#' @param mval_wet A vector of twelve integer values for the number of wet days in each month.
#' @param set_seed A logical specifying whether a random seed is set.
#' @param leapyear A logical specifying whether interpolation is done for a leap year (with 366 days).
#'
#' @return A named list of data frames (tibbles) containing input data for each site is returned.
#'
get_daily_prec <- function( mval_prec, mval_wet, set_seed=FALSE, leapyear=FALSE ){
# Distributes monthly total precipitation to days, given number of
# monthly wet days. Adopted from LPX.
if (leapyear){
ndaymonth <- c(31,29,31,30,31,30,31,31,30,31,30,31)
} else {
ndaymonth <- c(31,28,31,30,31,30,31,31,30,31,30,31)
}
ndayyear <- sum(ndaymonth)
nmonth <- length(ndaymonth)
c1 <- 1.0
c2 <- 1.2
if (set_seed) {set.seed(0)}
prdaily_random <- array( NA, dim=c(ndayyear,2))
for (doy in 1:ndayyear){
prdaily_random[doy,] <- stats::runif(2)
}
dval_prec <- rep(NA,ndayyear)
doy <- 0
prob <- 0.0
prob_rain <- rep(NA,nmonth)
mprecave <- rep(NA,nmonth)
mprecip <- rep(NA,nmonth)
for (moy in 1:nmonth){
prob_rain[moy] <- 0.0
mprecave[moy] <- 0.0
mprecip[moy] <- 0.0
}
daysum <- 0
set.seed( prdaily_random[1,1] * 1e7 )
for (moy in 1:nmonth){
if ( mval_wet[moy]<=1.0 ) {mval_wet[moy] <- 1.0}
prob_rain[moy] <- mval_wet[moy] / ndaymonth[moy]
mprecave[moy] <- mval_prec[moy] / mval_wet[moy]
dry <- TRUE
iloop <- 0
while( dry ){
iloop <- iloop + 1
nwet <- 0
for (dm in 1:ndaymonth[moy]){
doy <- doy + 1
# Transitional probabilities (Geng et al. 1986)
if (doy>1) {
if (dval_prec[doy-1] < 0.1) {
prob <- 0.75 * prob_rain[moy]
} else {
prob <- 0.25 + (0.75 * prob_rain[moy])
}
}
# Determine we randomly and use Krysanova / Cramer estimates of
# parameter values (c1,c2) for an exponential distribution
if (iloop==1) {
vv <- prdaily_random[doy,1]
} else {
# problem: rand() generates a random number that leads to
# floating point exception
vv <- stats::runif(1)
}
if (vv>prob) {
dval_prec[doy] <- 0.0
} else {
nwet <- nwet + 1
v1 <- prdaily_random[doy,2]
dval_prec[doy] <- ((-log(v1))^c2) * mprecave[moy] * c1
if (dval_prec[doy] < 0.1) dval_prec[doy] <- 0.0
}
mprecip[moy] <- mprecip[moy] + dval_prec[doy]
}
# If it never rained this month and mprec[moy]>0 and mval_wet[moy]>0, do
# again
dry <- (nwet==0 && iloop<50 && mval_prec[moy]>0.1)
if (iloop>50) {
print('Daily.F, prdaily: Warning stopped after 50 tries in cell')
}
# Reset counter to start of month
if (dry) {
doy <- doy - ndaymonth[moy]
}
} #while
# normalise generated precipitation by monthly CRU values
if ( moy > 1 ) {daysum <- daysum + ndaymonth[moy-1]}
if ( mprecip[moy] < 1.0 ) {mprecip[moy] <- 1.0}
for (dm in 1:ndaymonth[moy]){
doy <- daysum + dm
dval_prec[doy] <- dval_prec[doy] * (mval_prec[moy] / mprecip[moy])
if ( dval_prec[doy] < 0.1 ) {dval_prec[doy] <- 0.0}
# dval_prec[doy] <- mval_prec[moy] / ndaymonth[moy] #no generator
}
# Alternative: equal distribution of rain for fixed number of wet days
# prob <- prob_rain[moy] + prob
# if (prob.ge.1.0) then
# dval_prec[doy] <- mprec[moy]
# prob <- prob-1.0
# } else {
# dval_prec[doy] <- 0.0
# prob <- prob
# }
}
return( dval_prec )
}
#' Interpolates monthly to daily values
#'
#' Implements different methods to interpolate from monthly to daily values,
#' including fitting a polynomial.
#'
#' @param mval A vector of twelve numeric values for monthly values.
#' @param method A character string specifying the method for interpolation.
#' Defaults to \code{"polynom"} for using a polynomial.
#' @param mval_prev The monthly value of the month before the twelve months for
#' which values are provided by argument \code{mval}.
#' @param mval_next The monthly value of the month after the twelve months for
#' which values are provided by argument \code{mval}.
#' @param leapyear A logical specifying whether interpolation is done for a
#' leap year (with 366 days).
#'
#' @return A named list of data frames (tibbles) containing
#' input data for each site is returned.
#'
monthly2daily <- function(
mval,
method="polynom",
mval_prev=mval[nmonth],
mval_next=mval[1],
leapyear=FALSE
) {
# mval <- 20*sin( seq(0, 2*pi, 2*pi/11)-0.5*pi)
# mval_prev <- mval[12]
# mval_next <- mval[1]
if (leapyear){
ndaymonth <- c(31,29,31,30,31,30,31,31,30,31,30,31)
} else {
ndaymonth <- c(31,28,31,30,31,30,31,31,30,31,30,31)
}
nmonth <- length(ndaymonth)
dval <- rep(NA,sum(ndaymonth))
if (method=="polynom"){
# Starting conditons of december in previous year
startt <- -30.5 # midpoint between Nov-Dec of previous year
endt <- 0.5 # midpoint between Dec-Jan of this year
dt <- 31.0 # number of Dec days
lastmonthtemp <- mval_prev # Dec mean temperature
day <- 0 # initialisation of this years days
for (month in 1:nmonth){
dtold <- dt
dt <- (ndaymonth[month])
startt <- endt
endt <- endt + dt
if (month<nmonth) {
dtnew <- (ndaymonth[month+1])
nextmonthtemp <- mval[month+1]
} else {
dtnew <- (ndaymonth[1])
nextmonthtemp <- mval_next
}
starttemp <- (mval[month]*dt+lastmonthtemp*dtold)/(dt+dtold)
endtemp <- (nextmonthtemp*dtnew+mval[month]*dt)/(dtnew+dt)
deltatemp <- endtemp-starttemp
# calculate vars for a,b,c coefficients in polynom y <- ax^2 +bx + c
d2t <- endt^2.0 - startt^2.0
d3t <- endt^3.0 - startt^3.0
# Take a sheet of paper and try solve the polynom,
# well here is the outcome
polya <- (mval[month]*dt - deltatemp*d2t/dt/2.0 -
starttemp*dt + deltatemp*startt) /
(d3t/3.0 - d2t^2.0/dt/2.0 - dt*startt^2.0 + startt*d2t)
polyb <- deltatemp/dt - polya*(startt+endt)
polyc <- starttemp - polya*startt^2.0 - polyb*startt
# calculate daily values with the polynom function
for (d in 1:ndaymonth[month]) {
day <- day + 1
dval[day] <- polya*(day)^2.0 + polyb*(day) + polyc
}
lastmonthtemp <- mval[month]
}
# calculate monthly means after interpolation -
# not absolutely identical to input
mtempint <- rep(NA,nmonth)
day <- 0
for (m in 1:nmonth){
mtempint[m] <- 0.0
for (d in 1:ndaymonth[m]){
day <- day + 1
mtempint[m] <- mtempint[m]+dval[day]/(ndaymonth[m])
}
}
} else if (method=="step"){
dval[] <- rep( mval, times=ndaymonth )
} else {
print( "Method (2nd argument) not valid." )
}
return(dval)
}
# fills gaps (NAs) by (1.) linear interpolation, (2.)
# extending first/last to head/tail
fill_gaps <- function( vec, is.prec = FALSE ){
xvals <- seq(length(vec))
if ( is.prec ){
# assume precipitation = 0 where missing
if (any(is.na(vec))){
vec[ is.na(vec) ] <- 0.0
}
} else {
# linear approximation
if ( any(is.na(vec)) && any(!is.na(vec)) ){
vec <- stats::approx( xvals, vec, xout=xvals )$y
}
# extend to missing in head and tail
if ( any(is.na(vec)) && any(!is.na(vec)) ){
for (idx in seq(length(vec))){
if ( any( is.na( utils::tail( vec, n=idx ) ) ) &&
any( !is.na(utils::tail( vec, n=(idx+1) ) ) ) ){
if (length(vec[ (length(vec)-idx) ])>0){
vec[ (length(vec)-idx+1):length(vec) ] <- vec[ (length(vec)-idx) ]
} else {
vec[ (length(vec)-idx+1):length(vec) ] <- NA
}
break
}
}
}
}
return( vec )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.