#' Formatting PEP725 data
#'
#' Some pre-processing steps are required as downloading the PEP725.
#' So, for the species of interest download the separate zipped files.
#' Unzip all files and put the respective scientific data in one folder.
#'
#' The routine requires E-OBS climate, which can be downloaded from
#' the E-OBS website and should be put unzipped in a single folder.
#' (http://www.ecad.eu/download/ensembles/ensembles.php).
#'
#' BBCH list: https://www.reterurale.it/downloads/BBCH_engl_2001.pdf
#'
#' @param pep_path path to the PEP725 data (species files only)
#' @param eobs_path path to regular grid E-OBS data.
#' @param bbch which phenophase (bbch) to use (default = 11)
#' @param species species to select from merged PEP725 file
#' @param offset offset of the time series in DOY (default = 264, sept 21)
#' @param count minimum number of acquisitions per location
#' @param resolution resolution of the E-OBS data (0.25 or 0.5, default = 0.25)
#' @param pep_data prefiltered subset of the merged PEP725 data obtained through
#' merge_PEP725(), else the complete data contained in pep_path will be used
#' @return returns a nested list of site locations, their respective
#' phenological metrics and matching environmental data as extracted from
#' the E-OBS product (corrected for altitude using a lapse rate of 5C/km.)
#' @keywords phenology, model, preprocessing
#' @export
#' @examples
#'
#' # run with default settings
#' # looks for transition date files derived
#' # through phenocamr in your home directory
#' # change the path to match your setup
#' \dontrun{
#' phenocam_data = pr_fm_pep725(pep_path = "~/pep_data/",
#' eobs_path = "~/eobs_data/")
#'}
pr_fm_pep725 <- function(
pep_path = tempdir(),
eobs_path = tempdir(),
bbch = "11",
species = NULL,
offset = 264,
count = 60,
resolution = 0.25,
pep_data
) {
# helper function to format the data
# for a given site
format_data <- function(site = site,
offset = offset){
# subset the data based upon the year to evaluate
# necessary for E-OBS data subsetting
pep_subset <- pep_data[which(pep_data$pep_id == site),]
# create spatial object with
points <- sp::SpatialPoints(cbind(pep_subset$lon[1],
pep_subset$lat[1]),
proj4string = sp::CRS(raster::projection(eobs_data[[1]])))
# subset raster data (only once, then replicate data for speed)
# [subsetting the raster data is a bottleneck]
tmean <- raster::extract(eobs_data[[1]], points)
tmin <- raster::extract(eobs_data[[4]], points)
tmax <- raster::extract(eobs_data[[5]], points)
# close to water bodies raster cells can be empty and return
# NA, in this case set the output to NULL and remove these
# values later on
if (all(is.na(tmean))){
return(NULL)
} else {
precipitation <- raster::extract(eobs_data[[2]], points)
tmean <- matrix(rep(tmean, nrow(pep_subset)),
length(tmean),
nrow(pep_subset))
tmin <- matrix(rep(tmin, nrow(pep_subset)),
length(tmean),
nrow(pep_subset))
tmax <- matrix(rep(tmax, nrow(pep_subset)),
length(tmean),
nrow(pep_subset))
precipitation <- matrix(rep(precipitation, nrow(pep_subset)),
length(precipitation),
nrow(pep_subset))
}
# calculate long term mean temperature
ltm <- as.vector(unlist(by(tmean[which(years >= 1980),1],
INDICES = yday[which(years >= 1980)],
mean,
na.rm = TRUE)))[1:365]
# calculate lapse rate and correct temperatures
lapse_rate <- as.vector(
(raster::extract(eobs_data[[3]],
points[1]) - pep_subset$alt[1]) * 0.005)
tmean <- rbind(tmean + lapse_rate, pep_subset$year)
tmin <- rbind(tmin + lapse_rate, pep_subset$year)
tmax <- rbind(tmax + lapse_rate, pep_subset$year)
ltm <- ltm + lapse_rate
# now for all years create subsets for temperature
# and precipitation (wrap this in a function and a do call)
Ti <- apply(tmean, 2, function(x){
layers <- which((years == (x[length(x)] - 1) & yday >= offset) |
(years == x[length(x)] & yday < offset))[1:365]
return(x[layers])
})
Tmini <- apply(tmin, 2, function(x){
layers <- which((years == (x[length(x)] - 1) & yday >= offset) |
(years == x[length(x)] & yday < offset))[1:365]
return(x[layers])
})
Tmaxi <- apply(tmax, 2, function(x){
layers <- which((years == (x[length(x)] - 1) & yday >= offset) |
(years == x[length(x)] & yday < offset))[1:365]
return(x[layers])
})
Pi <- apply(rbind(precipitation, pep_subset$year), 2, function(x){
layers <- which((years == (x[length(x)] - 1) & yday >= offset) |
(years == x[length(x)] & yday < offset))[1:365]
return(x[layers])
})
# shift data when offset is < 365
if (offset < 365){
doy_neg <- c((offset - 366):-1,1:(offset - 1))
doy <- c(offset:365,1:(offset - 1))
} else {
doy <- doy_neg <- 1:365
}
# calculate daylength using the doy
l <- ncol(Ti)
Li <- daylength(doy = doy, latitude = pep_subset$lat[1])
Li <- matrix(rep(Li,l),length(Li),l)
# recreate the validation data structure (new format)
# but with concatted data
data <- list("site" = site,
"location" = c(pep_subset$lat[1], pep_subset$lon[1]),
"doy" = doy_neg,
"ltm" = ltm,
"transition_dates" = pep_subset$day,
"year" = pep_subset$year,
"Ti" = Ti,
"Tmini" = Tmini,
"Tmaxi" = Tmaxi,
"Li" = Li,
"Pi" = Pi,
"VPDi" = NULL,
"georeferencing" = NULL
)
# return the list
return(data)
}
message("* Merging and cleaning PEP725 data files in: \n")
message(sprintf(" %s\n", pep_path))
# User may provide prefiltered merge_PEP725 dataset
if (missing(pep_data)){
pep_data <- pr_merge_pep725(path = pep_path)
}
# removing out of E-OBS climate data range
# PEP725 observations
message(" |_ removing data out of range of the E-OBS climate data \n")
pep_data <- pep_data[which(pep_data$year >= 1950 &
pep_data$year <= max(pep_data$year)),]
# filtering on species
if (is.null(species)){
message(" |_ including all species \n")
} else {
message(sprintf(" |_ selecting species: %s \n", species))
pep_data <- pep_data[which(pep_data$species == species),]
}
# filtering based upon bbch value (phenophase)
message(sprintf(" |_ selecting phenophase: %s \n", bbch))
pep_data <- pep_data[which(pep_data$bbch == bbch),]
message(
sprintf(" |_ excluding sites with: < %s site years of observations \n",
count)
)
# count number of observations for each
# unique pep725 site, this does not screen
# for them being continuous or not !
sites <- unique(pep_data$pep_id)
counts <- unlist(lapply(sites,function(x){
length(which(pep_data$pep_id == x))
})
)
# make a selection based upon minimum number
# of observations required per site
selection <- sites[which(counts >= count)]
pep_data <- pep_data[pep_data$pep_id %in% selection,]
# sanity check on what remains
if (nrow(pep_data) == 0 ){
stop("no data remaining after screening for the requested criteria
check your species name and observation count restrictions!")
}
# get remaining unique years and sites
sites <- unique(pep_data$pep_id)
years <- unique(pep_data$year)
# loading E-OBS data for subsetting
message(
sprintf("* Extracting E-OBS climatology for %s sites\n ", length(sites))
)
# read E-OBS data
eobs_data <- lapply(c("tg","rr","elev","tn","tx"),function(x){
filename <- list.files(eobs_path,
sprintf("%s_ens_mean_%sdeg_reg[^/]*\\.nc",x,resolution))
# if the file exist use the local file
if (length(filename)>0){
r <- raster::brick(sprintf("%s/%s", eobs_path, filename))
return(r)
} else {
stop('No E-OBS files found in the referred path !')
}
})
# extract the yday and year strings and convert to numbers
yday <- as.numeric(format(as.Date(eobs_data[[1]]@z$Date),"%j"))
years <- as.numeric(format(as.Date(eobs_data[[1]]@z$Date),"%Y"))
# track progress
pb <- utils::txtProgressBar(min = 0, max = length(sites), style = 3)
env <- environment()
i <- 0
# process data
validation_data <- lapply(sites, function(x) {
utils::setTxtProgressBar(pb, i + 1)
assign("i", i+1, envir = env)
format_data(site = x,
offset = offset)
})
# close progress bar
close(pb)
# add proper list names (this should be the same name as the
# site name added by the format_data() helper function)
names(validation_data) <- sites
# assign a class for post-processing
class(validation_data) <- "phenor_time_series_data"
# screen for NULL values (climate grid cells with NA - near water bodies)
validation_data <- validation_data[!unlist(lapply(validation_data,is.null))]
# return the formatted data
return(validation_data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.