#' Format PhenoCam data
#'
#' Preprocessing of all PhenoCam data into a format which can be ingested
#' by the optimization routines etc. the original nested list is flattened
#' for speed.
#'
#' In addition to the normal transition dates the offset in julian days
#' to the previous phenophase is included as transition_dates_prior.
#'
#' @param path a path to 1 or 3-day PhenoCam transition date estimates
#' (no validation checks will be done, so mixed files will lead to
#' mixed or duplicated results!)
#' @param direction The phenophase considered, "rising" for spring and
#' "falling" for autumn. When using fall the offset should be 365.
#' (default = rising)
#' @param gcc_value The gcc time series from which the phenophase estimates
#' were derived either (default = gcc_90)
#' @param threshold Threshold (% of seasonal amplitude) used in estimating
#' the phenophases (10, 25, 50) (default = 50)
#' @param offset offset of the time series in DOY (default = 264, sept 21)
#' @param spread maximum distance between CI of phenophase (default = 30)
#' increase this value to include more uncertain phenophase estimates
#' @param internal return data as structured list to R workspace or write
#' to RDS file (default = TRUE)
#' @keywords phenology, model, preprocessing
#' @export
#' @examples
#'
#' # run with default settings
#' # looks for transition date files derived
#' # through phenocamr in your home directory
#' # change the path to match your setup
#' \dontrun{
#' phenocam_data = pr_fm_phenocam()
#'}
pr_fm_phenocam <- function(
path = tempdir(),
direction = "rising",
gcc_value = "gcc_90",
threshold = 50,
offset = 264,
spread = 30,
internal = TRUE
){
data_selection <- function(
data,
direction,
gcc_value,
threshold,
spread,
reverse = FALSE){
if(reverse){
if(direction == "rising"){
direction <- "falling"
} else {
direction <- "rising"
}
}
# throw out all data but the selected gcc value
data <- data[data$direction == direction &
data$gcc_value == gcc_value,
grep(threshold,names(data))]
transition <- as.Date(data[,grep(sprintf("^transition_%s$",threshold),
names(data))])
lower <- as.Date(data[,grep(sprintf("*%s_lower*",threshold),names(data))])
upper <- as.Date(data[,grep(sprintf("*%s_upper*",threshold),names(data))])
# kick out transition dates with large uncertainties (> 30 days)
# these are most likely false (consider it to be a parameter)
transition <- transition[abs(lower - upper) < spread]
# return transition dates selected and screened
return(as.Date(transition))
}
# helper function to process the data
format_data <- function(site,
transition_files,
path,
end,
metadata){
# for all sites merge the transition dates if there are multiple files
# after merging, download the corresponding daymet data and create
# the parts of the final structured list containing data for further
# processing
transition_files_full <- paste(path, transition_files,sep = "/")
# get individual sites from the filenames
sites <- unlist(lapply(strsplit(transition_files,"_"),"[[",1))
files <- transition_files_full[which(sites == site)]
# merge all transition date data
data <- do.call("rbind", lapply(files, function(fn){
data.frame(utils::read.table(fn, header = TRUE, sep = ",") )
}))
# if not transition dates exist, return NULL
if(!any(grepl(threshold,names(data)))){
return(NULL)
}
data_subset <- data_selection(data = data,
direction = direction,
gcc_value = "gcc_90",
spread = spread,
threshold = threshold)
data_subset_prior <- data_selection(data = data,
direction = direction,
gcc_value = "gcc_90",
threshold = threshold,
spread = spread,
reverse = TRUE)
transition <- do.call("rbind", lapply(data_subset, function(x){
ss <- data_subset_prior[which(as.Date(data_subset_prior) < as.Date(x))]
loc <- as.numeric(as.Date(ss) - as.Date(x))
if(length(loc) == 0){
data.frame(transition = x, previous = NA)
} else {
date <- ss[which(loc == max(loc, na.rm = TRUE))]
data.frame(transition = x, previous = date)
}
}))
# rbind fucks up the formatting if an NA is present
# it shouldn't but it does nontheless
transition$previous <- as.Date(transition$previous, origin = "1970-01-01")
transition$year <- as.numeric(format(transition$transition, "%Y"))
# grab the location of the site by subsetting the
site_info <- metadata[which(metadata$site == site),]
lat <- site_info$lat
lon <- site_info$lon
# download daymet data for a given site
daymet_data <- try(daymetr::download_daymet(
site = site,
lat = lat,
lon = lon,
start = 1980,
end = end,
internal = TRUE,
silent = TRUE
)$data,
silent = TRUE)
# trap sites outside daymet coverage
if (inherits(daymet_data,"try-error")){
return(NULL)
}
# calculate the mean daily temperature
daymet_data$tmean <- (daymet_data$tmax..deg.c. + daymet_data$tmin..deg.c.)/2
# calculate saturated VP (based on temp)
vps <- 0.611 * exp((17.502*daymet_data$tmean)/(daymet_data$tmean+240.97))
#calculate VPD
daymet_data$vpd <- vps - (daymet_data$vp..Pa./1000)
# calculate the long term daily mean temperature
# and realign it so the first day will be sept 21th (doy 264)
# and the matching DOY vector
ltm <- as.vector(by(daymet_data$tmean,
INDICES = list(daymet_data$yday),
mean,
na.rm = TRUE))
# shift data when offset is < 365
if (offset < 365){
ltm = c(ltm[offset:365],ltm[1:(offset - 1)])
doy_neg = c((offset - 366): -1, 1:(offset - 1))
doy = c(offset:365,1:(offset - 1))
} else {
doy = doy_neg = 1:365
}
# how many years to process
years <- unique(transition$year)
# only select the first instance of a phenophase_doy
# currently the model frameworks do not handle multiple cycles
phenophase <- do.call("c",lapply(years, function(x) {
transition$transition[which(transition$year == x)[1]]
}))
phenophase_prior <- do.call("c",lapply(years, function(x) {
transition$previous[which(transition$year == x)[1]]
}))
# finally select all the transition dates for model validation
phenophase_years <- as.numeric(format(phenophase,"%Y"))
phenophase_doy <- as.numeric(format(phenophase,"%j"))
# finally select all the transition dates for model validation
# of the previous cycle, if rising this is falling or vise versa
phenophase_doy_prior <- as.numeric(phenophase - phenophase_prior)
# create output matrix (holding mean temp.)
tmean <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding min temp.)
tmin <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding max temp.)
tmax <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding vpd)
vpd <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding precip)
precip <- matrix(NA,
nrow = 365,
ncol = length(years))
# create a matrix containing the mean temperature between
# sept 21th in the previous year until sept 21th in
# the current year (make this a function parameter)
# for the default offset, if offset is 365 or larger
# use the current year only
for (j in 1:length(years)) {
if (offset < 365) {
loc <- which((daymet_data$year == (years[j] - 1) &
daymet_data$yday >= offset) |
(daymet_data$year == years[j] &
daymet_data$yday < offset))
} else {
loc <- which(daymet_data$year == years[j])
}
tmean[, j] <- daymet_data$tmean[loc]
tmin[, j] <- daymet_data$tmin..deg.c.[loc]
tmax[, j] <- daymet_data$tmax..deg.c.[loc]
precip[, j] <- daymet_data$prcp..mm.day.[loc]
vpd[, j] <- daymet_data$vpd[loc]
}
# calculate daylength
l <- ncol(tmean)
Li <- daylength(doy = doy, latitude = lat)
Li <- matrix(rep(Li,l),length(Li),l)
# format and return the data
return(list("site" = site,
"location" = c(lat,lon),
"doy" = doy_neg,
"ltm" = ltm,
"transition_dates" = phenophase_doy,
"transition_dates_prior" = phenophase_doy_prior,
"year" = phenophase_years,
"Ti" = as.matrix(tmean),
"Tmini" = as.matrix(tmin),
"Tmaxi" = as.matrix(tmax),
"Li" = Li,
"Pi" = as.matrix(precip),
"VPDi" = as.matrix(vpd),
"georeferencing" = NULL
))
}
# query site list with metadata from the phenocam servers
metadata <- jsonlite::fromJSON("https://phenocam.nau.edu/webcam/network/siteinfo/")
# query max year as available through Daymet, lags by a year so
# subtract 1 year by default. If download fails subtract another year
end <- as.numeric(format(as.Date(Sys.Date()),"%Y")) - 1
daymet_test <- try(daymetr::download_daymet(
start = end,
end = end,
internal = TRUE,
silent = TRUE
))
if (inherits(daymet_test,"try-error")){
end <- end - 1
}
# list all files in the referred path
transition_files <- list.files(
path,
"*_transition_dates.csv")
# warning on no transition files
if(length(transition_files) == 0){
stop("No transition files found in this path, check your path!")
}
# get individual sites form the filenames
sites <- unique(unlist(lapply(strsplit(transition_files,"_"),"[[",1)))
# track progress
message(sprintf('Processing %s sites\n', length(sites)))
pb <- utils::txtProgressBar(min = 0, max = length(sites), style = 3)
env <- environment()
i <- 0
# process data
validation_data <- lapply(sites, function(x) {
utils::setTxtProgressBar(pb, i + 1)
assign("i", i + 1, envir = env)
format_data(site = x,
transition_files = transition_files,
path = path,
end = end,
metadata = metadata)
})
# close progress bar
close(pb)
# rename list variables using the proper site names
names(validation_data) <- sites
# assign a class for post-processing
class(validation_data) <- "phenor_time_series_data"
# remove out of daymet range sites (prune sites)
na_loc <- which(is.na(validation_data))
if (length(na_loc) != 0){
validation_data = validation_data[-na_loc]
}
# return the formatted data
# either internally or saved as an rds (binary R data file)
if (internal){
return(validation_data)
} else {
saveRDS(validation_data,
file = sprintf("%s/phenor_phenocam_data_%s_%s_%s_%s.rds",
path,
direction,
gcc_value,
threshold,
offset))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.