#' Formatting USA-NPN data
#'
#' Combines data into a format which can be ingested
#' by the optimization routines. Data is aggregated on an
#' individual_id basis, rather than on the user defined site_id.
#'
#' @param data an USA-NPN data frame as returned by download_npn()
#' @param phenophase a phenophase to include as validation statistic
#' @param offset offset of the time series in DOY (default = 264, sept 21)
#' @param internal return data as structured list to R workspace or write
#' to RDS file (default = TRUE)
#' @param path path where to save the generated data file
#' @keywords phenology, model, preprocessing
#' @export
#' @examples
#'
#' \dontrun{
#' npn_data <- pr_fm_npn()
#'}
pr_fm_npn <- function(
data,
phenophase = 371,
offset = 264,
path = tempdir(),
internal = TRUE
){
# check
if (missing(data)){
stop("No data provided")
}
# helper function to process the data
format_data <- function(site){
# subset the original data
data_subset <- data[data$individual_id == site &
data$phenophase_id == phenophase,]
# if nothing in subset return NULL
if(nrow(data_subset)==0){
return(NULL)
}
# grab the location of the site by subsetting the
lat <- unique(data_subset$latitude)
lon <- unique(data_subset$longitude)
# get mean transition dates per year by site
mean_doy <- by(data_subset$first_yes_doy,
INDICES = data_subset$first_yes_year,
mean,
na.rm = TRUE)
transition_dates <- round(as.vector(mean_doy))
# get years matching transition dates
years <- as.numeric(names(mean_doy))
# download daymet data for a given site
daymet_data <- try(daymetr::download_daymet(
site = site,
lat = lat,
lon = lon,
start = 1980,
end = end,
internal = TRUE,
silent = TRUE
)$data)
# trap sites outside daymet coverage
if (inherits(daymet_data,"try-error")){
#cat(sprintf('Site: %s outside Daymet coverage will be pruned!\r\r', site))
return(NULL)
}
# calculate the mean daily temperature
daymet_data$tmean <- (daymet_data$tmax..deg.c. + daymet_data$tmin..deg.c.)/2
# calculate the long term daily mean temperature
# and realign it so the first day will be sept 21th (doy 264)
# and the matching DOY vector
ltm <- as.vector(by(daymet_data$tmean,
INDICES = list(daymet_data$yday),
mean,
na.rm = TRUE))
# shift data when offset is < 365
if (offset < 365){
ltm <- c(ltm[offset:365],ltm[1:(offset - 1)])
doy_neg <- c((offset - 366):-1,1:(offset - 1))
doy <- c(offset:365,1:(offset - 1))
} else {
doy <- doy_neg <- 1:365
}
# create output matrix (holding mean temp.)
tmean <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding min temp.)
tmin <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding max temp.)
tmax <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding vpd)
vpd <- matrix(NA,
nrow = 365,
ncol = length(years))
# create output matrix (holding precip)
precip <- matrix(NA,
nrow = 365,
ncol = length(years))
# create a matrix containing the mean temperature between
# sept 21th in the previous year until sept 21th in
# the current year (make this a function parameter)
# for the default offset, if offset is 365 or larger
# use the current year only
for (j in 1:length(years)) {
if (offset < 365) {
loc <- which((daymet_data$year == (years[j] - 1) &
daymet_data$yday >= offset) |
(daymet_data$year == years[j] &
daymet_data$yday < offset))
} else {
loc <- which(daymet_data$year == years[j])
}
tmean[, j] <- daymet_data$tmean[loc]
tmin[, j] <- daymet_data$tmin..deg.c.[loc]
tmax[, j] <- daymet_data$tmax..deg.c.[loc]
precip[, j] <- daymet_data$prcp..mm.day.[loc]
vpd[, j] <- daymet_data$vp..Pa.[loc]
}
# calculate daylength
l <- ncol(tmean)
Li <- daylength(doy = doy, latitude = lat)
Li <- matrix(rep(Li,l),length(Li),l)
# format and return the data
return(list("site" = as.character(site),
"location" = c(lat,lon),
"doy" = doy_neg,
"ltm" = ltm,
"transition_dates" = transition_dates,
"year" = years,
"Ti" = as.matrix(tmean),
"Tmini" = as.matrix(tmin),
"Tmaxi" = as.matrix(tmax),
"Li" = Li,
"Pi" = as.matrix(precip),
"VPDi" = as.matrix(vpd),
"georeferencing" = NULL
))
}
# query max year as available through Daymet, lags by a year so
# subtract 1 year by default. If download fails subtract another year
end <- as.numeric(format(as.Date(Sys.Date()),"%Y")) - 1
daymet_test <- try(daymetr::download_daymet(
start = end,
end = end,
internal = TRUE,
silent = TRUE
))
if (inherits(daymet_test,"try-error")){
end = end - 1
}
# get individual sites form the filenames
# a "site" in this case is defined as a single
# individual (or location) for which recurrent data is observed
sites <- unique(data$individual_id)
# track progress
pb <- utils::txtProgressBar(min = 0, max = length(sites), style = 3)
env <- environment()
i <- 0
cat(sprintf('Processing %s individuals (sites)\n', length(sites)))
# process data
validation_data <- lapply(sites, function(x) {
utils::setTxtProgressBar(pb, i + 1)
assign("i", i+1, envir = env)
format_data(site = x)
})
# close progress bar
close(pb)
# rename list variables using the proper site names
names(validation_data) <- sites
# assign a class for post-processing
class(validation_data) <- "phenor_time_series_data"
# remove out of daymet range sites (prune sites)
na_loc <- which(is.na(validation_data))
if (length(na_loc) != 0){
validation_data <- validation_data[-na_loc]
}
# return the formatted data
# either internally or saved as an rds (binary R data file)
if (internal){
return(validation_data)
} else {
saveRDS(validation_data,
file = sprintf("%s/phenor_npn_data_%s_%s.rds",
path,
phenophase,
offset))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.