#' This function appends annual dataframes produced via FS_fProcessingThruREddyProc (and package REddyProc)
#' Here, annual files are stacked and joined to a continuous (half-)houry time series for the entire record of the site, with NAs assigned to gaps.
#' @export
#' @title Append annual REddyProc output files
#' @param inpath character string, navigate to the directory containing REddyProc output files (must contain the following subdirectory names: '1_after-night-part/' and '2_after-day-part/')
#' @param outpath character string, set the directory by which appended dataframes will be exported (must contain the following subdirectory names: '1_after-night-part/' and '2_after-day-part/')
#' @param site.meta dataframe, site metadata file contained in FluxSynthU
#' @param export.files logical, if TRUE (default) exports dataframes as text files in the outpath directory.
#' @importFrom stringr str_remove str_detect str_remove
#' @importFrom patchwork plot_annotation
# Author: Kenny Smith
# Version notes -----------------------------------------------------------
# v5.0.KS
# 201005
# Major overhaul to ensure more flexible coding. Functionally there is nothing new here, but the code is more organized and relies on less hard-coding
FS_fAppendREddyProcOutputs <- function(inpath, outpath, site.meta, export.files = TRUE) {
if (!dir.exists(outpath)) {
message(paste('Warning! No such directory exists!'))
x <- askYesNo(paste("Would you like to create a new directory in: ",outpath))
if (x) {
dir.create(file.path(paste0(outpath, "1_after-night-part/")), showWarnings = FALSE, recursive = TRUE)
dir.create(file.path(paste0(outpath, "2_after-day-part/")), showWarnings = FALSE, recursive = TRUE)
}
}
## Timestamp handling
# Create the 'ideal' dataframe first (including leap days), then use left_join to unite the two dfs.
# This will append the relevant info you need (GPP, PPFD) to a full-time series, leaving NAs for DoY = 366
# Create a vector of half-hourly timestamps over a 30-year window
# NOTE: By default, uses the timezone of your machine. This shouldn't be an issue, because we're just using the POSIX string to match Years, Hours, and Min from dat
halfhours_null <- seq(as.POSIXct("1990-01-01 00:00:00", tz = "UTC"), as.POSIXct("2040-01-01 00:00:00", tz = "UTC"), by="30 mins")
# Convert timestamp vector into a dataframe, and then parse date into Year, DoY, and Hour
date.ref <- data.frame(halfhours_null)
date.ref <- transform(date.ref,
Year = lubridate::year(halfhours_null),
DoY = lubridate::yday(halfhours_null),
Hour = as.numeric(format(halfhours_null, format = "%H")) + (lubridate::minute(halfhours_null)/60))
# remove unnecessary objects
rm(halfhours_null)
## File Import
# find all raw data files (including rejected sites)
all.files <- list.files(paste(inpath), recursive = TRUE)
# extract paths for FLUXNET2015 and AMERIFLUX data
fluxnet.files <- all.files[grep(pattern = '\\FLX', all.files)]
amf.files <- all.files[grep(pattern = '\\AMF', all.files)]
neon.files <- all.files[grep(pattern = '\\NEO', all.files)]
# concatenate the flux datasets
parent.files <- c(fluxnet.files, amf.files, neon.files)
# separate into night vs. day partitioned
night.parent.files <- parent.files[grep(paste("night"), parent.files)]
day.parent.files <- parent.files[grep(paste("day"), parent.files)]
night.files <- sapply(strsplit(night.parent.files, "/\\s*"), tail, 1)
day.files <- sapply(strsplit(day.parent.files, "/\\s*"), tail, 1)
files <- c(night.files, day.files)
# list of all available sites with database type (eg, 'AMF_CA-Qcu')
db.and.sites <- unique(substr(x = files, start = 1, stop = 10))
# matching database for each
all.site.db <- substr(x = files, start = 1,stop = 3)
neon_index <- all.site.db %in% "NEO"
# all available site names
flx.amf.site.names <- substr(x = files[!neon_index], start = 5,stop = 10)
neon.site.names <- substr(x = files[neon_index], start = 5,stop = 8)
all.site.names <- c(flx.amf.site.names, neon.site.names)
# update db.and.sites to fix NEON names
db.and.sites[neon_index] <- gsub("_N", "", db.and.sites[neon_index])
# set a description for your routine:
routine_description <- "Append annual REddyProc output files into single dataframe"
# select one or more sites
choices <- fSiteSelect(db.and.sites, descrip = routine_description)
if (length(choices[["AMF"]]) > 0) {
AMF_selected <- paste0("AMF_", choices[["AMF"]])
} else {AMF_selected <- NULL}
if (length(choices[["FLX"]]) > 0) {
FLX_selected <- paste0("FLX_", choices[["FLX"]])
} else {FLX_selected <- NULL}
if (length(choices[["NEO"]]) > 0) {
NEO_selected <- paste0("NEO_", choices[["NEO"]])
} else {NEO_selected <- NULL}
# Includes database name in site selection
selected.db_sites <- c(AMF_selected, FLX_selected, NEO_selected)
# Omits database name in site selection
selected.sites <- c(choices[["AMF"]], choices[["FLX"]], choices[["NEO"]])
for (i in 1:length(selected.db_sites)) {
# The current site (and database)
db_site <- selected.db_sites[i]
# Extract the database name and site name from the 'db_site' string
db <- substr(x = db_site, start = 1, stop = 3)
site <- str_remove(db_site, paste0(db, "_"))
# Get site metadata
site.info <- fGetSiteInfo(dat = site.meta,
site = site,
db = db,
site_col = "site_ID",
db_col = "database",
site_name_col = "site_Name",
start_yr_col = "start_year",
end_yr_col = "end_year",
exclude_yr_col = "exclude_years",
lat_col = "lat",
long_col = "long",
country_col = "country",
IGBP_col = "IGBP",
elev_col = "elev_orig",
MAT_col = "MAT_orig",
MAP_col = "MAP_orig")
message('')
message('Processing site (', min(which(selected.sites %in% site)), '/',
length(selected.sites), '):\t', site, ', ', db, " (", site.info$country, ")")
message('\t\tName: ', site.info$info)
message('\t\tlat: ', round(site.info$lat,3), '\tlong: ', round(site.info$long,3))
message('\t\tMAT: ', round(site.info$MAT,1), '°C\tMAP: ', round(site.info$MAP,0), 'mm')
message('\t\tForest Type: ', site.info$IGBP)
message('')
message(' ', 'Years of record: ', site.info$start_year, '-', site.info$end_year)
message('')
message(' ', 'Navigating to data directory: ')
message(' ', paste0(inpath))
site.files <- parent.files[grep(paste(site, collapse="|"), parent.files)]
years_of_record <- as.numeric(gsub(".*-(\\d{4})-.+", "\\1", site.files)) ## Looks for 4-char numeric strings, followed by a hyphen
start_year <- years_of_record[1]
end_year <- tail(years_of_record, 1)
# Store all years for a given site into a list
dat.list <- list()
message('')
message(' ', 'Storing REddyProc output data in a list...')
message(' ', 'Data imported from:')
message(' ', paste0(inpath))
# Write a small loop that will combine your two datasets (night and day)
# If either doesnt exist, this can't work. So return an error.
# Assign the two partitioning methods to an object
partition.method <- c("night", "day")
for (j in 1:length(partition.method)) {
method <- partition.method[j]
if (method == 'night') {
message('')
message(' ', 'Stacking REddyProc output data: night method')
message('')
} else if (method == 'day') {
message('')
message(' ', 'Stacking REddyProc output data: day method')
message('')
}
# create progress bar
pb <- txtProgressBar(min = start_year, max = end_year, style = 3)
for (process_year in start_year:end_year) {
tryCatch(
expr = {
Sys.sleep(0.1)
#+++ Load data with 1 header and 1 unit row from (tab-delimited) text file
if (method == 'night') {
infile <- sprintf("%s_%s-%d-after-night-part.txt", db, site, process_year)
subdirectory <- "1_after-night-part/"
} else if (method == 'day') {
infile <- sprintf("%s_%s-%d-after-day-part.txt", db, site, process_year)
subdirectory <- "2_after-day-part/"
}
dat.list[[process_year]] <- read.table(paste(inpath, subdirectory, infile, sep=""), header=TRUE)[-1,]
index <- sapply(dat.list[[process_year]], is.factor)
dat.list[[process_year]][index] <- lapply(dat.list[[process_year]][index], function(x) as.numeric(as.character(x)))
# update progress bar
setTxtProgressBar(pb, process_year)},
# If loop fails for a particular year,
error = function(e){
message("")
message(" * NO data available for process year: ", process_year)
message(" (NAs will be assigned to data during missing years)")
message("")
print(e)
}
)
}
close(pb)
message('')
message(' ', 'Appending years into dataframe:')
message(' ', 'NOTE: As of 08/26/2020, the following variables are being extracted from output files:')
message(' ', '\tNEE_U50, GPP_U50, Reco_U50, PPFD, SW_IN, Tair, Tsoil, VPD')
if (method == 'night') {
dat <- dat.list %>%
bind_rows(.id = "source") %>%
group_by(source) %>%
select(source, Year, DoY, Hour,
NEE_U50_f, GPP_U50_f, Reco_U50, PPFD_f,
Rg_f, Tair_f, Tsoil_f, VPD_f) %>% # Here you can add other variables of interest
rename('NEE_NT_U50_gf'='NEE_U50_f', 'GPP_NT_U50_gf'='GPP_U50_f', 'Reco_NT_U50_gf'='Reco_U50', 'PPFD_NT_gf'='PPFD_f',
'SW_IN_NT_gf'='Rg_f', 'Tair_NT_gf'='Tair_f', 'Tsoil_NT_gf'='Tsoil_f', 'VPD_NT_gf'='VPD_f')
} else if (method == 'day') {
dat <- dat.list %>%
bind_rows(.id = "source") %>%
group_by(source) %>%
select(source, Year, DoY, Hour,
NEE_U50_f, GPP_DT_U50, Reco_DT_U50, PPFD_f,
Rg_f, Tair_f, Tsoil_f, VPD_f) %>% # Here you can add other variables of interest
rename('NEE_DT_U50_gf'='NEE_U50_f', 'GPP_DT_U50_gf'='GPP_DT_U50', 'Reco_DT_U50_gf'='Reco_DT_U50', 'PPFD_DT_gf'='PPFD_f',
'SW_IN_DT_gf'='Rg_f', 'Tair_DT_gf'='Tair_f', 'Tsoil_DT_gf'='Tsoil_f', 'VPD_DT_gf'='VPD_f')
}
message('')
message(' ', 'Parsing datetime strings...')
# Convert dat to a dataframe, and convert all na strings (-9999) to 'NA'
dat <- as.data.frame(dat)
dat[dat == -9999] <- NA
# Append data to full year timestamps ----------------------------------------------------
df <- date.ref %>%
left_join(dat, by = c("Year", "DoY", "Hour")) %>%
filter(Year >= start_year & Year <= end_year) %>% # Constrains the dataframe to years where you have data
select(-c(source, halfhours_null))
# At this point we can be pretty darn sure that there aren't any data gaps, and since we indexed our data to the Year,DoY,Hour cols, we don't have to worry about timezones either.
## Export data
if (export.files) {
if (method == 'night') {
outfile <- sprintf('%s_%s-%d-%d-after-night-part.txt', db, site, start_year, end_year)
message('')
message(' ', 'Exporting stacked data to: ')
message(' ', paste0(outpath, '1_after-night-part/'))
} else if (method == 'day') {
outfile <- sprintf('%s_%s-%d-%d-after-day-part.txt', db, site, start_year, end_year)
message('')
message(' ', 'Exporting stacked data to: ')
message(' ', paste0(outpath, '2_after-day-part/'))
}
write.table(df, file = paste0(outpath, subdirectory, outfile),
sep = "\t", row.names = FALSE,
col.names = TRUE,
append = FALSE)
}
} # end partition loop
} # end site loop
} # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.