#' This routine uses a modified version of the REddyProc package to perform gap-filling and flux partitioning on AmeriFlux, FLUXNET2015, and NEON (half-)hourly flux data.
#' @export
#' @title Perform gap-filling and flux partitioning using REddyProc (modified)
#' @param method character, specify which partitioning method you'd like to use: 'night' (default), 'day', or 'both' which runs both partitioning routines
#' @param inpath character string, navigate to the directory containing REddyProc input files
#' @param outpath character string, set the directory by which gap-filled data will be exported
#' @param site.meta dataframe, site metadata file contained in FluxSynthU
#' @param batch_process logical, if TRUE, batch processes all sites within the inpath directory (defaults to FALSE).
#' @importFrom REddyProc fConvertTimeToPosix sEddyProc usGetAnnualSeasonUStarMap fWriteDataframeToFile
# Author: Kenny Smith
# Version notes -----------------------------------------------------------
# v2.1.KS
# 200826 KS
# Added minor tweaks to how site metadata is displayed
# v1.0.KS
# 200512
# FS_fProcessingThruREddyProc is a function that processes the files created from './Source/1a_RawDataProcessing_v1.0.KS.R/'
# (located in './Data/Processed data/REddyProc Inputs/') and performs gap-filling and flux partitioning using either the day or night method.
# This function executes fingerprint plots in real time, then exports gap-filled data to './Data/Processed data/REddyProc Outputs/'
FS_fProcessingThruREddyProc <- function(method = 'night', inpath, outpath, site.meta, batch_process = FALSE) {
if (!dir.exists(outpath)) {
message(paste('Warning! No such directory exists!'))
x <- askYesNo(paste("Would you like to create a new directory in: ",outpath))
if (x) {
dir.create(file.path(paste0(outpath, "1_after-night-part/")), showWarnings = FALSE, recursive = TRUE)
dir.create(file.path(paste0(outpath, "2_after-day-part/")), showWarnings = FALSE, recursive = TRUE)
}
}
# find all raw data files (including rejected sites)
all.files <- list.files(paste(inpath), recursive = TRUE)
# extract paths for FLUXNET2015 and AMERIFLUX (half-) hourly data
fluxnet.files <- all.files[grep(pattern = '\\FLX', all.files)]
amf.files <- all.files[grep(pattern = '\\AMF', all.files)]
neon.files <- all.files[grep(pattern = '\\NEO', all.files)]
# concatenate the two flux datasets
files <- c(fluxnet.files, amf.files, neon.files)
# list of all available sites with database type (eg, 'AMF_CA-Qcu')
db.and.sites <- unique(substr(x = files, start = 1,stop = 10))
# matching database for each
all.site.db <- substr(x = files, start = 1,stop = 3)
neon_index <- all.site.db %in% "NEO"
# all available site names
flx.amf.site.names <- substr(x = files[!neon_index], start = 5,stop = 10)
neon.site.names <- substr(x = files[neon_index], start = 5,stop = 8)
all.site.names <- c(flx.amf.site.names, neon.site.names)
# update db.and.sites to fix NEON names
db.and.sites[neon_index] <- gsub("_N", "", db.and.sites[neon_index])
# Select site(s) or batch process all sites at once
if (batch_process) {
selected.db_sites <- db.and.sites
selected.sites <- all.site.names
} else {
# give a description for your routine:
routine_description <- "Perform gap-filling and flux partitioning with REddyProc"
# select one or more sites
choices <- fSiteSelect(db.and.sites, descrip = routine_description)
if (length(choices[["AMF"]]) > 0) {
AMF_selected <- paste0("AMF_", choices[["AMF"]])
} else {AMF_selected <- NULL}
if (length(choices[["FLX"]]) > 0) {
FLX_selected <- paste0("FLX_", choices[["FLX"]])
} else {FLX_selected <- NULL}
if (length(choices[["NEO"]]) > 0) {
NEO_selected <- paste0("NEO_", choices[["NEO"]])
} else {NEO_selected <- NULL}
# Includes database name in site selection
selected.db_sites <- c(AMF_selected, FLX_selected, NEO_selected)
# Omits database name in site selection
selected.sites <- c(choices[["AMF"]], choices[["FLX"]], choices[["NEO"]])
}
for (i in 1:length(selected.db_sites)) {
# The current site (and database)
db_site <- selected.db_sites[i]
# Extract the database name and site name from the 'db_site' string
db <- substr(x = db_site, start = 1, stop = 3)
site <- str_remove(db_site, paste0(db, "_"))
logical_test_for_selected_sites <- all.site.names %in% site & all.site.db %in% db
site.files <- files[logical_test_for_selected_sites]
years_of_record <- as.numeric(gsub(".*-(\\d{4})-.+", "\\1", site.files)) ## Looks for 4-char numeric strings, followed by a hyphen
start_year <- years_of_record[1]
end_year <- tail(years_of_record, 1)
# Get site metadata
site.info <- fGetSiteInfo(dat = site.meta,
site = site,
db = db,
site_col = "site_ID",
db_col = "database",
site_name_col = "site_Name",
start_yr_col = "start_year",
end_yr_col = "end_year",
exclude_yr_col = "exclude_years",
lat_col = "lat",
long_col = "long",
country_col = "country",
IGBP_col = "IGBP",
elev_col = "elev_orig",
MAT_col = "MAT_orig",
MAP_col = "MAP_orig")
message('')
message('Processing site (', min(which(unique(selected.sites) %in% site)), '/',
length(unique(selected.sites)), '):\t', site, ', ', db, " (", site.info$country, ")")
message('\t\tName: ', site.info$info)
message('\t\tlat: ', round(site.info$lat,3), '\tlong: ', round(site.info$long,3))
message('\t\tMAT: ', round(site.info$MAT,1), '°C\tMAP: ', round(site.info$MAP,0), 'mm')
message('\t\tForest Type: ', site.info$IGBP)
message('')
message(' ', 'Years of record: ', site.info$start_year, '-', site.info$end_year)
message('')
message(' ', 'Navigating to data directory: ')
message(' ', paste0(inpath))
if (method == 'both') {
partition.methods <- c('night', 'day')
for (k in seq_along(partition.methods)) {
current.method <- partition.methods[k]
message('')
message(' ', 'Starting REddyProc (', paste0(current.method), ' method)...')
# The following routine loops through each available year of data and performs gap-filling and flux partitioning.
# Note: It is known to crash when large datagaps are encountered.
# As of version 1.4 of this script, this routine should be able to handle hourly and half-hourly datatypes.
for (process_year in start_year:end_year) {
tryCatch(
expr = {
# Set infile name for data
infile <- sprintf("%s_%s-%d-ToBePartitioned.txt", db, site, process_year)
message('')
message(' ', 'Reading site data: ', paste0(site, sep=', ', db, sep=' (Year = ', process_year, sep=")"))
message('')
#+++ Load data with 1 header and 1 unit row from (tab-delimited) text file
EddyData.F <- read.table(paste(inpath, infile, sep=""), header=TRUE, na.strings = -9999, stringsAsFactors = FALSE)
EddyData.F <- fCharacter2Numeric(EddyData.F)
EddyData.F <- fLogic2Numeric(EddyData.F)
#+++ Add time stamp in POSIX time format
# the routine fConvertTimeToPosix() is a part of the REddyProc package,
# but not called using EddyProc.C$fConvertTimeToPosix as with other examples below - why not?
for (j in 1:ncol(EddyData.F)) {
if (sum(is.na(EddyData.F[,j])) == nrow(EddyData.F)) {
EddyData.F[,j] = -9999
}
}
EddyDataWithPosix.F <- fConvertTimeToPosix(EddyData.F, 'YDH', Year = 'Year', Day = 'DoY', Hour = 'Hour')
# Logical test to determine whether the hour column contains any half-hours (hourly datafiles will return a vector of all TRUE)
test_for_hourly_dat <- fCheckInteger(unique(EddyDataWithPosix.F$Hour))
if (!FALSE %in% test_for_hourly_dat) {
# TRUE for hourly data
# Set DTS to 24 for hourly data
# #+++ Initalize R5 reference class sEddyProc for post-processing of eddy data
#+++ with the variables needed for post-processing later
# note EddyProc.C has class "package"
EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 24)
} else {
# ELSE for half-hourly data
# Set DTS to 48 for half-hourly data
EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 48)
}
# fingerprint plot for NEE, with gaps
EddyProc.C$sPlotFingerprintY('NEE', Year = process_year)
# get u* thresholds
uStarTh <- EddyProc.C$sEstUstarThresholdDistribution(nSample = 100L, probs = c(0.05, 0.5, 0.95))
# the [-2] omits the second element of any object
uStarThAnnual <- usGetAnnualSeasonUStarMap(uStarTh)[-2]
uStarSuffixes <- colnames(uStarThAnnual)[-1]
# gap fill NEE
EddyProc.C$sMDSGapFillAfterUStarDistr('NEE',
uStarTh = uStarThAnnual,
uStarSuffixes = uStarSuffixes,
FillAll = TRUE
)
# plot of gap-filled NEE (note year)
EddyProc.C$sPlotFingerprintY('NEE_U50_f', Year = process_year)
# specify location for site (needed to distinguish night and day)
EddyProc.C$sSetLocationInfo(LatDeg = site.info$lat, LongDeg = site.info$long, TimeZoneHour = site.info$UTC_offset)
# gap fill met
EddyProc.C$sMDSGapFill('Tair', FillAll = FALSE)
EddyProc.C$sMDSGapFill('VPD', FillAll = FALSE)
EddyProc.C$sMDSGapFill('Tsoil', FillAll = FALSE)
EddyProc.C$sMDSGapFill('Rg', FillAll = FALSE)
EddyProc.C$sMDSGapFill('PPFD', T1 = 100, FillAll = FALSE)
# partition NEE using night-time method, using default parameters
# note that lapply is similar to putting a function call in a for loop
if (current.method == 'night') {
resPart <- lapply(uStarSuffixes, function(suffix){
EddyProc.C$sMRFluxPartition(Suffix = suffix)
})
}
# partition NEE, daytime method, Tair
# original
if (current.method == 'day') {
resPart <- lapply(uStarSuffixes, function(suffix) {
EddyProc.C$sGLFluxPartition(Suffix = suffix)
})
}
# stack results on to the original data file
FilledEddyData.F <- EddyProc.C$sExportResults()
CombinedData.F <- cbind(EddyData.F, FilledEddyData.F)
if (current.method == 'night') {
# set outpath_directory and outfile names
outpath_directory <- paste0(outpath, "1_after-night-part/")
outfile <- sprintf('%s_%s-%d-after-night-part.txt', db, site, process_year)
} else {
outpath_directory <- paste0(outpath, "2_after-day-part/")
outfile <- sprintf('%s_%s-%d-after-day-part.txt', db, site, process_year)
}
message('')
message('')
message(' ', 'Exporting output file for site: ', paste(site, sep=', ', db), ' (Year = ', paste(process_year), ')')
message(' ', paste0(outpath_directory))
message('')
message('')
fWriteDataframeToFile(CombinedData.F, outfile, Dir = outpath_directory)
},
# If loop fails for a particular year,
error = function(e){
message(" * Caught an error on process year: ", process_year)
print(e)
}
) # end TryCatch
} # end year loop
message('')
message(' ', 'Finishing current REddyProc method (', paste0(current.method), ' method)...')
message('')
message('')
rm(current.method)
}
} else {
current.method <- method
message('')
message(' ', 'Starting REddyProc (', paste0(current.method), ' method)...')
# The following routine loops through each available year of data and performs gap-filling and flux partitioning.
# Note: It is known to crash when large datagaps are encountered.
# As of version 1.4 of this script, this routine should be able to handle hourly and half-hourly datatypes.
for (process_year in start_year:end_year) {
tryCatch(
expr = {
# Set infile name for data
infile <- sprintf("%s_%s-%d-ToBePartitioned.txt", db, site, process_year)
message('')
message(' ', 'Reading site data: ', paste0(site, sep=', ', db, sep=' (Year = ', process_year, sep=")"))
message('')
#+++ Load data with 1 header and 1 unit row from (tab-delimited) text file
EddyData.F <- read.table(paste(inpath, infile, sep=""), header=TRUE, na.strings = -9999, stringsAsFactors = FALSE)
EddyData.F <- fCharacter2Numeric(EddyData.F)
EddyData.F <- fLogic2Numeric(EddyData.F)
#+++ Add time stamp in POSIX time format
# the routine fConvertTimeToPosix() is a part of the REddyProc package,
# but not called using EddyProc.C$fConvertTimeToPosix as with other examples below - why not?
for (j in 1:ncol(EddyData.F)) {
if (sum(is.na(EddyData.F[,j])) == nrow(EddyData.F)) {
EddyData.F[,j] = -9999
}
}
EddyDataWithPosix.F <- fConvertTimeToPosix(EddyData.F, 'YDH', Year = 'Year', Day = 'DoY', Hour = 'Hour')
# Logical test to determine whether the hour column contains any half-hours (hourly datafiles will return a vector of all TRUE)
test_for_hourly_dat <- fCheckInteger(unique(EddyDataWithPosix.F$Hour))
if (!FALSE %in% test_for_hourly_dat) {
# TRUE for hourly data
# Set DTS to 24 for hourly data
# #+++ Initalize R5 reference class sEddyProc for post-processing of eddy data
#+++ with the variables needed for post-processing later
# note EddyProc.C has class "package"
EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 24)
} else {
# ELSE for half-hourly data
# Set DTS to 48 for half-hourly data
EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 48)
}
# fingerprint plot for NEE, with gaps
EddyProc.C$sPlotFingerprintY('NEE', Year = process_year)
# get u* thresholds
uStarTh <- EddyProc.C$sEstUstarThresholdDistribution(nSample = 100L, probs = c(0.05, 0.5, 0.95))
# the [-2] omits the second element of any object
uStarThAnnual <- usGetAnnualSeasonUStarMap(uStarTh)[-2]
uStarSuffixes <- colnames(uStarThAnnual)[-1]
# gap fill NEE
EddyProc.C$sMDSGapFillAfterUStarDistr('NEE',
uStarTh = uStarThAnnual,
uStarSuffixes = uStarSuffixes,
FillAll = TRUE
)
# plot of gap-filled NEE (note year)
EddyProc.C$sPlotFingerprintY('NEE_U50_f', Year = process_year)
# specify location for site (needed to distinguish night and day)
EddyProc.C$sSetLocationInfo(LatDeg = site.info$lat, LongDeg = site.info$long, TimeZoneHour = site.info$UTC_offset)
# gap fill met
EddyProc.C$sMDSGapFill('Tair', FillAll = FALSE)
EddyProc.C$sMDSGapFill('VPD', FillAll = FALSE)
EddyProc.C$sMDSGapFill('Tsoil', FillAll = FALSE)
EddyProc.C$sMDSGapFill('Rg', FillAll = FALSE)
EddyProc.C$sMDSGapFill('PPFD', T1 = 100, FillAll = FALSE)
# partition NEE using night-time method, using default parameters
# note that lapply is similar to putting a function call in a for loop
if (current.method == 'night') {
resPart <- lapply(uStarSuffixes, function(suffix){
EddyProc.C$sMRFluxPartition(Suffix = suffix)
})
}
# partition NEE, daytime method, Tair
# original
if (current.method == 'day') {
resPart <- lapply(uStarSuffixes, function(suffix) {
EddyProc.C$sGLFluxPartition(Suffix = suffix)
})
}
# stack results on to the original data file
FilledEddyData.F <- EddyProc.C$sExportResults()
CombinedData.F <- cbind(EddyData.F, FilledEddyData.F)
if (current.method == 'night') {
# set outpath_directory and outfile names
outpath_directory <- paste0(outpath, "1_after-night-part/")
outfile <- sprintf('%s_%s-%d-after-night-part.txt', db, site, process_year)
} else {
outpath_directory <- paste0(outpath, "2_after-day-part/")
outfile <- sprintf('%s_%s-%d-after-day-part.txt', db, site, process_year)
}
message('')
message('')
message(' ', 'Exporting output file for site: ', paste(site, sep=', ', db), ' (Year = ', paste(process_year), ')')
message(' ', paste0(outpath_directory))
message('')
message('')
fWriteDataframeToFile(CombinedData.F, outfile, Dir = outpath_directory)
},
# If loop fails for a particular year,
error = function(e){
message(" * Caught an error on process year: ", process_year)
print(e)
}
) # end TryCatch
} # end year loop
}
message('')
message('Finalizing site (', min(which(unique(selected.sites) %in% site)), '/',
length(unique(selected.sites)), '): ', site, ', ', db)
message('')
message('')
} # end site loop
} # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.