#' Subset flux data for the GPP-free period only
#' @export
#' @title Subset winter fluxes.
#'
#' @param path_to_fluxdat character string, navigate to the directory containing the complete output datasets
#' @param path_to_critdates character string, navigate to the directory containing critical date files (must have dates associated with end of current winter (ECW) and start of next winter (SNW))
#' @param outpath character string, specify path to where you'd like to export the winter-only output files
#' @param site.meta dataframe, site metadata file contained in FluxSynthU
#' @param export.files logical, if TRUE (default) exports dataframes as text files in the outpath directory.
#' @param batch_process logical, if TRUE, batch processes all sites within the inpath directory (defaults to FALSE).
#' @param qc choices: 'auto', 'none' or 'specified', If the user selects 'auto', the data will be subset according to quality check flags that were automatically generated during Critical Date estimation. If 'specified' the user must provide a column name containing quality check flags (0 = bad, 1 = good). If 'none' the data is not subset.
#' @param qc_col character, specify the name of a column containing qc flags (0 = bad, 1 = good). Defaults to NULL.
#' @importFrom stringr str_remove str_detect
#' @importFrom lubridate interval days ymd
#' @importFrom dplyr mutate filter select arrange rename left_join bind_rows group_by
#' @importFrom tibble enframe
#' @importFrom tidyr unnest
FS_fExtractWinterFluxes <- function(path_to_fluxdat, path_to_critdates, outpath, site.meta, export.files = TRUE, batch_process = FALSE, qc = c('auto', 'none', 'specified'), qc_col = NULL) {
if (!dir.exists(outpath)) {
message('')
message(paste('Warning! No such directory exists!'))
x <- askYesNo(paste("Would you like to create a new directory in: ",outpath))
message('')
if (x) {
dir.create(file.path(outpath), showWarnings = FALSE, recursive = TRUE)
}
}
CritDates.infile <- tail(list.files(path_to_critdates, pattern = ".txt"),1)
dat.CritDates.tmp <- read.table(paste0(path_to_critdates, CritDates.infile), stringsAsFactors = FALSE, header = TRUE)
# Here we're going to use dat.CritDates as a lookup table to subset our data based on the GPP-free period
dat.CritDates <- dat.CritDates.tmp %>%
filter(!(site == "CA-SF1" | site == "CA-SF2" | site == "US-Me2" | site == "US-Me5")) %>% # Filter out sites with little to no useable data: "CA-SF1" "US-Me2" "US-Me5"
merge(select(site.meta, c(database, site_ID, MAT_orig)), by.x = c("db", "site"), by.y = c("database", "site_ID")) %>% # Merge site metadata with the dat.fluxsynth_filtered dataframe
mutate(MAT_bin = cut_width(MAT_orig, width = 3)) # Add a grouping variable for MAT (to bin sites into MAT categories/levels)
# Select sites to process -------------------------------------------------
site.list <- unique(dat.CritDates$site)
temp_df <- dat.CritDates %>%
mutate(db_site = paste0(db, "_", site))
db_site.list <- unique(temp_df$db_site)
routine_description <- "Select which sites you would like to extract winter fluxes from"
choices <- FluxSynthU::fSiteSelect(db_site.list, descrip = routine_description)
if (length(choices[["AMF"]]) > 0) {
AMF_selected <- paste0("AMF_", choices[["AMF"]])
} else {AMF_selected <- NULL}
if (length(choices[["FLX"]]) > 0) {
FLX_selected <- paste0("FLX_", choices[["FLX"]])
} else {FLX_selected <- NULL}
if (length(choices[["NEO"]]) > 0) {
NEO_selected <- paste0("NEO_", choices[["NEO"]])
} else {NEO_selected <- NULL}
# Includes database name in site selection
selected.db_sites <- c(AMF_selected, FLX_selected, NEO_selected)
# Omits database name in site selection
selected.sites <- c(choices[["AMF"]], choices[["FLX"]], choices[["NEO"]])
rm(dat.CritDates.tmp, temp_df)
# Make sure the 'k_POSIXdate_plotting' column is in date format
dat.CritDates$k_POSIXdate_plotting <- as.Date(dat.CritDates$k_POSIXdate_plotting)
## Quality check behavior
if (qc == 'none') { # when qc == 'none', all estimates of SOS/EOS and ECW/SNW are included (even questionable ones)
dat.CritDates_good <- dat.CritDates
} else if (qc == 'auto') { # when qc == 'auto', critical date estimates are subset by the CritThreshold_auto_qc column, which automatically removes dates that are suspicious
dat.CritDates_good <- subset(dat.CritDates, CritThreshold_auto_qc != 0 )
} else if (qc == 'specified') { # when qc == 'specified', the user must supply a column name containing their own quality check info (0 = bad, 1 = good). RECOMMENDED
if (is.null(qc_col)) { # NOTE: the user must supply a qc column name, otherwise the data will be subset using the CritThreshold_auto_qc column
message('')
message('Warning: No qc column name specified! Defaulting to auto_qc column')
message('')
dat.CritDates_good <- subset(dat.CritDates, CritThreshold_auto_qc != 0 )
} else {
dat.CritDates_good <- subset(dat.CritDates, dat.CritDates[,qc_col] == 1 )
}
}
rm(dat.CritDates)
for (i in 1:length(selected.db_sites)) {
# The current site (and database)
db_site <- selected.db_sites[i]
# Extract the database name and site name from the 'db_site' string
db <- substr(x = db_site, start = 1, stop = 3)
site <- str_remove(db_site, paste0(db, "_"))
# Get site metadata
site.info <- fGetSiteInfo(dat = site.meta,
site = site,
db = db,
site_col = "site_ID",
db_col = "database",
site_name_col = "site_Name",
start_yr_col = "start_year",
end_yr_col = "end_year",
exclude_yr_col = "exclude_years",
lat_col = "lat",
long_col = "long",
country_col = "country",
IGBP_col = "IGBP",
elev_col = "elev_orig",
MAT_col = "MAT_orig",
MAP_col = "MAP_orig")
message('')
message('')
message('Processing site (', min(which(selected.sites %in% site)), '/',
length(selected.sites), '):')
message('\t\t', site, ', ', db, " (", site.info$country, ")")
message('\t\tName: ', site.info$info)
message('\t\tlat: ', round(site.info$lat,3), '\tlong: ', round(site.info$long,3))
message('\t\tMAT: ', round(site.info$MAT,1), '°C\tMAP: ', round(site.info$MAP,0), 'mm')
message('\t\tForest Type: ', site.info$IGBP)
message('')
message(' ', 'Years of record: ', site.info$start_year, '-', site.info$end_year)
# Step 1. Import (half-)hourly data
# Load (half-)hourly dataset -----------------------------------------------------
## Load complete half-hourly dataset for site (includes *some* MET data )
# Create a string matching the site to its database (ex: AMF_US-NR1)
db_site <- paste(db, "_", site, sep ="")
# List all the files in the complete output datasets folder
complete_HH_files <- list.files(paste(path_to_fluxdat), recursive = TRUE)
# Find the output dataset for the current site
complete_HH_site_file <- complete_HH_files[grepl(db_site, complete_HH_files)]
message('')
message(' ', 'Reading (half-)hourly data from: ' )
message(' ', path_to_fluxdat)
# Load complete output half-hourly dataset (be sure to delete later)
complete_HH_dataset <- read.table(paste(path_to_fluxdat, complete_HH_site_file, sep=""), header=TRUE)
# Make sure k_POSIXdate_plotting is a date column:
complete_HH_dataset$k_POSIXdate_plotting <- as.Date(complete_HH_dataset$k_POSIXdate_plotting)
# Add db and site as cols to complete_HH_dataset
complete_HH_dataset <- complete_HH_dataset %>%
mutate(db = db, site = site) %>%
select(db, site, everything())
# Don't worry too much about this line - this is necessary when you want to subset a column that has the same name as an object
# So if you have an object called 'site' and a column named 'site' and you try to subset the column by the object, it will fail.
# This creates a temporary object that doesn't have the same name
site_i <- site
WinterCritDates_bySite <- dat.CritDates_good %>%
filter(., site == site_i &
CritThreshold %in% c("ECW", "SNW")) %>%
arrange(k_POSIXdate_plotting)
if (nrow(WinterCritDates_bySite) == 0) {
message(" * No winter critical dates found for site: ", site)
next
}
# Within each site, we're going to loop through winter years
# first we'll define our winter years as unique process_years within each site
# then we'll perform a test: each winter year must begin with a SNW from the prev year and an ECW for the current year
# if only one (or neither) of these values is available, the loop skips to the next process year
years_of_record <- unique(WinterCritDates_bySite$process_year)
start_year <- years_of_record[1]
end_year <- tail(years_of_record, 1)
site.data.list <- list()
message('')
message(' ', 'Looping through site years and subsetting flux data based on winter CritDates...')
for (winter_year in start_year:end_year) {
tryCatch(
expr = {
# Create a temporary dataframe in which you subset only the critical dates for the current 'winter year' as well as the previous year's winter
WinterCritDates_tmp <- WinterCritDates_bySite %>%
filter(., process_year %in% c(winter_year - 1, winter_year))
# Determine the start of winter for the current winter period
# This should fall on the previous process year (but not always)
SOW_winter_year <- WinterCritDates_tmp[which(WinterCritDates_tmp$CritThreshold == "SNW")[1],]
# Determine the end of winter for the current winter period
# This always falls on the current process year
EOW_winter_year <- WinterCritDates_tmp[tail(which(WinterCritDates_tmp$CritThreshold == "ECW"),1),]
# Test whether SOW_winter_year occcurs AFTER EOW_winter_year (in normal years this is NOT the case)
# If this test evaluates to TRUE, move to the next year
if (SOW_winter_year$k_POSIXdate_plotting > EOW_winter_year$k_POSIXdate_plotting) {
message(" * Incomplete winter on process year: ", winter_year)
next
}
# Bind these two dates and arrange according to the POSIXdate column
WinterCritDates_byYear <- rbind(SOW_winter_year, EOW_winter_year) %>%
arrange(k_POSIXdate_plotting)
# In some cases, we may have a viable 'start of winter' but no corresponding 'end of winter' date (or vice versa)
# Since winter must have a beginning and end, these cases prevent us from analyzing the data and we should skip to the next year
if (nrow(WinterCritDates_byYear) < 2) {
message(" * Incomplete winter on process year: ", winter_year)
next
}
# Determine the length of winter (no. days) for the given process year
no.days_in_winter <- interval(SOW_winter_year$k_POSIXdate_plotting, EOW_winter_year$k_POSIXdate_plotting, tzone = "UTC")/days(1)
# Divide this period into 5 subintervals, and calculate the length of days
no.days_per_interval <- round(no.days_in_winter/5)
# Determine the start date of each interval
WinterCritDates_byInterval <- seq(ymd(SOW_winter_year$k_POSIXdate_plotting), EOW_winter_year$k_POSIXdate_plotting, by = no.days_per_interval)
# Append the date corresponding to the end of winter
WinterCritDates_byInterval <- c(WinterCritDates_byInterval, EOW_winter_year$k_POSIXdate_plotting)
int1_latefall <- seq(WinterCritDates_byInterval[1], WinterCritDates_byInterval[2]-1, by = "days")
int2_earlywint <- seq(WinterCritDates_byInterval[2], WinterCritDates_byInterval[3]-1, by = "days")
int3_midwint <- seq(WinterCritDates_byInterval[3], WinterCritDates_byInterval[4]-1, by = "days")
int4_latewint <- seq(WinterCritDates_byInterval[4], WinterCritDates_byInterval[5]-1, by = "days")
# NOTE: Since we're rounding our subintervals to the nearest day, it's unlikely that each subinterval will have the exact same vector dimensions
# To deal with this, we'll add any dangling dates to the final interval (early spring)
int5_earlyspr <- seq(WinterCritDates_byInterval[5], tail(WinterCritDates_byInterval,1), by = "days")
# Create a new dataframe containing the sequence of dates for each winter subinterval
WinterIntervals_byYear <- lst(int1_latefall, int2_earlywint, int3_midwint, int4_latewint, int5_earlyspr) %>%
enframe %>%
unnest(cols = c("name", "value")) %>%
rename("interval" = name, "k_POSIXdate_plotting" = value) %>%
as.data.frame()
# Take your complete_HH_dataset and join it to your WinterIntervals_byYear dataframe
df_winter_HH <- WinterIntervals_byYear %>%
left_join(complete_HH_dataset, by = "k_POSIXdate_plotting") %>%
select(interval, db:k_POSIXdate_local, k_POSIXdate_plotting, everything())
# Store the no. of days in winter to a list, indexed by process year
site.data.list[[as.character(winter_year)]] <- df_winter_HH
rm(df_winter_HH)
}, # end TryCatch expr
# If loop fails for a particular year,
error = function(e){
message(" * Caught an error on process year: ", winter_year)
print(e)
}
) # end TryCatch
} # end winter_year loop
rm(complete_HH_dataset)
message('')
message(' ', 'Appending site years into a dataframe...')
site.data.df <- site.data.list %>%
bind_rows(.id = "source") %>%
group_by(source) %>%
rename('winter_year' = source)
if (export.files) {
message('')
message(' ', 'Exporting winter flux dataframes to: ')
message(' ', outpath)
message('')
outfile <- sprintf('%s_%s-%d-%d_WinterOnly-Output.txt', db, site, start_year, end_year)
write.table(site.data.df, file = paste0(outpath, outfile),
sep = "\t", row.names = FALSE,
col.names = TRUE,
append = FALSE)
}
rm(start_year, end_year, years_of_record)
} # end site loop
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.