gapfill_met <- function(data, var, new_col = FALSE) {
# fails if observation is near the beginning or end of dataset, need to fix
# assign data, var
cat(paste0("\n", "Gap filling: ", var, "\n"))
df <- data
var.na <- df$timestamp[which(is.na(df[, var]))]
if (length(var.na) > 0) {
locs <- match(var.na, df$timestamp)
#pbar <- progress_estimated(length(locs))
for (i in 1:length(locs)) {
if (locs[i] > 24 & locs[i] < (nrow(df) - 24)) {
point <- df[locs[i], ]
period <- df[(locs[i] - 24):(locs[i] + 24), ]
#browser()
# expand range if too many NA values
if (length(period[, var][is.na(period[, var])]) > 24) {
if (locs[i] > 48 & locs[i] < (nrow(df) - 48)) {
period <- df[(locs[i] - 48):(locs[i] + 48), ]
# make sure there are enough non-NA values in range
if (length(period[, var][is.na(period[, var])]) >= 48) {
next
}
}
}
# return NA if no conditions are met
} else if (locs[i] > 12 & locs[i] < (nrow(df) - 12)) {
point <- df[locs[i], ]
period <- df[(locs[i] - 12):(locs[i] + 12), ]
} else {
df[locs[i], var] <- NA
next
}
# define spline function
func <-
suppressWarnings(
splinefun(
x = period[, "timestamp"],
y = period[, var],
method = "periodic"
)
)
# execute spline function
pred <- func(point$timestamp)
df[locs[i], var] <- pred
#pbar$tick()$print()
}
}
return(df)
}
## =============================================================================
#' Gap-fill a Variable Using the MDS Algorithm
#'
#' Original name: sEddyProc_sMDSGapFill
#' @author AMM, TW
#'
#' @description
#' MDS gap filling algorithm adapted after the PV-Wave code and paper by Markus
#' Reichstein.
#'
#' @param var A variable to be filled.
#' @param qc_var The quality flag of the variable to be filled
#' @param qc_val The value of quality flag for _good_ (original) data, other
#' data is set to missing
#' @param v1 The 1st condition variable (default: Global radiation "Rg" in
#' W m-2)
#' @param t1 The tolerance interval for the 1st condition variable (default: 50
#' W m-2)
#' @param v2 The 2nd condition variable (default: Vapor pressure deficit "VPD"
#' in hPa)
#' @param t2 The tolerance interval for the 2nd condition variable (default: 5
#' hPa)
#' @param v3 The 3rd condition variable (default: Air temperature "Tair" in
#' degC)
#' @param t3 The tolerance interval for the 3rd condition variable (default:
#' 2.5 degC)
#' @param fill_all A logical value whether to fill all values to estimate
#' uncertainties.
#' @param verbose A logical value whether to print status information to
#' screen.
#' @param max_run A scalar integer indicating how many subsequent numerically
#' equal values to allow until a warning is produced. Set to Inf or NA for no
#' warnings. Defaults for "NEE" to records across 4 hours and no warning for
#' others.
#' @param only_fill A logical value whether to only output a vector of the gap-
#' filled variable.
#'
#' @details
#'
#' Runs of numerically equal numbers hint to problems of the data and cause
#' unreasonable estimates of uncertainty. The user is warned if this occurs.
#'
#' MDS gap filling algorithm calls the subroutines Look Up Table
#' \code{\link{sEddyProc_sFillLUT}} and Mean Diurnal Course
#' \code{\link{sEddyProc_sFillMDC}} with different window sizes as described in
#' the reference. To run dataset only with MDC algorithm
#' \code{\link{sEddyProc_sFillMDC}}, set condition variable v1 to "none".
#'
#' If meteo condition variables (v1, v2, v3) are at default values but do not
#' exist as columns, they are set to "none" (= disabled identifier). This allows
#' running MDS with less variables than prescribed in the default setting. If
#' meteo condition variable are same as variable to fill, also set to "none".
#' This prevents filling artificial gaps (for uncertainty estimates) with itself
#' as meteo condition variable.
#'
#' @return Gap filling results in sTEMP data frame (with renamed columns).
#'
#' @references Reichstein, M. et al. (2005) On the separation of net ecosystem
#' exchange into assimilation and ecosystem respiration: review and improved
#' algorithm. Global Change Biology, 11, 1424-1439.
#'
#' @export
fill_mds <- function(data, var, qc_var = "none", qc_val = NA, v1 = "Rg",
t1 = 50, v2 = "VPD", t2 = 5, v3 = "Tair", t3 = 2.5,
fill_all = TRUE, verbose = TRUE, max_run = NULL,
only_fill = FALSE) {
time_start <- Sys.time()
if (is.null(max_run)) {
dts <- 48
max_run <- if (var == "NEE") 4 * dts / 24 else NA
}
# Initialize temporary data frame for newly generated gap filled data and qualifiers
temp <- fill_init(data, var, qc_var, qc_val, fill_all)
if (is.null(temp)) {
# Abort gap filling if initialization failed
stop("Initialization of gap-filling data frame failed.")
}
if (is.finite(max_run) & (nrow(temp) >= max_run)) {
rl <- check_runs(as.vector(temp$var_orig), min_length = max_run)
if (length(rl$index)) {
rl_sorted <- rl[rev(order(rl$nRep)), , drop = FALSE]
warning(
"Variable ", var,
" contains long runs of numerically equal numbers.",
" Longest of ", rl_sorted$nRep[1],
" repeats of value ", temp$var_orig[rl_sorted$index[1]],
" starts at index ", rl_sorted$index[1]
)
}
}
if ((v1 == "Rg" && !(v1 %in% c(colnames(data)))) || (v1 == var)) {
v1 <- "none"
}
if ((v2 == "VPD" && !(v2 %in% c(colnames(data)))) || (v2 == var)) {
v2 <- "none"
}
if ((v3 == "Tair" && !(v3 %in% c(colnames(data)))) || (v3 == var)) {
v3 <- "none"
}
# Check column names (with "none" as dummy)
check_names(cbind(data, temp), c(v1, v2, v3))
# Check tolerance entries (if condition variable is not "none")
none_cols <- c(v1, v2, v3) %in% "none"
if (!check_value(t1) || !check_value(t2) || !check_value(t3)) {
stop(
"Tolerance intervals must be numeric ",
"(if not specified, set to NA.)"
)
}
if (sum(is.na(c(t1, t2, t3)[!none_cols]))) {
stop(
"If condition variable is specified (dummy name is 'none'), ",
"the tolerance interval must be specified."
)
}
# Run gap filling scheme depending on auxiliary meteo data availability
# Check availablility of meteorological data for LUT
met <-
if (
v1 != "none" && v2 != "none" && v3 != "none"
&& sum(!is.na(data[, v1])) != 0 && sum(!is.na(data[, v2])) != 0 &&
sum(!is.na(data[, v3])) != 0
) {
# All three meteo conditions are available and valid to use:
message(
"Full MDS algorithm for gap filling of '",
attr(temp$var_f, "varnames"),
"' with LUT(", v1, ", ", v2, ", ", v3, ") and MDC."
)
3
} else if (v1 != "none" && sum(!is.na(data[, v1])) != 0) {
# Only one meteo condition available for LUT
message(
"Limited MDS algorithm for gap filling of '",
attr(temp$var_f, "varnames"), "' with LUT(", v1, " only) and MDC."
)
1
} else {
# No meteo condition available (use MDC only)
message(
"Restricted MDS algorithm for gap filling of '",
attr(temp$var_f, "varnames"),
"' with no meteo conditions and hence only MDC."
)
if (!var %in% c("Rg", "PAR", "PPFD")) {
warning("No meteo available for MDS gap filling.")
}
0
}
# Full MDS algorithm =========================================================
# Each call to a gap-filling algorithm updates the temp data
# Step 1: LUT (method 1) with window size +-7 days
if (met == 3) {
temp <- fill_lut(data, temp, 7, v1, t1, v2, t2, v3, t3, verbose = verbose)
}
# Step 2: LUT (method 1) with window size +-14 days
if (met == 3) {
temp <- fill_lut(data, temp, 14, v1, t1, v2, t2, v3, t3, verbose = verbose)
}
# Step 3: LUT, Rg only (method 2) with window size +-7 days,
if (met == 3 || met == 1) {
temp <- fill_lut(data, temp, 7, v1, t1, verbose = verbose)
}
# Step 4: MDC (method 3) with window size 0 (same day)
temp <- fill_mdc(temp, 0, verbose = verbose)
# Step 5: MDC (method 3) with window size +-1, +-2 days
temp <- fill_mdc(temp, 1, verbose = verbose)
temp <- fill_mdc(temp, 2, verbose = verbose)
# Step 6: LUT (method 1) with window size +-21, +-28, ..., +-70
if (met == 3) {
for (i in seq(21, 70, 7)) {
temp <- fill_lut(data, temp, i, v1, t1, v2, t2, v3, t3, verbose = verbose)
}
}
# Step 7: LUT (method 2) with window size +-14, +-21, ..., +-70
if (met == 3 || met == 1) {
for (i in seq(14, 70, 7)) {
temp <- fill_lut(data, temp, i, v1, t1, verbose = verbose)
}
}
# Step 8: MDC (method 3) with window size +-7, +-14, ..., +-210 days
for (i in seq(7, 210, 7)) {
temp <- fill_mdc(temp, i, verbose = verbose)
}
# Set long gaps again to NA
temp$var_fall <- suppressMessages(convert_gaps(temp$var_fall))
# Message on gap filling
time_diff <- as.numeric(Sys.time()) - as.numeric(time_start)
message(
"Finished gap filling of '", var, "' in ",
floor(time_diff),
" seconds. Artificial gaps filled: ",
length(temp$var_fall) - sum(is.na(temp$var_fall)), ", real gaps filled: ",
sum(is.na(temp$var_orig)), ", unfilled (long) gaps: ",
sum(is.na(temp$var_fall)), "."
)
# Return only the gap-filled data (if requested)
if (only_fill) {
return(temp[, "var_f"])
}
# Rename new columns generated during gap filling
colnames(temp) <- gsub("var_", paste0(var, "_"), colnames(temp))
return(temp)
}
#' =============================================================================
#' Initalize Gap Filling
#' =============================================================================
#'
#' Original name: sEddyProc_sFillInit
#' @author AMM
#'
#' @description
#' Initializes temporary data frame for newly generated gap filled data and
#' qualifiers.
#'
#' @param data A data frame.
#' @param var variable to be filled
#' @param qc_var quality flag of variable to be filled
#' @param qc_val value of quality flag for _good_ (original) data, other data
#' is set to missing
#' @param fill_all Fill all values to estimate uncertainties
#'
#' @section Details
#' Long gaps (larger than 60 days) are not filled.
#'
#' Newly created variables with gap-filled data and qualifiers:
#' * VAR\emph{_orig} Original values of variable VAR used for gap filling
#' * VAR\emph{_f } Original values and filled gaps
#' * VAR\emph{_fqc} Quality flag assigned depending on gap filling method and
#' window length (0 = original data, 1 = most reliable, 2 = medium, 3 = least
#' reliable)
#' * VAR\emph{_fall} All values considered as gaps (for uncertainty estimates)
#' * VAR\emph{_fall_qc} Quality flag assigned depending on gap filling method
#' and window length (1 = most reliable, 2 = medium, 3 = least reliable)
#' * VAR\emph{_fnum} Number of datapoints used for gap-filling
#' * VAR\emph{_fsd} Standard deviation of datapoints used for gap filling
#' (uncertainty)
#' * VAR\emph{_fmeth} Method used for gap filling (1 = similar meteo condition
#' (fill_lut with Rg, VPD, Tair), 2 = similar meteo (fill_lut with Rg only),
#' 3 = mean diurnal course (fill_mdc))
#' * VAR\emph{_fwin} Full window length used for gap filling
#'
#' @return A temporary data frame ready to hold results of gap filling.
#'
#' @export
fill_init <- function(data, var, qc_var = "none", qc_val = NA,
fill_all = TRUE) {
'Initializes data frame "temp" for newly generated gap filled data and
qualifiers.'
# Check variable to fill and apply quality flag
check_names(data, c(var, qc_var))
gf_var <- apply_qc(data, var, qc_var, qc_val)
# Abort if variable to be filled contains no data
if (sum(!is.na(gf_var)) == 0) {
warning("Variable to be filled (", var, ") contains no data.")
return(-111)
}
temp <- data.frame(
var_orig = gf_var,
var_f = NA,
var_fqc = NA,
var_fall = NA,
var_fall_qc = NA,
var_fnum = NA,
var_fsd = NA,
var_fmeth = NA,
var_fwin = NA
)
# Set fqc to zero for original values
temp$var_f <- temp$var_orig
temp$var_fqc <- ifelse(!is.na(temp$var_orig), 0, NA)
# Set filling of only gaps
if (fill_all == FALSE) temp$var_fall <- temp$var_orig # "prefill" with original data
# Add units
attr(temp$var_f, "units") <- attr(gf_var, "units")
attr(temp$var_fall, "units") <- attr(gf_var, "units")
attr(temp$var_fsd, "units") <- attr(gf_var, "units")
attr(temp$var_fwin, "units") <- "days"
# Gaps longer than 60 days are not filled
gap_length <- calc_gaplength(temp$var_orig)
dts <- 48
max_gap <- dts * 60 # half-hours in 60 days
while (max(gap_length) > max_gap) {
# Flag long gap with -9999.0
end <- which(gap_length == max(gap_length))
start <- end - max(gap_length) + 1
temp$var_fall[start:end] <- -9999.0 # Set to -9999.0 as a flag for long gaps
gap_length[start:end] <- -1 # Set to -1 since accounted for
warning(
"The long gap between position ", start, " and ",
end, " will not be filled."
)
}
if (fill_all == TRUE) {
message(
"Initialized variable '", var, "' with ", sum(is.na(temp$var_orig)),
" real gaps for gap filling of all ", sum(is.na(temp$var_fall)),
" values (to estimate uncertainties)."
)
} else {
message(
"Initialized variable '", var, "' with ", sum(is.na(temp$var_orig)),
" real gaps for gap filling."
)
}
return(temp)
}
## =============================================================================
#' Execute the Mean Diurnal Course (MDC) Algorithm
#'
#' Original name: sEddyProc_sFillMDC
#' @author AMM
#'
#' @description
#' Mean Diurnal Course (MDC) algorithm based on average values within +/- one
#' hour of adjacent days.
#'
#' @param temp
#' @param win_days The window size for filling in number of days.
#' @param verbose A logical value whether to print status information to
#' screen.
#'
#' @details
#' Quality flags:
#' * 1: nDay <= 1
#' * 2: nDay [2,5)
#' * 3: nDay > 5
#'
#' @return MDC filling results in sTEMP data frame.
#'
#' @export
fill_mdc <- function(temp, win_days, verbose = TRUE) {
lgf <- matrix(NA, nrow = 0, ncol = 7, dimnames = list(NULL, c(
"index", "mean", "fnum", "fsd", "fmeth", "fwin", "fqc"
)))
# Determine gap positions
gap_psns <- which(is.na(temp$var_fall))
if (length(gap_psns) > 0) {
for (i in 1:length(gap_psns)) {
# Message on progress if wanted
if (verbose && i == 1) {
message(
"Mean diurnal course with window size of ", win_days,
" days: .", sep = ""
)
}
# Set index within window size
dts <- 48
gap <- gap_psns[i]
index <- numeric(0)
for (j in (0:win_days)) {
if (j == 0) {
index <- c(index, gap + (-2:2))
} else {
index <- c(
index, gap + c(-j * dts + (-2:2)),
gap + c(j * dts + (-2:2))
)
}
}
index <- index[index > 0 & index <= nrow(temp)]
# If enough available data, fill gap
mdc <- subset(temp$var_orig[index], !is.na(temp$var_orig[index]))
if (length(mdc) > 1) {
lvar_index <- gap
lvar_mean <- mean(mdc)
lvar_fnum <- length(mdc)
lvar_fsd <- sd(mdc)
lvar_fmeth <- 3
# Set window size and quality flag
# Window calculation changes depending on size
lvar_fwin <- if (win_days < 7) {
2 * win_days + 1
} else {
win_days + 1
}
if (lvar_fwin <= 1) lvar_fqc <- 1
if (lvar_fwin > 1 & lvar_fwin <= 5) lvar_fqc <- 2
if (lvar_fwin > 5) lvar_fqc <- 3
lgf <- rbind(lgf, c(
lvar_index, lvar_mean, lvar_fnum,
lvar_fsd, lvar_fmeth, lvar_fwin, lvar_fqc
))
}
if (verbose && i %% 100 == 0) message(".", appendLF = FALSE)
if (verbose && i %% 6000 == 0) message("\n.", appendLF = FALSE)
}
if (verbose) message("", nrow(lgf))
}
# Copy gap filled values and properties to temp
if (nrow(lgf) > 0) {
# Fill all rows in var_fall and co
temp[lgf[, "index"], c(
"var_fall", "var_fnum", "var_fsd", "var_fmeth", "var_fwin", "var_fall_qc"
)] <- lgf[, c("mean", "fnum", "fsd", "fmeth", "fwin", "fqc")]
# Only fill gaps in var_f and var_fqc
gaps <- is.na(temp[lgf[, "index"], "var_f"])
temp[lgf[, "index"], c("var_f", "var_fqc")][gaps, ] <-
as.data.frame(lgf[, c("mean", "fqc"), drop = FALSE])[gaps, ]
}
# Other columns are specific for full MR MDS algorithm
return(temp)
}
## =============================================================================
#' Execute the Look-Up Table (LUC) Algorithm
#'
#' Original name: sEddyProc_sFillLUT
#' @author AMM
#'
#' @description
#' Look-Up Table (LUT) algorithm of up to five conditions within prescribed
#' window size.
#'
#' @param win_days Window size for filling in days.
#' @param v1 Condition variable 1.
#' @param t1 Tolerance interval 1.
#' @param v2 Condition variable 2.
#' @param t2 Tolerance interval 2.
#' @param v3 Condition variable 3.
#' @param t3 Tolerance interval 3.
#' @param v4 Condition variable 4.
#' @param t4 Tolerance interval 4.
#' @param v5 Condition variable 5.
#' @param t5 Tolerance interval 5.
#' @param verbose Print status information to screen.
#'
#' @details
#' Quality flags:
#' * 1: at least one variable and nDay <= 14
#' * 2: three variables and nDay in [14,56)
#' or one variable and nDay in [14,28)
#' * 3: three variables and nDay > 56
#' or one variable and nDay > 28
#'
#' @return LUT filling results in temporary data frame.
#'
#' @export
fill_lut <- function(data, temp, win_days, v1 = "none", t1 = NA, v2 = "none",
t2 = NA, v3 = "none", t3 = NA, v4 = "none", t4 = NA,
v5 = "none", t5 = NA, verbose = TRUE) {
lgf <- matrix(NA, nrow = 0, ncol = 7, dimnames = list(
NULL, c("index", "mean", "fnum", "fsd", "fmeth", "fwin", "fqc")
))
# Check if temp has been initialized with new var_ columns
if (!exists("var_f", temp)) {
stop(
"Temporary data frame temp for processing results has not ",
"been initalized with fill_init."
)
}
# Determine gap positions
gap_psns <- which(is.na(temp$var_fall))
if (length(gap_psns) > 0) {
for (i in 1:length(gap_psns)) {
# Message on progress if wanted
none_cols <- c(v1, v2, v3, v4, v5) %in% "none"
if (verbose && i == 1) {
message(
"Look up table with window size of ", win_days, " days with ",
paste(c(v1, v2, v3, v4, v5)[!none_cols], collapse = " ")
)
}
# Set window size
dts <- 48
gap <- gap_psns[i]
start <- gap - (win_days * dts - 1)
end <- gap + (win_days * dts - 1)
if (start <= 0) start <- 1
if (end > nrow(temp)) end <- nrow(temp)
t1red <- if (grepl("Rg", v1)) {
# Reduce tolerance of radiation if variable name contains 'Rg' to
# [20, 50] depending on measurement
max(min(t1, data[gap, v1], na.rm = T), 20, na.rm = T)
} else {
t1
}
# For performance reasons, write sDATA subrange into vectors
v1n <- data[start:end, v1]
v2n <- data[start:end, v2]
v3n <- data[start:end, v3]
v4n <- data[start:end, v4]
v5n <- data[start:end, v5]
sub_gap <- gap - (start - 1)
# Set LUT fill condition
rows <- !is.na(temp$var_orig[start:end])
if (v1 != "none") {
rows <- rows & abs(v1n - v1n[sub_gap]) < t1red & !is.na(v1n)
}
if (v2 != "none") {
rows <- rows & abs(v2n - v2n[sub_gap]) < t2 & !is.na(v2n)
}
if (v3 != "none") {
rows <- rows & abs(v3n - v3n[sub_gap]) < t3 & !is.na(v3n)
}
if (v4 != "none") {
rows <- rows & abs(v4n - v4n[sub_gap]) < t4 & !is.na(v4n)
}
if (v5 != "none") {
rows <- rows & abs(v5n - v5n[sub_gap]) < t5 & !is.na(v5n)
}
lut <- subset(temp$var_orig[start:end], rows)
# If enough available data, fill gap
if (length(lut) > 1) {
lvar_index <- gap
lvar_mean <- mean(lut)
lvar_fnum <- length(lut)
lvar_fsd <- sd(lut)
# Set window size and quality flag
lvar_fwin <- 2 * win_days
lvar_fmeth <- NA
lvar_fqc <- NA
if (v1 != "none" && v2 != "none" && v3 != "none") {
# Three conditions
lvar_fmeth <- 1
if (lvar_fwin <= 14) lvar_fqc <- 1
if (lvar_fwin > 14 & lvar_fwin <= 56) lvar_fqc <- 2
if (lvar_fwin > 56) lvar_fqc <- 3
}
if (v1 != "none" && v2 == "none" && v3 == "none") {
# One condition
lvar_fmeth <- 2
if (lvar_fwin <= 14) lvar_fqc <- 1
if (lvar_fwin > 14 & lvar_fwin <= 28) lvar_fqc <- 2
if (lvar_fwin > 28) lvar_fqc <- 3
}
lgf <- rbind(lgf, c(
lvar_index, lvar_mean, lvar_fnum, lvar_fsd,
lvar_fmeth, lvar_fwin, lvar_fqc
))
}
if (verbose && i %% 100 == 0) message(".", appendLF = FALSE)
if (verbose && i %% 6000 == 0) message("\n.", appendLF = FALSE)
}
if (verbose) message("", nrow(lgf))
}
# Copy gap filled values and properties to temp
if (nrow(lgf) > 0) {
# Fill all rows in var_fall and co
temp[lgf[, "index"], c(
"var_fall", "var_fnum", "var_fsd", "var_fmeth", "var_fwin", "var_fall_qc"
)] <- lgf[, c("mean", "fnum", "fsd", "fmeth", "fwin", "fqc")]
# Only fill gaps in var_f and var_fqc
gaps <- is.na(temp[lgf[, "index"], "var_f"])
temp[lgf[, "index"], c("var_f", "var_fqc")][gaps, ] <-
as.data.frame(lgf[, c("mean", "fqc"), drop = FALSE])[gaps, ]
}
# Other columns are specific for full MR MDS algorithm
return(temp)
}
#' =============================================================================
#' Calculate Length of Gaps in a Vector
#' =============================================================================
#'
#' Original name: fCalcLengthOfGaps
#' @author AMM
#'
#' @section Description
#' Calculate length of gaps (with flag NA) in specified vector.
#'
#' @param x numeric vector with gaps (missing values, NAs)
#'
#' @section Value
#' An integer vector with length of gap since start of gap.
#'
calc_gaplength <- function(x) {
x_na <- is.na(x)
# Determine cumulative sums of gaps
sum <- cumsum(x_na)
# Calculate sum from start of gap
out <- sum - cummax((!x_na) * sum)
return(out)
}
## =============================================================================
#' Convert All Gap Flags to NA
#'
#' Original name: fConvertGapsToNA
#' @author AMM
#'
#' @description Converts all gap flags to NA.
#'
#' @param data data frame to be converted
#' @param flag flag value used for gaps, defaults to -9999.0
#'
#' @return A data frame with NAs instead of gaps.
#'
convert_gaps <- function(data, flag = -9999.0) {
gaps <- sum(data == flag, na.rm = T)
data[data == flag] <- NA_real_
message('Number of \'', flag, '\' convertered to NA: ', gaps)
return(data)
}
## =============================================================================
#' Detect Runs of Equal Values
#'
#' Original name: .runLength
#' @author
#'
#' @param x vector to check for runs
#' @param min_length minimum run length to report
#'
#' @section Value
#' Data frame with columns:
#' \itemize{
#' \item{index} starting index of runs
#' \item{nRep} length of the run
#' }
#'
check_runs <- function(x, min_length = 2) {
# TODO implement na.rm = TRUE to not interreput runs by NA
# Problems:
# with na.omit() indices are hard to transfer back
# with fillNAFoward, NAs after a value are counted as repeats, even if
# the value after NA is different
rl <- rle(x)$lengths
rlc <- cumsum(rl) + 1 - rl
iiRun <- which(rl >= min_length)
iRun <- rlc[iiRun]
out <- data.frame(index = iRun, nRep = rl[iiRun])
return(out)
}
## =============================================================================
#' Title
#'
#' @description
#' Original name: sEddyProc_sMDSGapFillAfterUstar
#'
#' @param fluxVar
#' @param uStarVar
#' @param uStarTh
#' @param uStarSuffix
#' @param isFlagEntryAfterLowTurbulence
#' @param isFilterDayTime
#' @param swThr
#' @param RgColName
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
sEddyProc_sMDSGapFillAfterUstar <- function(
### sEddyProc$sMDSGapFillAfterUstar - MDS gap filling algorithm after u* filtering
fluxVar ##<< Flux variable to gap fill after ustar filtering
, uStarVar = 'Ustar' ##<< Column name of friction velocity u * (ms-1),
## default 'Ustar'
, uStarTh = ##<< data.frame with
## first column, season names, and second column estimates of uStar Threshold.
## Alternatively, a single value to be used as threshold for all records
## If only one value is given, it is used for all records.
usGetAnnualSeasonUStarMap(sUSTAR_DETAILS$uStarTh)
, uStarSuffix = 'uStar' ##<< Different suffixes required are for
## different u * scenarios
, isFlagEntryAfterLowTurbulence = FALSE ##<< Set to TRUE for flagging the
## first entry after low turbulance as bad condition (by value of 2).
, isFilterDayTime = FALSE ##<< Set to TRUE to also filter day-time values,
## default only filters night-time data
, swThr = 10 ##<< threshold of solar radiation below which data is
## marked as night time respiration.
, RgColName = "Rg" ##<< Column name of incoming short wave radiation
, ... ##<< Other arguments passed to \code{\link{sEddyProc_sMDSGapFill}}
) {
'Calling sMDSGapFill after filtering for (provided) friction velocity u * '
##author<<
## AMM, TW
##details<<
## Calling \code{\link{sEddyProc_sMDSGapFill}} after filtering for
## (provided) friction velocity u*
##
## The u* threshold(s) are provided with argument \code{uStarTh} for
## filtering the conditions of low turbulence.
## After filtering, the data is gap filled using the MDS algorithm
## \code{\link{sEddyProc_sMDSGapFill}}.
#
##seealso<<
## \itemize{
## \item \code{\link{sEddyProc_sEstimateUstarScenarios}} and
## \code{link{sEddyProc_sEstUstarThold}} for estimating the
## u* threshold from the data.
## \item \code{\link{sEddyProc_sMDSGapFillUStarScens}} for
## automated gapfilling for several scenarios of u* threshold estimates.
## }
#
uStarThresVec <- if (is.numeric(uStarTh) ) {
if (length(uStarTh) != 1L) stop(
"Without seasons, only a single uStarThreshold can be provided,"
, " but got a vector.")
uStarThresVec <- rep(uStarTh, nrow(.self$sDATA) )
} else {
if (!("season" %in% colnames(sTEMP)) ) stop(
"Seasons not defined yet. Provide argument seasonFactor to sEstUstarThold.")
# make sure merge will work
colnames(uStarTh) <- c("season", "uStarThreshold")
if (any(!is.finite(uStarTh$uStarThreshold))) stop(
"must provide finite uStarThresholds")
iMissingLevels <- which(!(levels(.self$sTEMP$season) %in% uStarTh$season))
if (length(iMissingLevels) ) stop(
"missing uStarTrheshold for seasons "
, paste(levels(.self$sTEMP$season)[iMissingLevels], collapse = ", "))
tmpDs <- merge(subset(sTEMP, select = "season"), uStarTh, all.x = TRUE)
uStarThresVec <- tmpDs[, 2L]
}
# Check column names (with 'none' as dummy)
# (Numeric type and plausibility have been checked on initialization of sEddyProc)
fCheckColNames(sDATA, c(fluxVar, uStarVar), 'sMDSGapFillAfterUstar')
# Filter data
uStar <- sDATA[, uStarVar]
qfUStar <- integer(nrow(sDATA) ) # 0L
# if not filtering dayTimeValues, create a vector that is TRUE only for nightTime
isRowFiltered <- if (isFilterDayTime) TRUE else
(!is.finite(sDATA[, RgColName]) | sDATA[, RgColName] < swThr)
# mark low uStar or bad uStar as 1L
qfUStar[
isRowFiltered &
!is.na(uStarThresVec) &
(sDATA[[uStarVar]] < uStarThresVec)
] <- 1L
if (isTRUE(isFlagEntryAfterLowTurbulence) ) {
##details<<
## With \code{isFlagEntryAfterLowTurbulence set to TRUE}, to be more
## conservative, in addition
## to the data acquired when uStar is below the threshold,
## the first half hour measured with good turbulence conditions
## after a period with low turbulence is also removed (Papaple et al. 2006).
qfUStar[which(diff(qfUStar) == 1) + 1] <- 2L
}
# mark those conditions as bad, when no threshold is defined
qfUStar[isRowFiltered & !is.finite(uStarThresVec) ] <- 3L
# mark those recods as bad, where uStar is not defined
qfUStar[isRowFiltered & !is.finite(uStar) ] <- 4L
message(
'Ustar filtering (u * Th_1 = ', uStarThresVec[1], '), marked '
, (signif(sum(qfUStar != 0) / length(qfUStar), 2)) * 100
, '% of the data as gap' )
if (isTRUE(isFlagEntryAfterLowTurbulence) ) {
message(
'(including removal of the first half-hour after a '
, 'period of low turbulence).')
}
# Add filtering step to (temporal) results data frame
suffixDash.s <- paste(
(if (fCheckValString(uStarSuffix)) '_' else ''), uStarSuffix, sep = '')
attr(uStarThresVec, 'varnames') <- paste(
'Ustar', suffixDash.s, '_Thres', sep = '')
attr(uStarThresVec, 'units') <- 'ms-1'
attr(qfUStar, 'varnames') <- paste(
'Ustar', suffixDash.s, '_fqc', sep = '')
attr(qfUStar, 'units') <- '-'
sTEMP$USTAR_Thres <<- uStarThresVec
sTEMP$USTAR_fqc <<- qfUStar
colnames(sTEMP) <<- gsub(
'USTAR_', paste('Ustar', suffixDash.s, '_', sep = ''), colnames(.self$sTEMP))
# Check for duplicate columns (to detect if different processing setups
# were executed without different suffix)
if (length(names(which(table(colnames(.self$sTEMP)) > 1))) ) {
warning('sMDSGapFillAfterUstar::: Duplicated columns found!'
, ' Please specify different suffix when processing different"
, " setups on the same dataset!')
}
# Gap fill data after applying ustar filtering
sMDSGapFill(
fluxVar, QFVar = attr(qfUStar, 'varnames'), QFValue = 0, ...
, suffix = uStarSuffix)
##value<<
## Vector with quality flag from filtering (here 0: good data
## , 1: low turbulence, 2: first half hour after low turbulence
## , 3: no threshold available, 4: missing uStar value)
## Gap filling results are in sTEMP data frame (with renamed columns)
## that can be retrieved by \code{\link{sEddyProc_sExportResults}}.
return(invisible(qfUStar))
# example in Eddy.R sEddyProc.example
}
## =============================================================================
#' Title
#'
#' @param ...
#' @param uStarTh
#' @param uStarSuffixes
#'
#' @return
#' @export
#'
#' @examples
sEddyProc_sMDSGapFillAfterUStarDistr <- function(
### GapFilling for several filters of estimated friction velocity Ustar thresholds.
... ##<< other arguments to
## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} and \code{\link{sEddyProc_sMDSGapFill}}
## such as \code{fluxVar}
, uStarTh ##<< data.frame with first column, season names,
## and remaining columns different estimates of uStar Threshold.
## If the data.frame has only one row, then each uStar threshold estimate
## is applied to the entire dataset.
## Entries in first column must match levels in argument \code{seasonFactor}
, uStarSuffixes = colnames(uStarTh)[-1] ##<< String vector
## to distinguish result columns for different ustar values.
## Its length must correspond to column numbers in \code{UstarThres.m.n}.
# return value function \code{\link{sEddyProc_sEstUstarThresholdDistribution}}
) {
##details<< This method is superseedec by
##\code{\link{sEddyProc_sMDSGapFillUStarScens}} and only there
## for backward portability.
warning(
"Method sEddyProc_sMDSGapFillAfterUStarDistr has been deprecated. "
, "Please, replace it by calls to 'sEddyProc_sMDSGapFillUStarScens' "
, "and if applicable calling before 'sEddyProc_sSetUstarScenarios'."
, "see ?sEddyProc_sMDSGapFillUStarScens and vignette('uStarCases')"
)
if (missing(uStarTh)) stop(
"Need to provide argument uStarTh. Since version 1.1.6 "
, " Please, use new methods sEddyProc_sSetUstarScenarios and"
, " sEddyProc_sMDSGapFillUStarScens instead of this method."
, " see ?sEddyProc_sMDSGapFillUStarScens and ?")
.self$sSetUstarScenarios(uStarTh, uStarSuffixes)
.self$sMDSGapFillUStarScens(...)
}
## =============================================================================
#' Title
#'
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
sEddyProc_sMDSGapFillUStarScens <- function(
### GapFilling for several filters of estimated friction velocity Ustar thresholds.
... ##<< other arguments to
## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} and
## \code{\link{sEddyProc_sMDSGapFill}}
## such as \code{fluxVar}
) {
##author<< TW
##details<<
## sEddyProc$sMDSGapFillUStarDistr: calling
## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} for several filters of
## friction velocity Ustar.
##
## The scenarios need to be set before by
## \code{\link{sEddyProc_sSetUstarScenarios}} or accepting the defaults
## annual estimates of \code{link{sEddyProc_sEstimateUstarScenarios}}.
##
## Then the difference between output columns NEE_U05_f and NEE_U95_f
## corresponds to the uncertainty
## introduced by the uncertain estimate of the u* threshold.
#
##seealso<<
## \href{useCase vignette}{../doc/useCase.html}
uStarTh <- .self$sUSTAR_SCEN
uStarSuffixes <- colnames(.self$sUSTAR_SCEN)[-1]
nEstimates <- ncol(uStarTh) - 1L
#iCol <- 1L
filterCols <- lapply(seq(1L:nEstimates), function(iCol) {
.self$sMDSGapFillAfterUstar(
...
, uStarTh = uStarTh[, c(1L, 1L + iCol)]
, uStarSuffix = uStarSuffixes[iCol]
)
})
filterMat <- do.call(cbind, filterCols)
return(invisible(filterMat))
##value<<
## Matrix (columns correspond to u* Scenarios) with quality flag from
## filtering ustar (0 - good data, 1 - filtered data)
##
## Gap filling results in sTEMP data frame (with renamed columns), that
## can be retrieved by \code{\link{sEddyProc_sExportResults}}.
## Each of the outputs is calculated for several u* r-estimates and
## distinguished by a suffix after the variable.
## E.g. with an an entry "U05" in \code{uStarSuffixes} in
## \code{\link{sEddyProc_sSetUstarScenarios}}
## the corresponding filled NEE can be found in output column "NEE_U05_f".
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.