#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++ R script with sEddyProc methods for gap filling
#+++ MDS gap filling algorithm, adapted after the PV-Wave code and paper by
#Markus Reichstein
#+++ Dependencies: Eddy.R, DataFunctions.R, EddyUStarFilerDP.R
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' @include aEddy.R
#' @export
sEddyProc_sFillInit <- function(
### Initializes data frame sTEMP for newly generated gap filled data and qualifiers.
##title<< sEddyProc$sFillInit - Initialize gap filling
Var.s ##<< Variable to be filled
, QFVar.s = 'none' ##<< Quality flag of variable to be filled
, QFValue.n = NA_real_ ##<< Value of quality flag for _good_ (original) data,
## other data is set to missing
, FillAll.b = TRUE ##<< Fill all values to estimate uncertainties
#! , QF.V.b = TRUE ##<< boolean vector of length nRow(sData), to allow
# # specifying bad data directly (those entries that are set to FALSE)
) {
##author<< AMM
'Initializes data frame sTEMP for newly generated gap filled data and qualifiers.'
# Check variable to fill and apply quality flag
fCheckColNames(cbind(sDATA, sTEMP), c(Var.s, QFVar.s), 'sFillInit')
Var.V.n <- fSetQF(cbind(sDATA, sTEMP), Var.s, QFVar.s, QFValue.n, 'sFillInit')
#! Var.V.n[QF.V.b == FALSE] <- NA_real_
# Abort if variable to be filled contains no data
if (sum(!is.na(Var.V.n)) == 0) {
warning('sFillInit::: Variable to be filled (', Var.s, ') contains no data at all!')
return(-111)
}
##details<<
## Description of newly generated variables with gap filled data and qualifiers: \cr
##details<<
## VAR\emph{_orig} - Original values used for gap filling \cr
## VAR\emph{_f } - Original values and gaps filled with mean of selected
## datapoints (condition depending on gap filling method) \cr
## VAR\emph{_fqc} - Quality flag assigned depending on gap filling method and
## window length (0 = original data, 1 = most reliable, 2 = medium, 3 = least reliable) \cr
## VAR\emph{_fall} - All values considered as gaps (for uncertainty estimates) \cr
## VAR\emph{_fall_qc} - Quality flag assigned depending on gap filling method
## and window length (1 = most reliable, 2 = medium, 3 = least reliable) \cr
## VAR\emph{_fnum} - Number of datapoints used for gap-filling \cr
## VAR\emph{_fsd} - Standard deviation of datapoints used for gap
## filling (uncertainty) \cr
## VAR\emph{_fmeth} - Method used for gap filling (1 = similar meteo
## condition (sFillLUT with Rg, VPD, Tair), 2 = similar meteo
## (sFillLUT with Rg only), 3 = mean diurnal course (sFillMDC)) \cr
## VAR\emph{_fwin} - Full window length used for gap filling \cr
lTEMP <- data.frame(
VAR_orig = Var.V.n # Original values of variable VAR used for gap filling
, VAR_f = NA_real_ # Original values and filled gaps
, VAR_fqc = NA_real_ # Quality flag assigned depending on gap filling
## method and window length
, VAR_fall = NA_real_ # All values considered as gaps
## (for uncertainty estimates)
, VAR_fall_qc = NA_real_ # Quality flag assigned depending on gap filling
## method and window length
, VAR_fnum = NA_real_ # Number of datapoints used for gap-filling
, VAR_fsd = NA_real_ # Standard deviation of data points used for filling
, VAR_fmeth = NA_real_ # Method used for gap filling
, VAR_fwin = NA_real_ # Full window length used for gap filling
)
# Set fqc to zero for original values
lTEMP$VAR_f <- lTEMP$VAR_orig
lTEMP$VAR_fqc <- ifelse(!is.na(lTEMP$VAR_orig), 0, NA_real_)
# Set filling of only gaps
if (FillAll.b == FALSE) lTEMP$VAR_fall <- lTEMP$VAR_orig #"prefill" with original data
# Add units
attr(lTEMP$VAR_f, 'units') <- attr(Var.V.n, 'units')
attr(lTEMP$VAR_fall, 'units') <- attr(Var.V.n, 'units')
attr(lTEMP$VAR_fsd, 'units') <- attr(Var.V.n, 'units')
attr(lTEMP$VAR_fwin, 'units') <- 'days'
##details<<
## Long gaps (larger than 60 days) are not filled.
#! Not congruent with PV-Wave, there the code is performed on single years
#only with long gaps of 60 days in the beginning or end skipped.
GapLength.V.n <- fCalcLengthOfGaps(lTEMP$VAR_orig)
kMaxGap.n <- sINFO$DTS * 60 #Halfhours in 60 days
while (max(GapLength.V.n) > kMaxGap.n) {
#Flag long gap with -9999.0
End.i <- which(GapLength.V.n == max(GapLength.V.n))
Start.i <- End.i - max(GapLength.V.n) + 1
lTEMP$VAR_fall[Start.i:End.i] <- -9999.0 #Set to -9999.0 as a flag for long gaps
GapLength.V.n[Start.i:End.i] <- -1 #Set to -1 since accounted for
warning(
'sMDSGapFill::: The long gap between position ', Start.i, ' and '
, End.i, ' will not be filled!')
}
if (FillAll.b == T) {
message(
'Initialized variable \'', Var.s, '\' with ', sum(is.na(lTEMP$VAR_orig))
, ' real gaps for gap filling of all ', sum(is.na(lTEMP$VAR_fall))
, ' values (to estimate uncertainties).')
} else {
message('Initialized variable \'', Var.s, '\' with ', sum(is.na(lTEMP$VAR_orig)),
' real gaps for gap filling.')
}
# twutz: error prone if sTEMP already contains columns of lTEMP
sTEMP <<- data.frame(c(sTEMP, lTEMP))
return(invisible(NULL))
}
sEddyProc$methods(sFillInit = sEddyProc_sFillInit)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' @export
sEddyProc_sFillLUT <- function(
### Look-Up Table (LUT) algorithm of up to five conditions within prescribed window size
WinDays.i ##<< Window size for filling in days
, V1.s = 'none' ##<< Condition variable 1
, T1.n = NA_real_ ##<< Tolerance interval 1
, V2.s = 'none' ##<< Condition variable 2
, T2.n = NA_real_ ##<< Tolerance interval 2
, V3.s = 'none' ##<< Condition variable 3
, T3.n = NA_real_ ##<< Tolerance interval 3
, V4.s = 'none' ##<< Condition variable 4
, T4.n = NA_real_ ##<< Tolerance interval 4
, V5.s = 'none' ##<< Condition variable 5
, T5.n = NA_real_ ##<< Tolerance interval 5
, Verbose.b = TRUE ##<< Print status information to screen
, calculate_gapstats = calculate_gapstats_Reichstein05 ##<< function computing gap-statistics
) {
##author<< AMM, TW
#! Attention: For performance reasons, gap filled values and properties are
#first written to single variables and local matrix lGF.M
#! (rather than changing single values in sTEMP which copies the data frame each time!)
#! Improved algorithm speed by more than a factor of 10 (maybe even 100...)
lGF.M <- matrix(NA_real_, nrow = 0, ncol = 7, dimnames = list(
NULL, c('index', 'mean', 'fnum', 'fsd', 'fmeth', 'fwin', 'fqc')))
# Check if sTEMP has been initialized with new VAR_ columns
if (!exists('VAR_f', sTEMP) ) stop(
'sFillLUT::: Temporal data frame sTEMP for processing results has not '
, 'been initalized with sFillInit!')
# Determine gap positions
ToBeFilled.V.i <- which(is.na(sTEMP$VAR_fall))
if (length(ToBeFilled.V.i) > 0) {
for (Pos.i in 1:length(ToBeFilled.V.i) ) {
# Message on progress if wanted
NoneCols.b <- c(V1.s, V2.s, V3.s, V4.s, V5.s) %in% 'none'
if (Verbose.b && Pos.i == 1) message(
'Look up table with window size of ', WinDays.i, ' days with '
, paste(c(V1.s, V2.s, V3.s, V4.s, V5.s)[!NoneCols.b], collapse = ' '))
# Set window size
Gap.i <- ToBeFilled.V.i[Pos.i]
if (T == T) {
#! Non-congruent with MR PV-Wave
Start.i <- Gap.i - (WinDays.i * sINFO$DTS)
End.i <- Gap.i + (WinDays.i * sINFO$DTS)
} else {
#! Code congruent with MR PV-Wave --> window size minus 1
Start.i <- Gap.i - (WinDays.i * sINFO$DTS - 1)
End.i <- Gap.i + (WinDays.i * sINFO$DTS - 1)
}
if (Start.i <= 0) Start.i <- 1
if (End.i > nrow(sTEMP) ) End.i <- nrow(sTEMP)
# Special treatment of Rg to be congruent with MR PV-Wave,
# not mentioned in paper
isV1Rg <- grepl('Rg', V1.s)
T1red.n <- if (isV1Rg) {
# Reduce tolerance of radiation if variable name contains 'Rg' to
# [20, 50] depending on measurement
# at least 20, if Rg>20 then min(Rg, tol=50), larger than 20 for 20<Rg<50
max(min(T1.n, sDATA[Gap.i, V1.s], na.rm = T), 20, na.rm = T)
} else {
T1.n
}
# For performance reasons, write sDATA subrange into vectors
# (speed up about factor of 1.5)
V1.V.n <- sDATA[Start.i:End.i, V1.s]
V2.V.n <- sDATA[Start.i:End.i, V2.s]
V3.V.n <- sDATA[Start.i:End.i, V3.s]
V4.V.n <- sDATA[Start.i:End.i, V4.s]
V5.V.n <- sDATA[Start.i:End.i, V5.s]
SubGap.i <- Gap.i - (Start.i - 1)
# Set LUT fill condition
Rows.V.b <- !is.na(sTEMP$VAR_orig[Start.i:End.i])
if (V1.s != 'none')
Rows.V.b <- Rows.V.b & abs(V1.V.n - V1.V.n[SubGap.i]) < T1red.n & !is.na(V1.V.n)
if (V2.s != 'none')
Rows.V.b <- Rows.V.b & abs(V2.V.n - V2.V.n[SubGap.i]) < T2.n & !is.na(V2.V.n)
if (V3.s != 'none')
Rows.V.b <- Rows.V.b & abs(V3.V.n - V3.V.n[SubGap.i]) < T3.n & !is.na(V3.V.n)
if (V4.s != 'none')
Rows.V.b <- Rows.V.b & abs(V4.V.n - V4.V.n[SubGap.i]) < T4.n & !is.na(V4.V.n)
if (V5.s != 'none')
Rows.V.b <- Rows.V.b & abs(V5.V.n - V5.V.n[SubGap.i]) < T5.n & !is.na(V5.V.n)
lLUT.V.n <- subset(sTEMP$VAR_orig[Start.i:End.i], Rows.V.b)
# Here using <=, alternative could use strict <
#isV1belowT1 <- V1.V.n[Rows.V.b] < V1.V.n[SubGap.i]
# usually fewer values with lower Rg -> use <=
isV1belowT1 <- V1.V.n[Rows.V.b] <= V1.V.n[SubGap.i]
is_nighttime <- if (isV1Rg) {
# determine nighttime by radiation below 10W/m2 corresponding to uStar filter
sDATA[Gap.i, V1.s] < 10
} else {
# without radiation: assume daytime-record
FALSE
}
# If enough available data, fill gap
if (length(lLUT.V.n) > 1) {
lVAR_index.i <- Gap.i
gapstats <- calculate_gapstats(lLUT.V.n, isV1belowT1, is_nighttime)
lVAR_mean.n <- gapstats[1]
lVAR_fnum.n <- gapstats[2]
lVAR_fsd.n <- gapstats[3]
#Set window size and quality flag
##details<< \describe{\item{Quality flags}{
## \itemize{
## \item 1: at least one variable and nDay <= 14
## \item 2: three variables and nDay in [14,56)
## or one variable and nDay in [14,28)
## \item 3: three variables and nDay > 56
## or one variable and nDay > 28
## }
## }}
#! Full window length, congruent with MR PV-Wave, in paper
#single window sizes stated
lVAR_fwin.n <- 2 * WinDays.i
lVAR_fmeth.n <- NA_real_; lVAR_fqc.n <- NA_real_;
if (V1.s != 'none' && V2.s != 'none' && V3.s != 'none') {
#Three conditions
lVAR_fmeth.n <- 1
#! Limit '14' congruent with MR PV-Wave, in paper different limit
#of '28' (stated as single window size of 14 days)
if (lVAR_fwin.n <= 14) lVAR_fqc.n <- 1
if (lVAR_fwin.n > 14 & lVAR_fwin.n <= 56) lVAR_fqc.n <- 2
if (lVAR_fwin.n > 56) lVAR_fqc.n <- 3
}
if (V1.s != 'none' && V2.s == 'none' && V3.s == 'none') {
#One condition only
lVAR_fmeth.n <- 2
if (lVAR_fwin.n <= 14) lVAR_fqc.n <- 1
if (lVAR_fwin.n > 14 & lVAR_fwin.n <= 28) lVAR_fqc.n <- 2
if (lVAR_fwin.n > 28) lVAR_fqc.n <- 3
}
lGF.M <- rbind(lGF.M, c(
lVAR_index.i, lVAR_mean.n, lVAR_fnum.n, lVAR_fsd.n
, lVAR_fmeth.n, lVAR_fwin.n, lVAR_fqc.n))
}
if (Verbose.b && Pos.i %% 100 == 0) message('.', appendLF = FALSE)
if (Verbose.b && Pos.i %% 6000 == 0) message('\n.', appendLF = FALSE)
}
if (Verbose.b) message('', nrow(lGF.M))
}
# Copy gap filled values and properties to sTEMP
if (nrow(lGF.M) > 0) {
# Fill all rows in VAR_fall and co
sTEMP[lGF.M[, 'index'], c(
'VAR_fall', 'VAR_fnum', 'VAR_fsd', 'VAR_fmeth', 'VAR_fwin', 'VAR_fall_qc')] <<-
lGF.M[, c('mean', 'fnum', 'fsd', 'fmeth', 'fwin', 'fqc')]
# Only fill gaps in VAR_f and VAR_fqc
Gaps.b <- is.na(sTEMP[lGF.M[, 'index'], 'VAR_f'])
sTEMP[lGF.M[, 'index'], c('VAR_f', 'VAR_fqc')][Gaps.b, ] <<-
as.data.frame(lGF.M[, c('mean', 'fqc') , drop = FALSE])[Gaps.b, ]
}
#Other columns are specific for full MR MDS algorithm
return(invisible(sTEMP[, c(
'VAR_orig', 'VAR_f', 'VAR_fall', 'VAR_fnum', 'VAR_fsd', 'VAR_fwin')]))
##value<<
## LUT filling results in sTEMP data frame.
}
sEddyProc$methods(sFillLUT = sEddyProc_sFillLUT)
calculate_gapstats_Reichstein05 <- function(lLUT.V.n, ...) {
lVAR_mean.n <- mean(lLUT.V.n)
lVAR_fnum.n <- length(lLUT.V.n)
lVAR_fsd.n <- sd(lLUT.V.n)
c(lVAR_mean.n, lVAR_fnum.n, lVAR_fsd.n)
}
calculate_gapstats_Vekuri23 <- function(
### Compute the gap statistic by separately aggregating reocrds lower or higher than the gap-covariate
lLUT.V.n, ##<< numeric vector of observations
islower, ##<< logical vector indicating if the observation was lower than gaps covariate
is_nighttime, ##<< logical scalar indicating whether record to fill is nighttime
n_min=1 ##<< minimum number of observations in each group.
## If there are fewer observations in on the groups
## return the (skewness-biased) Reichstein05 estimate.
) {
##details<<
## In order to reduce bias due to skewed distributions of covariate.
if (isTRUE(is_nighttime))
return(calculate_gapstats_Reichstein05(lLUT.V.n))
vals_lower <- lLUT.V.n[islower]
vals_higher <- lLUT.V.n[!islower]
if ((length(vals_lower) < n_min) || (length(vals_higher) < n_min))
return(calculate_gapstats_Reichstein05(lLUT.V.n))
mean_lower <- mean(vals_lower)
mean_higher <- mean(vals_higher)
lVAR_mean.n <- (mean_lower + mean_higher)/2
lVAR_fnum.n <- length(lLUT.V.n)
lVAR_fsd.n <- sd(lLUT.V.n) # keep uncertainty of population instead of estimate
# ##details<<
# ## Error propagation assumes upper and lower aggregate to be uncorrelated.
# ## If uncertainty of one group could not be estimated, e.g. if it has only
# ## one observation, then the uncertainty of
# ## the mean across the two values is returned.
# var_lower <- var(vals_lower)
# var_higher <- var(vals_higher)
# lVAR_fsd.n <- if (!is.finite(var_lower) || !is.finite(var_higher)) {
# sd(c(mean_lower, mean_higher))
# } else {
# sqrt(var_lower + var_higher) / 2
# }
c(lVAR_mean.n, lVAR_fnum.n, lVAR_fsd.n)
}
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' @export
sEddyProc_sFillMDC <- function(
### Mean Diurnal Course (MDC) algorithm based on average values within +/- one hour of adjacent days
WinDays.i ##<< Window size for filling in days
, Verbose.b = TRUE ##<< Print status information to screen
) {
##author<< AMM
#! Attention: For performance reasons, gap filled values and properties are first written to single
#! variables and local matrix lGF.M
#! (rather than changing single values in sTEMP which copies the data frame each time!)
#! Improved algorithm speed by more than a factor of 10 (maybe even 100...)
lGF.M <- matrix(NA_real_, nrow = 0, ncol = 7, dimnames = list(NULL, c(
'index', 'mean', 'fnum', 'fsd', 'fmeth', 'fwin', 'fqc')))
# Determine gap positions
ToBeFilled.V.i <- which(is.na(sTEMP$VAR_fall))
if (length(ToBeFilled.V.i) > 0) {
for (Pos.i in 1:length(ToBeFilled.V.i)) {
# Message on progress if wanted
if (Verbose.b && Pos.i == 1) message(
'Mean diurnal course with window size of ', WinDays.i, ' days: .', sep = '')
# Set index within window size
Gap.i <- ToBeFilled.V.i[Pos.i]
Index.V.i <- numeric(0)
for (Day.i in (0:WinDays.i))
if (Day.i == 0) {
Index.V.i <- c(Index.V.i, Gap.i + (-2:2))
} else {
Index.V.i <- c(
Index.V.i, Gap.i + c(-Day.i * sINFO$DTS + (-2:2))
, Gap.i + c(Day.i * sINFO$DTS + (-2:2)))
}
Index.V.i <- Index.V.i[Index.V.i > 0 & Index.V.i <= nrow(sTEMP)]
# If enough available data, fill gap
#iRecsSimilar <- Index.V.i[!is.na(sTEMP$VAR_orig[Index.V.i]) ]
lMDC.V.n <- subset(sTEMP$VAR_orig[Index.V.i], !is.na(sTEMP$VAR_orig[Index.V.i]))
if (length(lMDC.V.n) > 1) {
#if (Gap.i == 15167 ) recover()
lVAR_index.i <- Gap.i
lVAR_mean.n <- mean(lMDC.V.n)
lVAR_fnum.n <- length(lMDC.V.n)
lVAR_fsd.n <- sd(lMDC.V.n)
lVAR_fmeth.n <- 3
#Set window size and quality flag
if (T == T) {
#! Non-congruent with MR PV-Wave
##! Full window length, not congruent with MR PV-Wave (see below),
##in paper single window sizes stated
lVAR_fwin.n <- 2 * WinDays.i + 1
} else {
#! Code if required to be congruent with MR PV-Wave -->
#window calculation changes depending on size
lVAR_fwin.n <- if (WinDays.i < 7) {
2 * WinDays.i + 1
} else {
WinDays.i + 1
}
}
##details<< \describe{\item{Quality flag}{
## \itemize{
## \item 1: nDay <= 1
## \item 2: nDay [2,5)
## \item 3: nDay > 5
## }
## }}
if (lVAR_fwin.n <= 1) lVAR_fqc.n <- 1
if (lVAR_fwin.n > 1 & lVAR_fwin.n <= 5) lVAR_fqc.n <- 2
if (lVAR_fwin.n > 5) lVAR_fqc.n <- 3
lGF.M <- rbind(lGF.M, c(
lVAR_index.i, lVAR_mean.n, lVAR_fnum.n, lVAR_fsd.n
, lVAR_fmeth.n, lVAR_fwin.n, lVAR_fqc.n))
}
if (Verbose.b && Pos.i %% 100 == 0) message('.', appendLF = FALSE)
if (Verbose.b && Pos.i %% 6000 == 0) message('\n.', appendLF = FALSE)
}
if (Verbose.b) message('', nrow(lGF.M))
}
# Copy gap filled values and properties to sTEMP
if (nrow(lGF.M) > 0) {
# Fill all rows in VAR_fall and co
sTEMP[lGF.M[, 'index'], c(
'VAR_fall', 'VAR_fnum', 'VAR_fsd', 'VAR_fmeth', 'VAR_fwin', 'VAR_fall_qc')] <<-
lGF.M[, c('mean', 'fnum', 'fsd', 'fmeth', 'fwin', 'fqc')]
# Only fill gaps in VAR_f and VAR_fqc
Gaps.b <- is.na(sTEMP[lGF.M[, 'index'], 'VAR_f'])
# twutz: inserted drop = FALSE, otherwise one-row matrix was not
# converted to data.frame correctly
sTEMP[lGF.M[, 'index'], c('VAR_f', 'VAR_fqc')][Gaps.b, ] <<-
as.data.frame(lGF.M[, c('mean', 'fqc') , drop = FALSE])[Gaps.b, ]
}
#Other columns are specific for full MR MDS algorithm
return(invisible(sTEMP[, c(
'VAR_orig', 'VAR_f', 'VAR_fall', 'VAR_fnum', 'VAR_fsd', 'VAR_fwin')]))
##value<<
## MDC filling results in sTEMP data frame.
}
sEddyProc$methods(sFillMDC = sEddyProc_sFillMDC)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' @export
sEddyProc_sMDSGapFill <- function(
### MDS gap filling algorithm adapted after the PV-Wave code and paper by Markus Reichstein.
Var = Var.s ##<< Variable to be filled
, QFVar = if (!missing(QFVar.s)) QFVar.s else 'none' ##<< Quality flag
## of variable to be filled
, QFValue = if (!missing(QFValue.n)) QFValue.n else NA_real_ ##<< Value of
## quality flag for _good_ (original) data, other data is set to missing
, V1 = if (!missing(V1.s)) V1.s else 'Rg' ##<< Condition variable 1
## (default: Global radiation 'Rg' in W m-2)
, T1 = if (!missing(T1.n)) T1.n else 50 ##<< Tolerance interval 1
## (default: 50 W m-2)
, V2 = if (!missing(V2.s)) V2.s else 'VPD' ##<< Condition variable 2
## (default: Vapour pressure deficit 'VPD' in hPa)
, T2 = if (!missing(T2.n)) T2.n else 5 ##<< Tolerance interval 2
## (default: 5 hPa)
, V3 = if (!missing(V3.s)) V3.s else 'Tair' ##<< Condition variable 3
## (default: Air temperature 'Tair' in degC)
, T3 = if (!missing(T3.n)) T3.n else 2.5 ##<< Tolerance interval 3
## (default: 2.5 degC)
, FillAll = if (!missing(FillAll.b)) FillAll.b else TRUE ##<< Fill
## all values to estimate uncertainties
, isVerbose = if (!missing(Verbose.b)) Verbose.b else TRUE ##<< Print
## status information to screen
, suffix = if (!missing(Suffix.s)) Suffix.s else '' ##<< String
## suffix needed for different processing setups on the same dataset
## (for explanations see below)
, minNWarnRunLength = ##<< scalar integer:
## warn if number of subsequent
## numerically equal values exceeds this number.
## Set to Inf or NA for no warnings.
## defaults for "NEE" to records across 4 hours and no warning for others.
if (Var == "NEE") 4 * .self$sINFO$DTS/24 else NA_integer_
, Var.s ##<< deprecated
, QFVar.s ##<< deprecated
, QFValue.n ##<< deprecated
, V1.s ##<< deprecated
, T1.n ##<< deprecated
, V2.s ##<< deprecated
, T2.n ##<< deprecated
, V3.s ##<< deprecated
, T3.n ##<< deprecated
, FillAll.b ##<< deprecated
, Verbose.b ##<< deprecated
, Suffix.s ##<< deprecated
#! , QF.V.b = TRUE ##<< boolean vector of length nRow(sData),
## to allow specifying bad data directly (those entries that are set to FALSE)
, method = "Reichstein05" ##<< specify "Vekuri23" to use the skewness-bias
## reducing variant
) {
varNamesDepr <- c(
"Var.s","QFVar.s","QFValue.n","V1.s","T1.n"
,"V2.s","T2.n","V3.s","T3.n","FillAll.b","Verbose.b","Suffix.s")
varNamesNew <- c(
"Var","QFVar","QFValue","V1","T1"
,"V2","T2","V3","T3","FillAll","isVerbose","suffix")
iDepr = which(!c(
missing(Var.s),missing(QFVar.s),missing(QFValue.n),missing(V1.s)
,missing(T1.n),missing(V2.s),missing(T2.n),missing(V3.s),missing(T3.n)
,missing(FillAll.b),missing(Verbose.b),missing(Suffix.s)))
if (length(iDepr)) warning(
"Arguments names ",paste(varNamesDepr[iDepr], collapse = ",")
," have been deprecated."
," Please, use instead ", paste(varNamesNew[iDepr], collapse = ","))
##author<< AMM, TW
##references<<
## Reichstein, M. et al. (2005) On the separation of net ecosystem exchange
## into assimilation and ecosystem respiration: review and improved algorithm.
## Global Change Biology, 11, 1424-1439.
TimeStart.p <- Sys.time()
##details<<
## Initialize temporal data frame sTEMP for newly generated gap filled data and
## qualifiers, see \code{\link{sEddyProc_sFillInit}} for explanations on suffixes.
# sTEMP <<- sTEMP[, 1L, drop = FALSE]
if (!is.null(sFillInit(Var, QFVar, QFValue, FillAll)) ) #! , QF.V.b = QF.V.b)) )
return(invisible(-111)) # Abort gap filling if initialization of sTEMP failed
##details<<
## Runs of numerically equal numbers hint to problems of the data and cause
## unreasonable estimates of uncertainty. This routine warns the user.
if (is.finite(minNWarnRunLength) & (nrow(sTEMP) >= minNWarnRunLength)) {
rl <- .runLength(as.vector(sTEMP$VAR_orig), minNRunLength = minNWarnRunLength)
if (length(rl$index)) {
rlSorted <- rl[rev(order(rl$nRep)),,drop = FALSE]
warning(
"Variable ", Var, " contains long runs of numerically equal numbers."
, " Longest of ", rlSorted$nRep[1], " repeats of value "
, sTEMP$VAR_orig[ rlSorted$index[1] ]
, " starts at index ", rlSorted$index[1]
)
}
}
#+++ Handling of special cases of meteo condition variables V1, V2, V3
# If variables are at default values but do not exist as columns, set to 'none'
# (= disabled identifier).
# This allows running MDS with less variables than prescribed in the default setting.
# If meteo condition variable are same as variable to fill, also set to 'none'.
# This prevents filling artificial gaps (for uncertainty estimates) with itself
# as meteo condition variable.
#! Attention: Non-congruent with MR PV-Wave. There artificial gaps in
#Rg, VPD, Tair are filled with itself.
if ( (V1 == 'Rg' && !(V1 %in% c(colnames(sDATA)))) || (V1 == Var) ) V1 <- 'none'
if ( (V2 == 'VPD' && !(V2 %in% c(colnames(sDATA)))) || (V2 == Var) ) V2 <- 'none'
if ( (V3 == 'Tair' && !(V3 %in% c(colnames(sDATA)))) || (V3 == Var) ) V3 <- 'none'
# Check column names (with 'none' as dummy)
# (Numeric type and plausibility have been checked on initialization of sEddyProc)
fCheckColNames(cbind(sDATA, sTEMP), c(V1, V2, V3), 'sMDSGapFill')
# Check tolerance entries (if condition variable is not 'none')
NoneCols.b <- c(V1, V2, V3) %in% 'none'
if (!fCheckValNum(T1) || !fCheckValNum(T2) || !fCheckValNum(T3) ) stop(
'sMDSGapFill::: T1, T2, T3, T4.n, T5.n must be numeric '
, '(if not specified, set to NA_real_)!')
if (sum(is.na(c(T1, T2, T3)[!NoneCols.b])) ) stop(
'sMDSGapFill::: If condition variable is specified (dummy name is \'none\'), '
, 'the tolerance interval must be specified!')
# Run gap filling scheme depending on auxiliary meteo data availability
##details<<
## MDS gap filling algorithm calls the subroutines Look Up Table
## \code{\link{sEddyProc_sFillLUT}}
## and Mean Diurnal Course \code{\link{sEddyProc_sFillMDC}} with different
## window sizes as described in the reference.
##details<<
## Vekari et al. 2023 proposed a modification that reduces bias
## that results from skewed distribution of radiation Rg.
## In order to use this modification, specify \code{method="Vekuri23"}.
##details<<
calculate_gapstats <- if (method == "Reichstein05") {
calculate_gapstats_Reichstein05
} else if (method == "Vekuri23") {
if (V1 != "Rg") warning(paste0(
"Method Vekuri23 requires incoming shortware radiation (Rg), ",
"but first covariate is called ", V1))
calculate_gapstats_Vekuri23
} else
stop(paste0(
"unknown MDS method '",method,"'. Specify 'Reichstein05' or 'Vekuri23'"))
## To run dataset only with MDC algorithm \code{\link{sEddyProc_sFillMDC}},
## set condition variable V1 to 'none'.
# Check availablility of meteorological data for LUT
Met.n <-
if (
V1 != 'none' && V2 != 'none' && V3 != 'none'
&& sum(!is.na(sDATA[, V1])) != 0 && sum(!is.na(sDATA[, V2])) != 0 &&
sum(!is.na(sDATA[, V3])) != 0
) {
#All three meteo conditions are available and valid to use:
message(
'Full MDS algorithm for gap filling of \''
, attr(sTEMP$VAR_f, 'varnames'), '\' with LUT('
, V1, ', ', V2, ', ', V3, ') and MDC.')
3
} else if (V1 != 'none' && sum(!is.na(sDATA[, V1])) != 0) {
#Only one meteo condition available for LUT
message(
'Limited MDS algorithm for gap filling of \''
, attr(sTEMP$VAR_f, 'varnames'), '\' with LUT(', V1, ' only) and MDC.')
1
} else {
#No meteo condition available (use MDC only)
message(
'Restriced MDS algorithm for gap filling of \''
, attr(sTEMP$VAR_f, 'varnames')
, '\' with no meteo conditions and hence only MDC.')
if (Var != 'Rg') warning(
'sMDSGapFill::: No meteo available for MDS gap filling!')
0
}
#+++ Full MDS algorithm
# Step 1: Look-up table (method 1) with window size +-7 days
if (Met.n == 3) sFillLUT(
7, V1, T1, V2, T2, V3, T3, Verbose.b = isVerbose,
calculate_gapstats = calculate_gapstats)
# Step 2: Look-up table (method 1) with window size +-14 days
if (Met.n == 3) sFillLUT(
14, V1, T1, V2, T2, V3, T3, Verbose.b = isVerbose,
calculate_gapstats = calculate_gapstats)
# Step 3: Look-up table, Rg only (method 2) with window size +-7 days,
if (Met.n == 3 || Met.n == 1) sFillLUT(
7, V1, T1, Verbose.b = isVerbose,
calculate_gapstats = calculate_gapstats)
# Step 4: Mean diurnal course (method 3) with window size 0 (same day)
sFillMDC(0, Verbose.b = isVerbose)
# Step 5: Mean diurnal course (method 3) with window size +-1, +-2 days
sFillMDC(1, Verbose.b = isVerbose)
sFillMDC(2, Verbose.b = isVerbose)
# Step 6: Look-up table (method 1) with window size +-21, +-28, ..., +-70
if (Met.n == 3) for (WinDays.i in seq(21, 70, 7) ) sFillLUT(
WinDays.i, V1, T1, V2, T2, V3, T3, Verbose.b = isVerbose,
calculate_gapstats = calculate_gapstats)
# Step 7: Look-up table (method 2) with window size +-14, +-21, ..., +-70
if (Met.n == 3 || Met.n == 1) for (WinDays.i in seq(14, 70, 7) ) sFillLUT(
WinDays.i, V1, T1, Verbose.b = isVerbose,
calculate_gapstats = calculate_gapstats)
# Step 8: Mean diurnal course (method 3) with window size +-7, +-14, ..., +-210 days
for (WinDays.i in seq(7, 210, 7) ) sFillMDC(WinDays.i, Verbose.b = isVerbose)
# Set long gaps again to NA
sTEMP$VAR_fall <<- suppressMessages(fConvertGapsToNA(sTEMP$VAR_fall))
# Message on gap filling
TimeDiff.p <- as.numeric(Sys.time()) - as.numeric(TimeStart.p)
message(
'Finished gap filling of \'', Var, '\' in ', floor(TimeDiff.p)
, ' seconds. Artificial gaps filled: '
, length(sTEMP$VAR_fall) - sum(is.na(sTEMP$VAR_fall))
, ', real gaps filled: ', sum(is.na(sTEMP$VAR_orig))
, ', unfilled (long) gaps: ', sum(is.na(sTEMP$VAR_fall)), '.')
##details<< \describe{\item{Different processing setups on the same dataset}{
## Attention: When processing the same site data set with different setups for
## the gap filling or flux partitioning
## (e.g. due to different ustar filters),
## a string suffix is needed! This suffix is added to the result column names
## to distinguish the results of the different setups.
## }}
# Rename new columns generated during gap filling
colnames(sTEMP) <<- gsub('VAR_', paste(
Var, (if (fCheckValString(suffix)) '_' else '')
, suffix, '_', sep = ''), colnames(sTEMP))
# Check for duplicate columns (to detect if different processing
# setups were executed without different suffixes)
if (length(names(which(table(colnames(sTEMP)) > 1))) ) warning(
'sMDSGapFill::: Duplicated columns found! Please specify different suffix '
, 'when processing different setups on the same dataset!')
return(invisible(NULL))
##value<<
## Gap filling results in sTEMP data frame (with renamed columns).
}
sEddyProc$methods(sMDSGapFill = sEddyProc_sMDSGapFill)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' @export
sEddyProc_sMDSGapFillAfterUstar <- function(
### sEddyProc$sMDSGapFillAfterUstar - MDS gap filling algorithm after u* filtering
fluxVar ##<< Flux variable to gap fill after ustar filtering
, uStarVar = 'Ustar' ##<< Column name of friction velocity u * (ms-1),
## default 'Ustar'
, uStarTh = ##<< data.frame with
## first column, season names, and second column estimates of uStar Threshold.
## Alternatively, a single value to be used as threshold for all records
## If only one value is given, it is used for all records.
#usGetAnnualSeasonUStarMap(sUSTAR_DETAILS$uStarTh)
.self$sGetUstarScenarios()[,c("season",uStarSuffix), drop = FALSE]
, uStarSuffix = 'uStar' ##<< Different suffixes required are for
## different u * scenarios
, isFlagEntryAfterLowTurbulence = FALSE ##<< Set to TRUE for flagging the
## first entry after low turbulence as bad condition (by value of 2).
, isFilterDayTime = FALSE ##<< Set to TRUE to also filter day-time values,
## default only filters night-time data
, swThr = 10 ##<< threshold of solar radiation below which data is
## marked as night time respiration.
, RgColName = "Rg" ##<< Column name of incoming short wave radiation
, ... ##<< Other arguments passed to \code{\link{sEddyProc_sMDSGapFill}}
) {
'Calling sMDSGapFill after filtering for (provided) friction velocity u * '
##author<<
## AMM, TW
##details<<
## Calling \code{\link{sEddyProc_sMDSGapFill}} after filtering for
## (provided) friction velocity u*
##
## The u* threshold(s) are provided with argument \code{uStarTh} for
## filtering the conditions of low turbulence.
## After filtering, the data is gap filled using the MDS algorithm
## \code{\link{sEddyProc_sMDSGapFill}}.
#
##seealso<<
## \itemize{
## \item \code{\link{sEddyProc_sEstimateUstarScenarios}} and
## \code{link{sEddyProc_sEstUstarThold}} for estimating the
## u* threshold from the data.
## \item \code{\link{sEddyProc_sMDSGapFillUStarScens}} for
## automated gapfilling for several scenarios of u* threshold estimates.
## }
#
uStarThresVec <- if (is.numeric(uStarTh) ) {
if (length(uStarTh) != 1L) stop(
"Without seasons, only a single uStarThreshold can be provided,"
, " but got a vector.")
uStarThresVec <- rep(uStarTh, nrow(.self$sDATA) )
} else {
if (!("season" %in% colnames(sTEMP)) ) stop(
"Seasons not defined yet. Provide argument seasonFactor to sEstUstarThold.")
# make sure merge will work
colnames(uStarTh) <- c("season", "uStarThreshold")
if (any(!is.finite(uStarTh$uStarThreshold))) stop(
"must provide finite uStarThresholds")
iMissingLevels <- which(!(levels(.self$sTEMP$season) %in% uStarTh$season))
if (length(iMissingLevels) ) stop(
"missing uStarTrheshold for seasons "
, paste(levels(.self$sTEMP$season)[iMissingLevels], collapse = ", "))
tmpDs <- merge(subset(sTEMP, select = "season"), uStarTh, all.x = TRUE)
uStarThresVec <- tmpDs[, 2L]
}
# Check column names (with 'none' as dummy)
# (Numeric type and plausibility have been checked on initialization of sEddyProc)
fCheckColNames(sDATA, c(fluxVar, uStarVar), 'sMDSGapFillAfterUstar')
# Filter data
uStar <- sDATA[, uStarVar]
qfUStar <- integer(nrow(sDATA) ) # 0L
# if not filtering dayTimeValues, create a vector that is TRUE only for nightTime
isRowFiltered <- if (isFilterDayTime) TRUE else
(!is.finite(sDATA[, RgColName]) | sDATA[, RgColName] < swThr)
# mark low uStar or bad uStar as 1L
qfUStar[
isRowFiltered &
!is.na(uStarThresVec) &
(sDATA[[uStarVar]] < uStarThresVec)
] <- 1L
if (isTRUE(isFlagEntryAfterLowTurbulence) ) {
##details<<
## With \code{isFlagEntryAfterLowTurbulence set to TRUE}, to be more
## conservative, in addition
## to the data acquired when uStar is below the threshold,
## the first half hour measured with good turbulence conditions
## after a period with low turbulence is also removed (Papale et al. 2006).
qfUStar[which(diff(qfUStar) == 1) + 1] <- 2L
}
# mark those conditions as bad, when no threshold is defined
qfUStar[isRowFiltered & !is.finite(uStarThresVec) ] <- 3L
# mark those recods as bad, where uStar is not defined
qfUStar[isRowFiltered & !is.finite(uStar) ] <- 4L
message(
'Ustar filtering (u * Th_1 = ', uStarThresVec[1], '), marked '
, (signif(sum(qfUStar != 0) / length(qfUStar), 2)) * 100
, '% of the data as gap' )
if (isTRUE(isFlagEntryAfterLowTurbulence) ) {
message(
'(including removal of the first half-hour after a '
, 'period of low turbulence).')
}
# Add filtering step to (temporal) results data frame
suffixDash.s <- paste(
(if (fCheckValString(uStarSuffix)) '_' else ''), uStarSuffix, sep = '')
attr(uStarThresVec, 'varnames') <- paste(
'Ustar', suffixDash.s, '_Thres', sep = '')
attr(uStarThresVec, 'units') <- 'ms-1'
attr(qfUStar, 'varnames') <- paste(
'Ustar', suffixDash.s, '_fqc', sep = '')
attr(qfUStar, 'units') <- '-'
sTEMP$USTAR_Thres <<- uStarThresVec
sTEMP$USTAR_fqc <<- qfUStar
colnames(sTEMP) <<- gsub(
'USTAR_', paste('Ustar', suffixDash.s, '_', sep = ''), colnames(.self$sTEMP))
# Check for duplicate columns (to detect if different processing setups
# were executed without different suffix)
if (length(names(which(table(colnames(.self$sTEMP)) > 1))) ) {
warning('sMDSGapFillAfterUstar::: Duplicated columns found!'
, ' Please specify different suffix when processing different"
, " setups on the same dataset!')
}
# Gap fill data after applying ustar filtering
sMDSGapFill(
fluxVar, QFVar = attr(qfUStar, 'varnames'), QFValue = 0, ...
, suffix = uStarSuffix)
##value<<
## Vector with quality flag from filtering (here 0: good data
## , 1: low turbulence, 2: first half hour after low turbulence
## , 3: no threshold available, 4: missing uStar value)
## Gap filling results are in sTEMP data frame (with renamed columns)
## that can be retrieved by \code{\link{sEddyProc_sExportResults}}.
return(invisible(qfUStar))
# example in Eddy.R sEddyProc.example
}
sEddyProc$methods(sMDSGapFillAfterUstar = sEddyProc_sMDSGapFillAfterUstar)
#' @export
sEddyProc_sMDSGapFillAfterUStarDistr <- function(
### gapfilling for several filters of estimated friction velocity Ustar thresholds.
... ##<< other arguments to
## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} and \code{\link{sEddyProc_sMDSGapFill}}
## such as \code{fluxVar}
, uStarTh ##<< data.frame with first column, season names,
## and remaining columns different estimates of uStar Threshold.
## If the data.frame has only one row, then each uStar threshold estimate
## is applied to the entire dataset.
## Entries in first column must match levels in argument \code{seasonFactor}
, uStarSuffixes = colnames(uStarTh)[-1] ##<< String vector
## to distinguish result columns for different ustar values.
## Its length must correspond to column numbers in \code{UstarThres.m.n}.
# return value function \code{\link{sEddyProc_sEstUstarThresholdDistribution}}
) {
##details<< This method is superseded by
##\code{\link{sEddyProc_sMDSGapFillUStarScens}} and only there
## for backward portability.
warning(
"Method sEddyProc_sMDSGapFillAfterUStarDistr has been deprecated. "
, "Please, replace it by calls to 'sEddyProc_sMDSGapFillUStarScens' "
, "and if applicable calling before 'sEddyProc_sSetUstarScenarios'."
, "see ?sEddyProc_sMDSGapFillUStarScens and vignette('uStarCases')"
)
if (missing(uStarTh)) stop(
"Need to provide argument uStarTh. Since version 1.1.6 "
, " Please, use new methods sEddyProc_sSetUstarScenarios and"
, " sEddyProc_sMDSGapFillUStarScens instead of this method."
, " see ?sEddyProc_sMDSGapFillUStarScens and ?")
.self$sSetUstarScenarios(uStarTh, uStarSuffixes)
.self$sMDSGapFillUStarScens(...)
}
sEddyProc$methods(
sMDSGapFillAfterUStarDistr = sEddyProc_sMDSGapFillAfterUStarDistr)
#' @export
sEddyProc_sMDSGapFillUStarScens <- function(
### gapfilling for several filters of estimated friction velocity Ustar thresholds.
... ##<< other arguments to
## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} and
## \code{\link{sEddyProc_sMDSGapFill}}
## such as \code{fluxVar}
) {
##author<< TW
##details<<
## sEddyProc$sMDSGapFillUStarDistr: calling
## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} for several filters of
## friction velocity Ustar.
##
## The scenarios need to be set before by
## \code{\link{sEddyProc_sSetUstarScenarios}} or accepting the defaults
## annual estimates of \code{link{sEddyProc_sEstimateUstarScenarios}}.
##
## Then the difference between output columns NEE_U05_f and NEE_U95_f
## corresponds to the uncertainty
## introduced by the uncertain estimate of the u* threshold.
#
##seealso<<
## \href{../doc/useCase.html}{useCase vignette}
uStarTh <- .self$sUSTAR_SCEN
uStarSuffixes <- colnames(.self$sUSTAR_SCEN)[-1]
nEstimates <- ncol(uStarTh) - 1L
#iCol <- 1L
filterCols <- lapply(seq(1L:nEstimates), function(iCol) {
.self$sMDSGapFillAfterUstar(
...
, uStarTh = uStarTh[, c(1L, 1L + iCol)]
, uStarSuffix = uStarSuffixes[iCol]
)
})
filterMat <- do.call(cbind, filterCols)
return(invisible(filterMat))
##value<<
## Matrix (columns correspond to u* Scenarios) with quality flag from
## filtering ustar (0 - good data, 1 - filtered data)
##
## Gap filling results in sTEMP data frame (with renamed columns), that
## can be retrieved by \code{\link{sEddyProc_sExportResults}}.
## Each of the outputs is calculated for several u* r-estimates and
## distinguished by a suffix after the variable.
## E.g. with an an entry "U05" in \code{uStarSuffixes} in
## \code{\link{sEddyProc_sSetUstarScenarios}}
## the corresponding filled NEE can be found in output column "NEE_U05_f".
}
sEddyProc$methods(sMDSGapFillUStarScens = sEddyProc_sMDSGapFillUStarScens)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.