#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++ R script with methods for ustar filtering +++
#+++ Ustar filtering adapted after the idea in Papale, D. et al. (2006) +++
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' =============================================================================
#' Set Ustar Processing Scenarios
#' =============================================================================
#'
#' Original name: sEddyProc_sSetUstarScenarios
#' @author
#'
#' @param ustar_thr Data frame returned by \code{\link{usGetAnnualSeasonUStarMap}}
#' or \code{\link{usGetSeasonalSeasonUStarMap}}. First column, season names,
#' and remaining columns different estimates of ustar threshold. If
#' \code{ustar_thr} has only one row, then each ustar threshold estimate is
#' applied to the entire dataset. Entries in first column must match levels in
#' argument \code{sf} of
#' \code{\link{sEddyProc_sEstUstarThresholdDistribution}}.
#' @param uStarSuffixes Suffixes appended to result column names; by default the
#' column names of ustar_thr unless its first season column.
#'
#' @details
#' The eddy covariance method does not work with low turbulence conditions, and
#' periods with low turbulence indicated by low ustar must be filtered out and
#' gap-filled (see \code{\link{sEddyProc_sMDSGapFill}}). The threshold value of
#' a sufficient ustar causes one of the largest uncertainty components within
#' the gap-filled data. Hence, it is good practice to compare derived quantities
#' based on gap-filled data using different ustar threshold values.
#'
#' For example a user could provide the the following columns in argument
#' \code{uStarTh}
#' \itemize{
#' \item season: identifier for which season this row is used.
#' \item Ustar: estimate on original series
#' \item U05: 5% of bootstrap
#' \item U50: median of bootstrap
#' \item U95: 95% of bootstrap)
#' }
#' Usually, Scenarios are retrieved from resuls of estimating the distribution
#' by \code{\link{sEddyProc_sEstUstarThresholdDistribution}} and then taking
#' either
#' \itemize{
#' \item annually aggregated value for all seasons
#' (\code{\link{usGetAnnualSeasonUStarMap}}) or
#' \item seasonal estimates (\code{\link{usGetSeasonalSeasonUStarMap}}).
#' }
#'
#' The following functions apply a processing step to all of the scenarios.
#' \itemize{
#' \item \code{\link{sEddyProc_sMDSGapFillUStarScens}}: gap-filling
#' \item \code{\link{sEddyProc_sMRFluxPartitionUStarScens}}: flux-partitioning
#' }
#' The generated output columns are distinguished by appending a suffix to the
#' respective column name. Then the spread across those columns is an estimate
#' of the uncertainty introduced by not knowing the exact value of the u*
#' threshold.
#'
#' @export
sEddyProc_sSetUstarScenarios <-
function(ustar_thr, uStarSuffixes = colnames(ustar_thr)[-1]) {
if (!("season" %in% colnames(sTEMP)) ) {
stop("Seasons not defined yet. Add column 'season' to dataset with entries
matching column season in UstarThres.df, e.g. by calling
yourEddyProcClass$sSetUStarSeasons(...)")
}
if (!all(is.finite(as.matrix(ustar_thr[, -1])))) {
warning("Provided non-finite uStarThreshold for some periods. All values in
corresponding period will be marked as gap.")
}
if (nrow(ustar_thr) == 1L) {
ustar_thr <- cbind(data.frame(
season = levels(.self$sTEMP$season)), ustar_thr[, -1], row.names = NULL)
}
missing <- which( !(levels(.self$sTEMP$season) %in% ustar_thr[[1]]))
if (length(missing)) {
stop("Need to provide uStar threshold for all seasons, but was missing for
seasons", paste(levels(.self$sTEMP$season)[missing], collapse = ","))
}
nEstimates <- ncol(ustar_thr) - 1L
uStarSuffixes <- unique(uStarSuffixes)
if (length(uStarSuffixes) != nEstimates) {
stop("Number of unique suffixes must correspond to number of ustar
thresholds, i.e. number of columns in ustar_thr - 1.")
}
sUSTAR_SCEN <- ustar_thr
colnames(sUSTAR_SCEN)[-1] <- uStarSuffixes
}
#' =============================================================================
#' Estimate ustar threshold (call)
#' =============================================================================
#'
#' Original name: sEddyProc_sEstUstarThold
#' @author TW
#'
#' @description Calling \code{\link{est_ustar_thr}} for class data and
#' storing results.
#'
#' @param data A data frame.
#' @param ustar Column name for ustar.
#' @param NEE Column name for NEE.
#' @param Tair Column name for air temperature.
#' @param Rg Column name for solar radiation.
#' @param ... Further arguments to \code{\link{est_ustar_thr}}.
#' @param sf Factor of seasons to split.
#'
#' @return Result component \code{uStarTh} of \code{\link{est_ustar_thr}}. In
#' addition the result is stored in class variable \code{sUSTAR_DETAILS}.
#'
#' @export
get_ustar_thr <- function(temp, ustar = "ustar", NEE = "NEE",
Tair = "Tair", Rg = "Rg", ...,
sf = create_sf(data$timestamp)) {
if (length(sf)) temp$season <- sf
if (is.null(temp$season)) {
stop("Ustar seasons must be set by argument 'sf'.", call. = FALSE)
}
req <- c("timestamp", ustar, NEE, Tair, Rg)
missing <- which(!(req %in% names(.self$sDATA)))
if (length(missing)) {
stop("Missing columns in dataset: ", paste(req[missing], collapse = ","))
}
data <- .self$sDATA[, req, drop = FALSE]
colnames(data) <- c("timestamp", "ustar", "NEE", "Tair", "Rg")
resEst <- est_ustar_thr(data, sf = .self$sTEMP$season, ...)
sUSTAR_DETAILS <- resEst
return(resEst$uStar)
}
## =============================================================================
#' Estimate ustar threshold
#'
#' Original name: usEstUstarThreshold
#' @author TW, OM
#'
#' @description Estimate the ustar threshold by aggregating the estimates for
#' seasonal and temperature subsets.
#'
#' @param data Data frame with columns "timestamp", "ustar", "NEE", "Tair", and
#' "Rg".
#' @param sf Factor for subsetting times (see details).
#' @param ctrl_est Control parameters for estimating ustar on a single
#' binned series, see \code{\link{control_ustar}}.
#' @param ctrl_sub Control parameters for subsetting time series (number of
#' temperature and ustar classes), see \code{\link{subset_ustar}}.
#' @param binned_fun Function to estimate ustar on a single binned
#' series, see \code{\link{est_ustar_fw2}}.
#' @param cleaned Set to TRUE if the data was cleaned already (avoids
#' expensive call to \code{get_ustar_indices}.
#'
#' @details
#' The threshold for sufficiently turbulent conditions u* (ustar) is estimated
#' for different subsets of the time series. From the estimates for each season
#' (each value in \code{sf}) the maximum of all seasons of one year is reported
#' as estimate for this year. Within each season the time series is split by
#' temperature classes. Among these ustar estimates, the median is reported as
#' season value.
#'
#' In order to split the seasons, a vector with argument \code{sf} must be
#' provided. All positions with the same factor belong to the same season. It is
#' generated by one of the methods in the function \code{\link{create_sf}}:
#' * \code{create_sf(type = "month")} (default DJF-MAM-JJA-SON with December
#' from previous to January of the year)
#' * \code{create_sf(type = "ymonth")} (default DJF-MAM-JJA-SON with December
#' from the same year)
#' * \code{create_sf(type = "yday")} for a refined specification of season
#' starts.
#' * \code{create_sf(type = "year")} for specifying different seasons between
#' years.
#'
#' The estimation of ustar on a single binned series can be selected argument
#' \code{est_ustar_binned}:
#' * \code{\link{est_ustar_fw1}}
#' * \code{\link{est_ustar_fw2}} (default)
#'
#' This function is called by:
#' \itemize{
#' \item \code{\link{get_ustar_thr}} which stores the result in the
#' class variables (sUSTAR and sDATA).
#' \item \code{\link{sEddyProc_sEstUstarThresholdDistribution}} which
#' additionally estimates median and confidence intervals for each year by
#' bootstrapping the original data within seasons.
#' }
#'
#' Inspecting the NEE~uStar relationship plotting is provided by
#' \code{\link{sEddyProc_sPlotNEEVersusUStarForSeason}}
#'
#' Add index columns to locate which season / tempClass / uStarBin each record
#' belongs. Cannot directly change sDATA, in EddyProcC, because will be
#' overwritten in each bootstrap.
#'
#' \describe{
#' \item{change point detection (CPT) method}{
#' With specifying \code{ctrl_est = control_ustar(diff_temp = TRUE)} change
#' point detection is applied instead of the moving point test (e.g. with
#' Fw2Binned). The sometimes sensitive binning of uStar values within a
#' temperature class is avoided. Further, possible spurious thresholds are
#' avoided by testing that the model with a threshold fits the data better than
#' a model without a threshold using a likelihood ratio test. In addition, with
#' CPT seasons are excluded where a threshold was detected in only less than
#' ctrl_est$min_prop (default 20%) of the temperature classes. Note that this
#' method often gives higher estimates of the ustar threshold.}
#' }
#'
#' \describe{
#' \item{One-big-season fallback}{
#' If there are too few records within one year, or when no season yielded a
#' finite ustar threshold estimate, then the yearly ustar threshold is estimated
#' by pooling the data from seasons within one seasonYear. The user can suppress
#' using pooled data on few records by providing option
#' \code{ctrl_sub$one_season = FALSE} (see \code{\link{subset_ustar}})}
#' }
#'
#' @return
#' A list with entries:
#' * ustar_thr A data.frame with columns "agg_mod", "seasonYear", "season",
#' "ustar" and rows for "single" (the entire aggregate median across years),
#' "seasonYear" (each year maximum across seasons or estimate on pooled data),
#' and "season" (each season median across temperature classes).
#' * seasonYear A data.frame listing results for year with columns "seasonYear",
#' "uStarMaxSeason" (the maximum across seasonal estimates within the year),
#' "ustar_pooled" (the estimate based on data pooled across the year - only
#' calculated on few valid records or on uStarMaxSeason was nonfinite), "recs"
#' (number of valid records - only if the pooled estimate was calculated),
#' "ustar_agg" (chosen estimate corresponding to ustar_pooled if it was
#' calculated, or uStarMaxSeason or ustar_thr across years if the former was
#' non-finite).
#' * season A data.frame listing results for each season:
#' * recs The number of valid records.
#' * uStarSeasonEst The estimate for based on data within the season (median
#' across temperature classes).
#' * ustar_agg The chosen estimate, corresponding to uStarSeasonEst or the
#' yearly seasonYear$ustar_agg, if the former was non-finite.
#' * tempInSeason A numeric matrix (nTemp x nAggSeason) containing estimates
#' for each temperature subset for each season.
#' * bins Columns \code{season}, \code{temp_bin} and \code{ustar_bin} for each
#' record of input \code{ds} reporting classes of similar environmental
#' conditions that the record belongs to.
#' }
#'
#' @references
#' Papale, D. et al. (2006) Towards a standardized processing of net ecosystem
#' exchange measured with eddy covariance technique: algorithms and uncertainty
#' estimation. Biogeosciences 3(4): 571-583.
#'
#' @export
est_ustar_thr <- function(data, sf = create_sf(data$timestamp), sf_yr,
ctrl_est = control_ustar(), ctrl_sub = subset_ustar(),
binned_fun = est_ustar_fw2, cleaned = FALSE) {
if (any(is.na(sf))) {
stop("Encountered NA in season factor. Need valid season for each record.",
call. = FALSE)
}
# For each sf level get the year (aggregation period) of the season.
timestamp <- data$timestamp
origin <- as.POSIXct("1970-01-01 00:00.00", tz = "UTC")
tz <- attr(timestamp[1], "tzone")
res <- tapply(timestamp, sf, fun = function(dates) {
x <- as.numeric(dates)
center <- x[1] + (x[length(x)] - x[1]) / 2
1900L + as.POSIXlt(center, origin = origin, tz = tz)$year
})
sf_yr <- structure(as.vector(res), names = rownames(res)) # Convert to vector
data$season <- as.factor(sf)
data$sn_yr <- sf_yr[sf]
data$t_bin <- NA
data$ustar_bin <- NA
# Extract valid nighttime records
if (cleaned) {
valid <- TRUE
dsc <- data
} else {
valid <-
is.finite(data[, "NEE"]) &
is.finite(data[, "Tair"]) &
is.finite(data[, "ustar"]) &
is.finite(data[, "Rg"])
valid <- valid & data[, "Rg"] < ctrl_sub$night_thr
dsc <- data[valid, , drop = FALSE]
}
if (nrow(dsc) == 0L) stop("No finite records in dataset.", call. = FALSE)
tdsc <- as.data.frame(table(dsc$season))
colnames(tdsc) <- c("season", "recs")
# Some seasons might be absent in dsc from cleaning, construct vectors that
# report NA for missing seasons
n_sn <- merge(data.frame(season = levels(ds$season)), tdsc, all.x = TRUE)
n_sn_yr <- merge(n_sn, data.frame(season = names(sf_yr), sn_yr = sf_yr),
all.x = TRUE)
n_yr <-
n_sn_yr %>%
group_by(!!sym("seasyr")) %>%
summarize(n_rec = sum(!!sym("recs"), na.rm = TRUE))
seasonYearsWithFewData <- n_yr$sn_yr[n_yr$n_rec < ctrl_sub$min_recs]
# Use CPT if requested
est_ustar_season <- function(...) {
if (isTRUE(ctrl_sub$diff_temp)) {
.estimateUStarSeasonCPTSeveralT(...)
} else est_ustar_season(...)
}
ustar_sn_tl <-
dsc %>%
split(.$season) %>%
map(est_ustar_season, ctrl_sub = ctrl_sub,
ctrl_est = ctrl_est,
binned_fun = binned_fun)
ustar_sn_t <- do.call(rbind, map(ustar_sn_tl, 1L))
ustar_sn <- apply(ustar_sn_t, 1, median, na.rm = TRUE)
# Report NA where threshold was found in less than 20% of temperature classes
invalid <- (rowSums(is.finite(ustar_sn_t)) / ncol(ustar_sn_t)) <
ctrl_est$min_prop
ustar_sn[invalid] <- NA
# extract the temperature and bin indices
for (season in names(ustar_sn_tl) ) {
bins <- c("t_bin", "ustar_bin")
dsc[dsc$season == season, bins] <- ustar_sn_tl[[season]]$bins
}
# check correct ordering
results_sn <- n_sn_yr
results_sn$uStarSeasonEst <- ustar_sn
results <-
results_sn %>%
group_by(!!sym("sn_yr")) %>%
summarize(uStarMaxSeason = {
if (all(!is.finite(!!sym("uStarSeasonEst")))) {NA}
else {max(!!sym("uStarSeasonEst"), na.rm = TRUE)}},
n_rec = sum(!!sym("n_rec")))
results$ustar_agg <- results$uStarMaxSeason
#---- for season_years with too few records and for season_years with no
# seasonal estimate do a pooled estimate
sn_yr_pooled <- results$sn_yr[!is.finite(results$ustar_agg) & results$n_rec > 0]
if (isTRUE(ctrl_sub$one_season)) {
sn_yr_pooled <- union(seasonYearsWithFewData, sn_yr_pooled)
}
results_pooled <-
if (!length(sn_yr_pooled) ) {
results_pooled <- data.frame(sn_yr = NA, recs = NA,
ustar_pooled = NA)[FALSE, ]
} else {
pooled <- filter(dsc, UQ(sym("seasyr")) %in% !!sn_yr_pooled)
ustar_yrtl <-
tmp <-
pooled %>%
split(.$seasyr) %>%
map(fEstimateUStarSeason, ctrl_sub = ctrl_sub,
ctrl_est = ctrl_est,
binned_fun = binned_fun)
# matrix (nSeason x nTemp)
ustar_yrt <- do.call(rbind, map(ustar_yrtl, 1L))
ustar_yr <- apply(ustar_yrt, 1, median, na.rm = TRUE)
# Report NA where threshold was found in less than 20% of temperature
# classes
invalid <- (rowSums(is.finite(ustar_yrt)) / ncol(ustar_yrt)) <
ctrl_est$min_prop
ustar_yr[invalid] <- NA
# Omit gettting binning into ds (do not overwrite bins from seasonal
# estimates)
results_pooled <- data.frame(seasyr = names(ustar_yr),
recs = as.vector(table(pooled$seasyr)),
ustar_pooled = ustar_yr)
}
results <- merge(results, results_pooled, all.x = TRUE)
finite_pooled <- is.finite(results$ustar_pooled)
results$ustar_agg[finite_pooled] <- results$ustar_pooled[finite_pooled]
#----- overall is the median across years
ustar_medyr <- median(results$ustar_agg, na.rm = TRUE)
message(paste("Estimated ustar threshold of: ", signif(ustar_medyr, 2),
"by using controls:\n",
paste(capture.output(unlist(ctrl_sub)), collapse = "\n")))
#----- propagate aggregate estimates back to NA-slots of years and seaons
non_finite <- !is.finite(results$ustar_agg)
results$ustar_agg[non_finite] <- ustar_medyr
# Merge yearly aggregated estimates to season and replace by finite
# seasonal estimates
results_seas <- merge(results_seas, results[, c("seasyr", "ustar_agg")],
all.x = TRUE)
finite_est <- (is.finite(results_seas$uStarSeasonEst))
results_seas$ustar_agg[finite_est] <- results_seas$uStarSeasonEst[finite_est]
df_out <- results_seas[, c("season", "seasyr", "ustar_agg")]
df_out$season <- as.factor(df_out$season)
df_out$agg_mod <- "season"
tmp <- rbind(cbind(data.frame(agg_mod = "year", season = as.factor(NA)),
results[, c("seasyr", "ustar_agg")]), df_out)
df_out <- tmp
tmp <- rbind(cbind(data.frame(agg_mod = "single", season = as.factor(NA),
seasyr = NA), ustar_agg = ustar_medyr), df_out)
df_out <- tmp
df_out$ustar <- df_out$ustar_agg
# Store indices in data, first remove columns
data[valid, ] <- dsc
# Check correct ordering, return list with entries
res <- list(ustar_thr = df_out[, c("agg_mod", "seasyr", "season", "ustar")],
seasyr = results, season = results_seas,
tempInSeason = t(ustar_seast),
bins = data[, c("season", "t_bin", "ustar_bin")])
return(res)
}
#' =============================================================================
#' Plot NEE versus Ustar for Data of a Subset with Estimates
#' =============================================================================
#'
#' Original name: .plotNEEVersusUStarTempClass
#' @author TW
#'
#' @param data Data frame or tibble with columns NEE, ustar, Tair/Tsoil,
#' ustar_bin, and timestamp.
#' @param ustar_thr Value of ustar of an estimated threshold.
#' @param ustar Column name for ustar.
#' @param NEE Column name for NEE
#' @param Tair Column name for temperature variable.
#' @param xlab Custom x-axis label. If NULL (default), the x-axis is given the
#' label \code{bquote(u["*"] * " (" * m ~ s^-1 * ")")}.
#' @param ylab Custom y-axis label. If NULL (default), the y-axis is given the
#' label \code{bquote("NEE (" * gC ~ m^-2 ~ yr^-1 * ")")}.
#'
#' @details
#' For each ustar_bin, mean of NEE and ustar is calculated.
#'
#' @return Side effect of plotting NEE ~ ustar with indicating means of the
#' bins, ustar_thr, date range, and temperature range.
#'
plot_ustar_thr <- function(data, ustar_thr, ustar = "ustar", NEE = "NEE",
Tair = "Tair", xlab = NULL, ylab = NULL) {
dss <- data[, c(NEE, ustar, Tair, "ustar_bin", "timestamp")]
colnames(dss) <- c("NEE", "ustar", "Tair", "ustar_bin", "timestamp")
dssm <-
dss %>%
group_by(!!sym("ustar_bin")) %>%
summarize(
ustar_avg = mean(!!sym("ustar")),
NEE_avg = mean(!!sym("NEE"))
)
if (is.null(xlab)) {
xlab <- bquote(u["*"] * " (" * m ~ s^-1 * ")")
} else {
xlab <- bquote(xlab)
}
if (is.null(ylab)) {
ylab <- bquote("NEE (" * gC ~ m^-2 ~ yr^-1 * ")")
} else {
ylab <- bquote(ylab)
}
plot(
NEE ~ ustar, dss,
col = adjustcolor("brown", alpha.f = 0.3),
ylim = quantile(dss$NEE, c(0.02, 0.98)), xlab = xlab, ylab = ylab
)
points(NEE_avg ~ ustar_avg, dssm, pch = "+", cex = 1.5)
abline(v = ustar_thr, lty = "dashed", col = "darkgrey", lwd = 2)
date_range <- strftime(range(dss$timestamp), "%d.%m.%y")
legend("topleft", legend = c(
paste(
data$season[1], " (", date_range[1],
"-", date_range[2], ")",
sep = ""
),
sprintf(
" (%1.1f-%1.1f\u00B0C)", min(dss$Tair),
max(dss$Tair)
),
sprintf("ustar_thr =%1.2f", ustar_thr)
))
}
#' =============================================================================
#' Estimate uStar threshold for data of a given season
#' =============================================================================
#'
#' Original name: .estimateUStarSeason
#' @author TW
#'
#' @param dsi A data frame with columns ustar, NEE, Tair, and Rg.
#' @param ctrl_sub
#' @param ctrl_est
#' @param est_ustar_binned
#'
#' @return
#' List with entries:
#' * ustar_thr A vector of ustar threshold for temperature classes.
#' * bins A data frame with columns temp_bin, ustar_bin for each row in dsi.
#'
est_ustar_season <- function(dsi, ctrl_sub, ctrl_est, est_ustar_binned) {
res_na <- list(
ustar_thr = rep(NA, ctrl_sub$temp_classes),
bins = data.frame(
temp_bin = rep(NA, nrow(dsi)),
ustar_bin = rep(NA, nrow(dsi))
)
)
if (nrow(dsi) < ctrl_sub$min_recs_season) {
warning(
"Too few finite records within season (n = ", nrow(dsi), ").",
"Need at least n = ", ctrl_sub$min_recs_season,
". Returning NA for this season.",
call. = FALSE
)
return(res_na)
}
if (nrow(dsi) / ctrl_sub$temp_classes < ctrl_sub$min_recs_temp) {
warning(
"Too few finite records within season (n = ", nrow(dsi), ")",
"for ", ctrl_sub$temp_classes, " temperature classes.",
" Need at least n = ", ctrl_sub$min_recs_temp * ctrl_sub$temp_classes,
". Returning NA for this season.",
call. = FALSE
)
return(res_na)
}
order_temp <- order(dsi[["Tair"]])
# Default for methods that do not bin ustar
ustar_sorted <- integer(nrow(dsi))
# Sort values in a season by air temperature (later in class by ustar)
sort <- dsi[order_temp, , drop = FALSE]
# Set up vector that contains ustar values for temperature classes
ustar_thr <- vector(length = ctrl_sub$temp_classes)
TId <- bin_balanced(sort[, "Tair"], ctrl_sub$temp_classes)
# Loop through k temperature classes
for (k in 1:ctrl_sub$temp_classes) {
temp_class <- TId == k
sort_temp <- sort[temp_class, ]
# Threshold only accepted if Tair and ustar are not correlated
cor1 <- suppressWarnings(abs(cor(
sort_temp[["ustar"]],
sort_temp[["Tair"]]
)))
if ((is.finite(cor1)) && (cor1 < ctrl_est$corr_thr)) {
if (isTRUE(ctrl_est$cpt)) {
if (!requireNamespace("segmented")) {
stop("Need to install package segmented before using Change Point
Decetion for estimation of ustar threshold.", call. = FALSE)
}
res_cpt <- try(.fitSeg1(
sort_temp[["ustar"]],
sort_temp[["NEE"]]
), silent = TRUE)
ustar_thr[k] <- if (
inherits(res_cpt, "try-error") ||
!is.finite(res_cpt["p"]) ||
res_cpt["p"] > 0.05
) {
NA
} else {
res_cpt["cp"]
}
} else {
res_bin <- bin_ustar(
sort_temp[["NEE"]], sort_temp[["ustar"]],
ctrl_sub$ustar_classes
)
binned_ustar <- res_bin$bin_avgs
ustar_sorted[temp_class] <- res_bin$ustar_bins
if (any(!is.finite(binned_ustar[[2]]))) {
stop("Encountered non-finite average NEE for a ustar bin. Data must be
provided with non-finite columns ustar and NEE for ustar
threshold detection.", call. = FALSE)
}
ustar_thr[k] <- if (binned_ustar[[1]][1] > ctrl_est$check_first) {
## If first mean ustar bin is already large (>ctrl_est$check_first),
## this temperature class is skipped from estimation
NA
} else {
est_ustar_binned(binned_ustar, ctrl_est = ctrl_est)
}
}
} else {
# Correlation between T and ustar too high: fill respective cell with NA
ustar_thr[k] <- NA
}
}
# Bins of temperature and ustar have been generated on sorted data.frame.
# Need to assign to original positions
ustar_unsorted <- integer(length(order_temp))
TIdUnsorted <- ustar_unsorted
TIdUnsorted[order_temp] <- TId
ustar_unsorted[order_temp] <- ustar_sorted
invisible(list(
ustar_thr = ustar_thr,
bins = data.frame(
temp_bin = TIdUnsorted,
ustar_bin = ustar_unsorted
)
))
}
## =============================================================================
#' Create List of Parameters for Determining Ustar of Single Binned Series
#'
#' Original name: usControlUstarEst
#' Author: TW
#'
#' @param plateau_fwd Number of subsequent ustar bin values to compare to in
#' fwd mode.
#' @param plateau_back Number of subsequent ustar bin values to compare to in
#' back mode.
#' @param plateau_crit Significant differences between a ustar value and the
#' mean of a "plateau".
#' @param corr_thr Threshold value for correlation between Tair and ustar data.
#' @param check_first If first ustar bin average of a class is already
#' larger than this value, the temperature class is skipped.
#' @param omit_bins If TRUE (default), bins where no threshold was found are
#' ignored. Set to FALSE to report highest ustar bin for these cases.
#' @param diff_temp Set to TRUE to use Change Point Detection without
#' binning ustar but with additionally changed aggregation scheme for several
#' temperature classifications.
#' @param cpt Set to TRUE to use Change Point Detection without binning
#' ustar before in usual aggregation method (good for comparing methods, but
#' not recommended, overruled by diff_temp = TRUE).
#' @param min_prop Seasons in which only less than this
#' proportion of temperature classes a threshold was detected, are excluded
#' from aggregation.
#' @param min_boot Minimum proportion of bootstrap samples for which a
#' threshold was detected. Below this proportion NA quantiles are reported.
#' @param min_recs Minimum number of records in plateau, threshold must
#' be larger than mean of this many bins.
#'
#' @export
control_ustar <- function(plateau_fwd = 10, plateau_back = 6,
plateau_crit = 0.95, corr_thr = 0.5,
check_first = 0.2, omit_bins = TRUE,
diff_temp = FALSE, cpt = FALSE, min_prop = 0.2,
min_boot = 0.4, min_recs = 3L) {
ctrl <- list(
plateau_fwd = plateau_fwd, plateau_back = plateau_back,
plateau_crit = plateau_crit, corr_thr = corr_thr, check_first = check_first,
omit_bins = omit_bins, cpt = cpt, diff_temp = diff_temp,
min_prop = min_prop, min_boot = min_boot, min_recs = min_recs
)
# display warning message for variables that we advise not to be changed
if (corr_thr != 0.5) {
warning("Parameter 'corr_thr' is set to a non-default value.",
call. = FALSE)
}
return(ctrl)
}
#' =============================================================================
#' List of parameters for subsetting data for ustar threshold estimation
#' =============================================================================
#'
#' Original name: usControlUstarSubsetting
#' @author TW
#'
#' @param temp_classes number of Tair classes
#' @param ustar_classes number of ustar classes
#' @param night_thr nighttime data threshold for solar radiation (in W+1m-2)
#' @param min_recs_temp minimum number of records within one temperature
#' class
#' @param min_recs_season minimum number of records within one season
#' @param min_recs_year minimum number of records within one year
#' @param one_season logical, set to FALSE to avoid aggregating
#' all seasons on too few records
#'
#' @export
subset_ustar <- function(temp_classes = 7, ustar_classes = 20, night_thr = 10,
min_recs_temp = 100, min_recs_season = 160,
min_recs_year = 3000, one_season = TRUE) {
ctrl <- list(
temp_classes = temp_classes, ustar_classes = ustar_classes,
night_thr = night_thr, min_recs_temp = min_recs_temp,
min_recs_season = min_recs_season, min_recs_year = min_recs_year,
one_season = one_season
)
# TODO: loop all this nonsense??
if (ctrl$night_thr != 10) {
warning("Parameter night_thr set to non default value.")
}
if (ctrl$temp_classes != 7) {
warning("Parameter temp_classes set to non default value.")
}
if (ctrl$ustar_classes != 20) {
warning("Parameter ustar_classes set to non default value.")
}
if (ctrl$min_recs_temp != 100) {
warning("Parameter min_recs_temp set to non default value.")
}
if (ctrl$min_recs_season != 160) {
warning("Parameter min_recs_season set to non default value.")
}
if (ctrl$min_recs_year != 3000) {
warning("Parameter min_recs_year set to non default value.")
}
return(ctrl)
}
## =============================================================================
#' Calculate Season Factor
#'
#' @description Compute year-spanning or year-bounded season factor by starting
#' month, year-day, or year and year-day.
#'
#' Original names: usCreateSeasonFactorMonth,
#' usCreateSeasonFactorMonthWithinYear, usCreateSeasonFactorYday,
#' usCreateSeasonFactorYdayYear
#' @author TW
#'
#' @param dates POSIXct vector of length of the data set to be filled,
#' specifying the center-time of each record.
#' @param month Integer (1-12) vector of length of the data set to be filled,
#' specifying the month for each record.
#' @param yday Integer (1-366) vector of length of the data set to be filled,
#' specifying the day of the year for each record.
#' @param year Integer vector of length of the data set to be filled, specifying
#' the year.
#' @param start_month Integer vector specifying the starting month for each
#' season, counting from one. Default is (Dec, Jan, Feb); (Mar, Apr, May);
#' (Jun, Jul, Aug); (Sep, Oct, Nov).
#' @param start_yday Integer vector (1-366) specifying the starting yday for
#' each season in increasing order.
#'
#' @details
#' For type "month":
#' Calculate factors to denote the season for ustar-filtering by specifying
#' starting months, with continuous seasons spanning year boundaries. If Jan is
#' not a starting month, then the first months of each year will be part of the
#' last period in the year. E.g. with the default the fourth period of the first
#' year consists of Jan, Feb, Dec.
#'
#' For type "ymonth":
#' Calculate factors to denote the season for ustar-filtering by specifying
#' starting months, with seasons not spanning year boundaries. If Jan is not a
#' starting month, then the first months of each year will be part of the last
#' period in the year e.g. with the default the fourth period of the first year
#' consists of Jan, Feb, Dec.
#'
#' For type "yday":
#' With default parameterization, dates are assumed to denote begin or center of
#' the eddy time period. If working with dates that denote the end of the
#' period, use \code{yday = as.POSIXlt(fGetBeginOfEddyPeriod(dates))$yday}.
#'
#' Internally, the timestamp is shifted 15 minutes after the start of each half
#' hour. When providing the \code{dates} argument, the start time may be shifted
#' by \code{dates = data$timestamp + 15 * 60}.
#'
#' @return For types "month" and "ymonth": an integer vector of length
#' \code{dates}, with each unique value representing one season. For types
#' "yday" and "year": an integer vector of length \code{nrow(ds)}, each unique
#' class representing one season.
#'
#' @export
create_sf <- function(dates, month = as.POSIXlt(dates)$mon + 1L,
yday = as.POSIXlt(dates)$yday + 1L,
year = as.POSIXlt(dates)$year + 1900L,
start_month = c(3, 6, 9, 12),
start_yday = c(335, 60, 152, 244),
type = c("month", "ymonth", "yday", "year")) {
# Check function inputs
type <- match.arg(type)
if (type %in% c("month", "ymonth")) {
if (length(year) == 1L) year <- rep(year, length(month))
if (length(month) != length(year) ) {
stop("'month' and 'year' arguments must have the same length.",
call. = FALSE)
}
if (any(month < 1 | month > 12) ) {
stop("'month' out of range 1-12.", call. = FALSE)
}
if (type == "month") {
starts <- data.frame(month = sort(unique(start_month)),
year = rep(sort(unique(year)),
each = length(start_month)))
if (starts$month[1] != 1L) {
starts <- rbind(data.frame(month = 1L, year = starts$year[1]), starts)
}
sf <- integer(length(month)) # 0L
start_ymonths <- starts$year * 1000L + starts$month
starts$start_ymonths <- start_ymonths
ymonths <- year * 1000L + month
for (i in 1:(length(start_ymonths) - 1)) {
bo <- (ymonths >= start_ymonths[i]) & (ymonths < start_ymonths[i + 1])
sf[bo] <- starts$year[i] * 1000L + starts$month[i]
}
# Last period with no end border defined
i <- length(start_ymonths)
bo <- (ymonths >= start_ymonths[i])
sf[bo] <- starts$year[i] * 1000L + starts$month[i]
return(as.factor(sf))
} else if (type == "ymonth") {
start_month <- sort(unique(start_month))
boLastPeriod <- month < start_month[1]
# translate month before the first specified beginning month to be
# after last specified month (1 becomes 13)
month[boLastPeriod] <- month[boLastPeriod] + 12
start_month_add <- c(start_month, start_month[1] + 12)
sf <- year * 1000L + rep(start_month[1], length(month) )
for (i in 2:length(start_month) ) {
bo <- (month >= start_month_add[i]) & (month < start_month_add[i + 1])
sf[bo] <- year[bo] * 1000L + start_month[i]
}
table(sf)
as.factor(sf)
}
} else if (type %in% c("yday", "year")) {
if (length(year) == 1L) year <- rep(year, length(yday))
if (length(yday) != length(year)) {
stop("'month' and 'year' arguments must have the same length.",
call. = FALSE)
}
if (any(yday < 1 | yday > 366)) {
stop("'yday' out of range 1..366.", call. = FALSE)
}
if (type == "yday") {
starts <- data.frame(yday = sort(unique(start_yday)),
year = rep(sort(unique(year)),
each = length(start_yday)))
} else if (type == "year") {
starts <- data.frame(yday = c(335, 60, 152, 244), year)
}
colnames(starts) <- c("yday", "year")
if (starts$yday[1] != 1L) {
starts <- rbind(data.frame(yday = 1L, year = starts$year[1]), starts)
}
sf <- integer(length(yday)) # 0L
start_ydays <- starts$year * 1000L + starts$yday
starts$start_ydays <- start_ydays
ydays <- year * 1000L + yday
for (i in 1:(length(start_ydays) - 1)) {
bo <- (ydays >= start_ydays[i]) & (ydays < start_ydays[i + 1])
sf[bo] <- starts$year[i] * 1000L + starts$yday[i]
}
# Last period with no defined end border
i <- length(start_ydays)
bo <- (ydays >= start_ydays[i])
sf[bo] <- starts$year[i] * 1000L + starts$yday[i]
return(as.factor(sf))
}
}
#' =============================================================================
#' (Internal) Bin NEE for a Number of Ustar Classes
#' =============================================================================
#'
#' Original name: .binUstar
#' @author TW
#'
#' @param NEE Vector with values of Net Ecosystem Exchange.
#' @param ustar Vector with ustar (friction velocity; m2 / s).
#' @param ustar_classes Number of binning classes.
#' @param sorted Set to TRUE if NEE and ustar are already sorted by
#' increasing ustar values (performance gain).
#'
#' @return
#' List with entries:
#' * bin_avgs Data frame with columns ustar_avg, NEE_avg, recs with one
#' row for each bin.
#' * ustar_bins Integer vector reporting the bin for each record in Ustar.v
#'
bin_ustar <- function(NEE, ustar,
ustar_classes = subset_ustar()$ustar_classes,
sorted = FALSE) {
ds0.F <- data.frame(NEE = NEE, ustar = ustar)
ds.F <- ds0.F
# Within data frame sort values by ustar
if (!isTRUE(sorted)) {
order_ustar <- order(ustar)
ds.F <- ds.F[order_ustar, ]
} else{
order_ustar <- TRUE
}
# When assigning ustar classes, only start new class when ustar value changes
ds.F$ustar_class <- bin_unique(ds.F$ustar, nBin = ustar_classes,
tol = 1e-14)
bin_avgs <-
ds.F %>%
group_by(!!sym("ustar_class")) %>%
summarise(Ust_avg = mean(!!sym("Ustar"), na.rm = TRUE),
NEE_avg = mean(!!sym("NEE"), na.rm = TRUE),
nRec = length(!!sym("NEE"))) %>%
select(-1) # omit first column
bins_unsorted <- integer(nrow(ds.F))
bins_unsorted[order_ustar] <- as.integer(ds.F$ustar_class)
return(list(bin_avgs = bin_avgs, ustar_bins = bins_unsorted))
}
#' =============================================================================
#' Create Binning Factor and Shorten Following Bins
#' =============================================================================
#'
#' Original name: .binWithEqualValuesBalanced
#' @author
#'
#' @param x Sorted numeric vector to sort into bins.
#' @param bins Intended number of bins.
#' @param tol Distance between successive values of x that are treated as equal.
#' @param floor Set to FALSE to postpone rounding on start and end values.
#'
#' @details
#' Equal values of x end up in the same bin, and shortens the following bins. By
#' not taking the floor, a better distribution of samples across bins is
#' achieved.
#'
#' @return Integer vector of same length as x, with unique value for each bin.
#'
bin_balanced <- function(x, bins, tol = 1e-8, floor = TRUE) {
if (bins == 1L) return(integer(length(x)))
binSize <- length(x) / bins
if (floor) binSize <- floor(binSize)
breaksX <- which(diff(x) > tol) + 1
binId <- rep(1L, length(x))
iBreak <- 1L # index from which to seek next break
for (iClass in 2L:as.integer(bins)) {
start0 <- round((iClass - 1) * binSize) + 1
iBreak <- .whichValueGreaterEqual(breaksX, start0, iStart = iBreak)
start1 <- breaksX[iBreak]
# Find next uStar change at or after position start0
binId[start1:length(x)] <- iClass
}
return(binId)
}
#' =============================================================================
#' Create Binning Factor and Shift Following Bins
#' =============================================================================
#'
#' Original name: .binWithEqualValuesMinRec
#' @author TW
#'
#' @param x Sorted numeric vector to sort into bins
#' @param bins Intended number of bins
#' @param tol Distance between successive values of x that are treated as equal
#'
#' @section Value
#' Integer vector of same length as x, with unique value for each bin. Each bin
#' holds at least floor(length(x) / bins) records, or more if there were values
#' after the bin that were numerically equal to last value of the bin. The
#' actual number of bins might be differnt from argument bins due to numericall
#' equal values and is reported with attribute \code{bins}. Because of using
#' floor in bin-width calculation, the last, i.e. nbin, of the bins may hold
#' more values.
#'
bin_unique <- function(x, bins, tol = 1e-8) {
length <- length(x)
binId <- integer(length)
binSize <- as.integer(floor(length / bins))
# Positions in x where value is numerically different from following element
iBreaksX <- which(diff(x) > tol)
iBreak <- 0L # start index in iBreaks, to avoid searching the samller el.
end <- 0L # index in x, end of the (previous) period
bin <- 0L # bin Id
while (end < length) {
bin <- bin + 1L
stat <- end + 1L
# Same as start + binsSize-1, with counting from 1 instead of 0
end <- end + binSize
# Find the next break after end
iBreak <- .whichValueGreaterEqual(iBreaksX, end, iBreak + 1L)
if (is.na(iBreak)) {
# No break found, set period end to vector end and finish. If last bin
# length is smaller than 90% of intended binsize, sort records to former
# bin.
if ((length + 1L - start) < binSize * 0.9 && bin != 1L) {
bin <- bin - 1L
}
binId[start:length] <- bin
break
} else {
end <- iBreaksX[iBreak] # update end to position with break after it
binId[start:end] <- bin
}
}
# Do not use more than bins classes; increase the size of the last class
binId[binId > bins] <- bins
attr(binId, "bins") <- min(bin, bins)
return(binId)
}
#' =============================================================================
#' Search for First Element in an Integer Vector Larger than Threshold
#' =============================================================================
#'
#' Original name: .whichValueGreaterEqual
#' @author TW
#'
#' @description For performance reasons call a C++ function that loops across
#' the vector.
#'
#' @param x Increasingly sorted numeric vector to search.
#' @param thr Integer scalar: searched element will need to be greater
#' than or equal to this argument
#' @param iStart Index in vector to start search
#'
#' @details Searches a sorted integer vector for the next element that is >= a
#' threshold in fast C-code.
#'
#' @return A scalar integer: first index in x, that is >= start, and whose
#' value x[i] is >= threshold. If no index was found, returns NA.
#'
.whichValueGreaterEqual <- function(x, thr, start = 1L) {
ans <- whichValueGreaterEqualC(
as.integer(x), as.integer(thr), as.integer(start) )
return(ans)
}
## =============================================================================
#'
#'
#'
#' @param x an integer vector
#' @param threshold an integer vector
#' @param iStart an integer vector
#'
#' @export
whichValueGreaterEqualC <- function(x, threshold, iStart) { # translated
threshold <- as.integer(threshold) # translated
i <- as.integer(iStart) # translated
while ((i < length(x)) & (x[i] < threshold)) i + 1 # translated
if (i < length(x)) { # translated
# return index
return(i)
} else {
# no break found
return(NA)
}
}
#' =============================================================================
#' Estimate Ustar Threshold for Single Subset (Using FW1 Algorithm)
#' =============================================================================
#'
#' Original name: usEstUstarThresholdSingleFw1Binned
#' @author TW, OM
#'
#' @param ustar_bins Data frame with columns NEE_avg and ustar_avg, of ustar
#' bins.
#' @param ctrl_est Parameter list, see \code{\link{control_ustar}} for
#' defaults and description.
#'
#' @return
#'
#' @export
est_ustar_fw1 <-
function(ustar_bins, ctrl_est = control_ustar()) {
# Relying on binned NEE and ustar algorithm to check when plateau is reached
flag <- FALSE
# For every u * bin compare to avg of subsequent UST_PLATEAU, until found
u <- 1
while (!flag) {
# Only stop if threshold is found
if (!flag & (ustar_bins$NEE_avg[u] >=
(ctrl_est$plateau_crit * mean(ustar_bins$NEE_avg[
(u + 1):(u + ctrl_est$plateau_fwd)], na.rm = TRUE)))) {
# NEE_i >= .95 * avg(i, i + 1, ..., i + 10) [FW]
thr <- ustar_bins$ustar_avg[u]
flag <- TRUE # set flag for threshold found in this mode
}
# Case where no threshold is found by plateau method, use maximum ustar in
# that T_class.
if (u == (nrow(ustar_bins) - 1)) {
thr <- ustar_bins$ustar_avg[u + 1]
break
}
u <- u + 1 # increase index by 1
}
return(thr)
}
#' =============================================================================
#' Estimate Ustar Threshold for Single Subset (Using FW2 Algorithm)
#' =============================================================================
#'
#' Original name: usEstUstarThresholdSingleFw2Binned
#' @author TW, OM
#'
#' @param ustar_bins Data frame with columns NEE_avg and ustar_avg, of ustar
#' bins.
#' @param ctrl_est Parameter list, see \code{\link{control_ustar}} for
#' defaults and description.
#'
#' @return
#'
#' @export
est_ustar_fw2 <-
function(ustar_bins, ctrl_est = control_ustar()) {
# Algorithm to check when plateau is reached
flag <- FALSE
# For every u * bin compare to avg of subsequent UST_PLATEAU, until found
u <- 1
thr <- NA
## Demand that threshold is higher than \code{ctrl_est$minNuStarPlateau}
## records. If fewer records
# FF2 neads at least two bins after threshold
umax <- nrow(ustar_bins) - max(2L, ctrl_est$minNuStarPlateau)
while (u <= umax) {
if (
(ustar_bins$NEE_avg[u] >= (ctrl_est$plateau_crit *
mean(ustar_bins$NEE_avg[(u + 1):(u + ctrl_est$plateau_fwd)],
na.rm = TRUE))) &
(ustar_bins$NEE_avg[u + 1] >= (ctrl_est$plateau_crit *
mean(ustar_bins$NEE_avg[(u + 1 + 1):(u + ctrl_est$plateau_fwd + 1)],
na.rm = TRUE)))
) {
thr <- ustar_bins$ustar_avg[u]
break
}
u <- u + 1L
}
# Case where no threshold is found by plateau method, use maximum ustar in
# that T_class.
if (is.na(thr) & !isTRUE(ctrl_est$omit_bins) )
thr <- ustar_bins$ustar_avg[u + 1]
return(thr)
}
#' =============================================================================
#' Extract Mapping Season to UStar Columns from Distribution Result (Annual)
#' =============================================================================
#'
#' Original name: usGetAnnualSeasonUStarMap
#' @author TW
#'
#' @description
#'
#' @param ustar_thr Result of \code{\link{est_ustar_dist}} or
#' \code{\link{est_ustar_thr}}$ustar_thr.
#'
#' @details
#'
#' @return A data frame with first column the season, and other columns
#' different ustar threshold estimates.
#'
#' @export
get_annual_map <- function(ustar_thr) {
year <- ustar_thr[ustar_thr$agg_mod == "year", , drop = FALSE]
year$season <- NULL
year$agg_mod <- NULL
# deprecated: done in est_ustar_thr
seasons <- ustar_thr[
ustar_thr$agg_mod == "season",
c("season", "seasonYear"),
drop = FALSE
]
res <- merge(seasons, year)
res$seasonYear <- NULL
# Transform column names of "x%" to "Ux" with leading zeros
colnames(res)[-(1:2)] <- (
gsub(" ", "0", sprintf("U%2s", gsub("%", "", colnames(res)[-(1:2)])))
)
return(res)
}
#' =============================================================================
#' Extract Mapping Season to UStar Columns from Distribution Result (Seasonal)
#' =============================================================================
#'
#' Original name: usGetSeasonalSeasonUStarMap
#' @author TW
#'
#' @param ustar_thr Result of \code{\link{est_ustar_dist}} or
#' \code{\link{est_ustar_thr}}$ustar_thr.
#'
#' @details
#' From result of \code{\link{est_ustar_dist}} omit aggregation model and
#' seasonYear column.
#'
#' @return A data frame with first column the season, and other columns
#' different ustar threshold estimates. Transform column names of "x%" to "Ux"
#' with leading zeros.
#'
#' @export
get_seasonal_map <- function(ustar_thr) {
seasons <- ustar_thr[ustar_thr$agg_mod == "season", , drop = FALSE]
seasons$seasonYear <- NULL
seasons$agg_mod <- NULL
colnames(seasons)[-(1:2)] <- (
gsub(" ", "0", sprintf("U%2s", gsub("%", "", colnames(seasons)[-(1:2)])))
)
return(seasons)
}
#' =============================================================================
#' Estimate Distribution of Ustar Threshold by Bootstrapping
#' =============================================================================
#'
#' Original name: sEddyProc_sEstimateUstarScenarios
#' @author TW
#'
#' @param ctrl_est Control parameters for estimating ustar on a single binned
#' series, see \code{\link{control_ustar}}.
#' @param ctrl_sub Control parameters for subsetting time series (number of
#' temperature and ustar classes), see \code{\link{subset_ustar}}.
#' @param ustar Column name for ustar.
#' @param NEE Column name for NEE.
#' @param Tair Column name for air temperature.
#' @param Rg Column name for solar radiation.
#' @param ... Further arguments to \code{\link{est_ustar_thr}}.
#' @param sf Factor of seasons to split (data is resampled only within the
#' seasons).
#' @param reps Number of repetitions in the bootstrap.
#' @param probs Quantiles of the bootstrap sample to return. Default is the 5%,
#' median and 95% of the bootstrap.
#' @param verbose Set to FALSE to omit printing progress.
#'
#' @details
#' The choice of the criterion for sufficiently turbulent conditions (ustar >
#' chosen threshold) introduces large uncertainties in calculations based on
#' gap-filled Eddy data. Hence, it is good practice to compare derived
#' quantities based on gap-filled data using a range of ustar threshold
#' estimates.
#'
#' This method explores the probability density of the threshold by repeating
#' its estimation on a bootstrapped sample. By default it returns the 90%
#' confidence interval (argument \code{probs}). For larger intervals the sample
#' number must be increased (argument \code{probs}).
#'
#' \describe{
#' \item{Quality Assurance}{
#' If more than \code{ctrl_est$min_boot} (default 40%) did not
#' report a threshold, no quantiles (i.e. NA) are reported.
#' }}
#'
#' @return A data frame with columns \code{agg_mod}, \code{year}, and
#' \code{ustar} estimate based on the non-resampled data. The other columns
#' correspond to the quantiles of ustar estimate for given probabilities
#' (argument \code{probs}) based on the distribution of estimates using
#' resampled the data.
#'
#' @export
est_ustar_scens <- function(temp, ctrl_est = control_ustar(),
ctrl_sub = subset_ustar(), ustar = "ustar",
NEE = "NEE", Tair = "Tair", Rg = "Rg", ...,
sf = create_sf(data$timestamp, type = "month"),
reps = 200L, probs = c(0.05, 0.5, 0.95),
verbose = TRUE) {
temp$season <- sf
ds <- data[, c("timestamp", ustar, NEE, Tair, Rg)]
colnames(ds) <- c("timestamp", "ustar", "NEE", "Tair", "Rg")
ds$sf <- temp$season
res0 <- suppressMessages(est_ustar_thr(
ustar = ustar,
NEE = NEE,
Tair = Tair,
Rg = Rg,
...,
ctrl_est = ctrl_est, ctrl_sub = ctrl_sub,
sf = NULL
))
pos_agg <- which(res0$agg_mod == "single")
pos_years <- which(res0$agg_mod == "year")
pos_seasons <- which(res0$agg_mod == "season")
years0 <- res0$seasonYear[pos_years]
seasons0 <- res0$season[pos_seasons]
wrapper <- function(sample, ...) {
ds2 <- ds
season_boot <-
ds2 %>%
split(.$sf) %>%
map_df(function(dss) {
sample <- sample.int(nrow(dss), replace = TRUE)
dss[sample, , drop = FALSE]
})
if (isTRUE(verbose)) message(".", appendLF = FALSE)
res <- est_ustar_thr(
season_boot, ...,
sf = season_boot$season,
ctrl_est = ctrl_est, ctrl_sub = ctrl_sub
)
gc()
# Check if years and seasons have been calculated differently due to
# subsetting with too few values within a season, report NA for those cases
res_agg <- res$ustar_thr$ustar[pos_agg]
years <- res$ustar_thr$year[pos_years]
res_years <- structure(
if (all(years == years0)) {
res$ustar_thr$ustar[pos_years]
} else {
rep(NA, length(years0))
},
names = as.character(years0)
)
res_seasons <- structure(
if (
nrow(res$ustar_thr) == nrow(res0) &&
all((seasons <- res$ustar_thr$season[pos_seasons]) == seasons0)
) {
res$ustar_thr$ustar[pos_seasons]
} else {
rep(NA, length(seasons0))
},
names = as.character(seasons0)
)
return(c(agg_years = res_agg, res_years, res_seasons))
}
# Collect into one big matrix
ustar0 <- res0$ustar[c(pos_agg, pos_years, pos_seasons)]
Ustar.l <- suppressMessages(Ustar.l <- lapply(1:(reps - 1), wrapper, ...))
if (isTRUE(verbose)) message("") # line break
stat <- do.call(rbind, c(list(ustar0), Ustar.l))
# workaround: if probs is a scalar, apply returns vector without names in
# order to get the matrix in all cases, prepend extend probs and delete the
# corresponding row afterwards
res_quants0 <- apply(stat, 2, quantile, probs = c(0, probs), na.rm = TRUE)
res_quants <- t(res_quants0[-1, , drop = FALSE])
invalid <- colSums(is.finite(stat)) / nrow(stat) < ctrl_est$min_boot
res_quants[invalid, ] <- NA
rownames(res_quants) <- NULL
res_df <- cbind(res0, res_quants)
message(paste(
"Estimated ustar distribution of:\n",
paste(
capture.output(res_df[res_df$agg_mod == "single", -(1:3)]),
collapse = "\n"
), "\nby using ", reps, "bootstrap samples and controls:\n",
paste(capture.output(unlist(ctrl_sub)), collapse = "\n")
))
# .self$sSetUstarScenarios(get_annual_map(res_df))
return(res_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.