Nothing
#' Assessing the quality of the fits
#' @description Indicates if the slopes provided by
#' \link[fluxible:flux_fitting]{flux_fitting} should be discarded or replaced
#' by 0 according to quality thresholds set by user
#' @param slopes_df dataset containing slopes
#' @param fit_type model fitted to the data, linear, quadratic or exponential.
#' Will be automatically filled if `slopes_df` was produced using
#' \link[fluxible:flux_fitting]{flux_fitting}
#' @param ambient_conc ambient gas concentration in ppm at the site of
#' measurement (used to detect measurement that started with a polluted setup)
#' @param error error of the setup, defines a window outside of which
#' the starting values indicate a polluted setup
#' @param f_fluxid column containing unique IDs for each flux
#' @param f_slope column containing the slope of each flux
#' (as calculated by the \link[fluxible:flux_fitting]{flux_fitting} function)
#' @param f_slope_lm column containing the linear slope of each flux
#' (as calculated by the \link[fluxible:flux_fitting]{flux_fitting} function)
#' @param force_discard vector of fluxIDs that should be discarded
#' by the user's decision
#' @param force_ok vector of fluxIDs for which the user wants to keep
#' the calculated slope despite a bad quality flag
#' @param force_zero vector of fluxIDs that should be replaced by zero by
#' the user's decision
#' @param force_lm vector of fluxIDs for which the linear slope should be used
#' by the user's decision
#' @param force_exp vector of fluxIDs for which the exponential slope should be
#' used by the user's decision (kappamax method)
#' @param ratio_threshold ratio of gas concentration data points over length of
#' measurement (in seconds) below which the measurement will be considered as
#' not having enough data points to be considered for calculations
#' @param f_pvalue column containing the p-value of each flux
#' (linear and quadratic fits)
#' @param f_rsquared column containing the r squared of each flux
#' (linear and quadratic fits)
#' @param pvalue_threshold threshold of p-value below which the change of
#' gas concentration over time is considered not significant
#' (linear and quadratic fits)
#' @param rsquared_threshold threshold of r squared value below which
#' the linear model is considered an unsatisfactory fit
#' (linear and quadratic fits)
#' @param f_conc column containing the measured gas concentration
#' (exponential fits)
#' @param f_b column containing the b parameter of the exponential expression
#' (exponential fits)
#' @param f_time column containing the time of each measurement in seconds
#' (exponential fits)
#' @param f_start column with datetime of the start of the measurement
#' (after cuts)
#' @param f_end column with datetime of the end of the measurement
#' (after cuts)
#' @param f_fit column containing the modeled data (exponential fits)
#' @param rmse_threshold threshold for the RMSE of each flux above which
#' the fit is considered unsatisfactory (exponential fits)
#' @param cor_threshold threshold for the correlation coefficient of
#' gas concentration with time below which the correlation
#' is considered not significant (exponential fits)
#' @param f_cut column containing the cutting information
#' @param cut_arg argument defining that the data point should be cut out
#' @param b_threshold threshold for the b parameter.
#' Defines a window with its opposite inside which the fit is
#' considered good enough (exponential fits)
#' @param gfactor_threshold threshold for the g-factor. Defines a window
#' with its opposite outside which the flux will be flagged `discard`
#' (exponential quadratic fits).
#' @param kappamax logical. If `TRUE` the kappamax method will be applied.
#' @param instr_error error of the instrument, in the same unit as the
#' gas concentration
#' @param f_fit_lm column with the fit of the linear model.
#' (as calculated by the \link[fluxible:flux_fitting]{flux_fitting} function)
#' @details the kappamax method (Hüppi et al., 2018) selects the linear slope if
#' \ifelse{html}{\out{|b| > kappamax}}{\eqn{|b| > kappamax}{ASCII}}, with
#' \ifelse{html}{\out{kappamax = |f_slope_lm / instr_error|}}{\eqn{kappamax = |f_slope_lm / instr_error|}{ASCII}}.
#' The original kappamax method was applied to the HMR model
#' (Pedersen et al., 2010; Hutchinson and Mosier, 1981), but here it can be
#' applied to any exponential fit.
#' @references Pedersen, A.R., Petersen, S.O., Schelde, K., 2010.
#' A comprehensive approach to soil-atmosphere trace-gas flux estimation with
#' static chambers. European Journal of Soil Science 61, 888–902.
#' https://doi.org/10.1111/j.1365-2389.2010.01291.x
#' @references Hüppi, R., Felber, R., Krauss, M., Six, J., Leifeld, J., Fuß,
#' R., 2018. Restricting the nonlinearity parameter in soil greenhouse gas
#' flux calculation for more reliable flux estimates.
#' PLOS ONE 13, e0200876. https://doi.org/10.1371/journal.pone.0200876
#' @references Hutchinson, G.L., Mosier, A.R., 1981. Improved Soil Cover Method
#' for Field Measurement of Nitrous Oxide Fluxes.
#' Soil Science Society of America Journal 45, 311–316.
#' @return a dataframe with added columns of quality flags (`f_quality_flag`),
#' the slope corrected according to the quality flags (`f_slope_corr`),
#' and any columns present in the input.
#' It will also print a summary of the quality flags. This summary can also
#' be exported as a dataframe using
#' \link[fluxible:flux_flag_count]{flux_flag_count}
#' @importFrom dplyr mutate case_when group_by rowwise summarise ungroup
#' @importFrom tidyr nest unnest
#' @importFrom stats cor
#' @importFrom lubridate int_length interval
#' @importFrom stringr str_detect
#' @examples
#' data(co2_conc)
#' slopes <- flux_fitting(co2_conc, conc, datetime, fit_type = "exp_zhao18")
#' flux_quality(slopes, conc)
#' @export
flux_quality <- function(slopes_df,
f_conc = f_conc,
f_fluxid = f_fluxid,
f_slope = f_slope,
f_time = f_time,
f_start = f_start,
f_end = f_end,
f_fit = f_fit,
f_cut = f_cut,
f_pvalue = f_pvalue,
f_rsquared = f_rsquared,
f_slope_lm = f_slope_lm,
f_fit_lm = f_fit_lm,
f_b = f_b,
force_discard = c(),
force_ok = c(),
force_zero = c(),
force_lm = c(),
force_exp = c(),
ratio_threshold = 0.5,
gfactor_threshold = 10,
fit_type = c(),
ambient_conc = 421,
error = 100,
pvalue_threshold = 0.3,
rsquared_threshold = 0.7,
rmse_threshold = 25,
cor_threshold = 0.5,
b_threshold = 1,
cut_arg = "cut",
instr_error = 5,
kappamax = FALSE) {
name_df <- deparse(substitute(slopes_df))
args_ok <- flux_fun_check(list(
ambient_conc = ambient_conc,
error = error,
ratio_threshold = ratio_threshold
),
fn = list(is.numeric, is.numeric, is.numeric),
msg = rep("has to be numeric", 3))
slopes_df_check <- slopes_df |>
select(
{{f_slope}},
{{f_conc}},
{{f_fit}},
{{f_time}}
)
df_ok <- flux_fun_check(slopes_df_check,
fn = list(
is.numeric,
is.numeric,
is.numeric,
is.numeric
),
msg = rep(
"has to be numeric",
4
),
name_df = name_df)
if (any(!c(args_ok, df_ok)))
stop("Please correct the arguments", call. = FALSE)
fit_type <- flux_fit_type(
slopes_df,
fit_type = fit_type
)
if (fit_type == "linear" && kappamax == TRUE) {
stop("You cannot use kappamax with a linear fit.")
}
if (fit_type == "quadratic" && kappamax == TRUE) {
stop("You cannot use kappamax with a quadratic fit.")
}
name_conc <- names(select(slopes_df, {{f_conc}}))
quality_par_start <- slopes_df |>
# for the start error we take the entire flux into account
group_by({{f_fluxid}}) |>
nest() |>
rowwise() |>
summarise(
f_start_error = case_when(
# mean of the 3 first data points
abs(mean(data[[name_conc]][1:3], na.rm = TRUE) - ambient_conc) > error
~ "error",
TRUE ~ "ok"
),
.groups = "drop"
) |>
unnest({{f_fluxid}})
slopes_df <- slopes_df |>
left_join(quality_par_start, by = join_by({{f_fluxid}}))
slopes_keep <- slopes_df |>
filter(
{{f_cut}} != cut_arg
) |>
mutate(
f_n_conc = sum(!is.na(.data[[name_conc]])),
f_ratio = .data$f_n_conc / int_length(interval({{f_start}}, {{f_end}})),
f_flag_ratio = case_when(
.data$f_ratio == 0 ~ "no_data",
.data$f_ratio <= ratio_threshold ~ "too_low",
TRUE ~ "ok"
),
f_min_slope = (2 * instr_error) / max({{f_time}}),
.by = c({{f_fluxid}}, {{f_cut}})
)
slopes_cut <- slopes_df |>
filter(
{{f_cut}} == cut_arg
)
if (kappamax == TRUE) {
slopes_keep <- flux_quality_kappamax(
slopes_keep,
f_slope = {{f_slope}},
f_fluxid = {{f_fluxid}},
f_fit = {{f_fit}},
f_slope_lm = {{f_slope_lm}},
f_fit_lm = {{f_fit_lm}},
f_b = {{f_b}},
force_exp = force_exp,
fit_type = fit_type,
instr_error = instr_error,
name_df = name_df
)
quality_flag_lm <- slopes_keep |>
filter(.data$f_model == "linear")
quality_flag_exp <- slopes_keep |>
filter(str_detect(.data$f_model, "exp"))
if (nrow(quality_flag_lm) > 0) {
quality_flag_lm <- flux_quality_lm(
slopes_df = quality_flag_lm,
f_fluxid = {{f_fluxid}},
f_slope = {{f_slope}},
f_pvalue = {{f_pvalue}},
f_rsquared = {{f_rsquared}},
force_discard = force_discard,
force_ok = force_ok,
force_zero = force_zero,
pvalue_threshold = pvalue_threshold,
rsquared_threshold = rsquared_threshold,
name_df = name_df
)
}
if (nrow(quality_flag_exp) > 0) {
quality_flag_exp <- flux_quality_exp(
quality_flag_exp,
{{f_conc}},
{{f_fluxid}},
{{f_slope}},
{{f_time}},
{{f_fit}},
{{f_slope_lm}},
{{f_b}},
force_discard = force_discard,
force_ok = force_ok,
force_zero = force_zero,
force_lm = force_lm,
gfactor_threshold = gfactor_threshold,
rmse_threshold = rmse_threshold,
cor_threshold = cor_threshold,
b_threshold = b_threshold,
name_df = name_df
)
}
quality_flag <- bind_rows(quality_flag_exp, quality_flag_lm) |>
arrange({{f_fluxid}})
}
if (str_detect(fit_type, "exp") && kappamax == FALSE) {
quality_flag <- flux_quality_exp(
slopes_keep,
{{f_conc}},
{{f_fluxid}},
{{f_slope}},
{{f_time}},
{{f_fit}},
{{f_slope_lm}},
{{f_b}},
force_discard = force_discard,
force_ok = force_ok,
force_zero = force_zero,
force_lm = force_lm,
gfactor_threshold = gfactor_threshold,
rmse_threshold = rmse_threshold,
cor_threshold = cor_threshold,
b_threshold = b_threshold,
name_df = name_df
)
}
if (fit_type == "quadratic" && kappamax == FALSE) {
quality_flag <- flux_quality_qua(slopes_keep,
{{f_fluxid}},
{{f_slope}},
{{f_pvalue}},
{{f_rsquared}},
{{f_slope_lm}},
force_discard = force_discard,
force_ok = force_ok,
force_zero = force_zero,
force_lm = force_lm,
gfactor_threshold = gfactor_threshold,
pvalue_threshold = pvalue_threshold,
rsquared_threshold = rsquared_threshold,
name_df = name_df
)
}
if (fit_type == "linear") {
quality_flag <- flux_quality_lm(slopes_keep,
{{f_fluxid}},
{{f_slope}},
{{f_pvalue}},
{{f_rsquared}},
force_discard = force_discard,
force_ok = force_ok,
force_zero = force_zero,
pvalue_threshold = pvalue_threshold,
rsquared_threshold = rsquared_threshold,
name_df = name_df
)
}
flag_count <- flux_flag_count(
quality_flag
)
flag_msg <- flag_count |>
mutate(
message = paste(
"\n", .data$f_quality_flag, "\t", .data$n,
"\t", round(.data$ratio, 2) * 100, "%"
)
) |>
pull(message)
total_nb <- quality_flag |>
select({{f_fluxid}}) |>
distinct() |>
nrow()
message(paste("\n", "Total number of measurements:", total_nb))
message(flag_msg)
quality_flag <- bind_rows(quality_flag, slopes_cut) |>
arrange({{f_fluxid}}, {{f_time}})
attr(quality_flag, "fit_type") <- {{fit_type}}
quality_flag
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.