#' Analyse raw ion count data
#'
#' \code{stat_X} function for descriptive and predictive statistics on single
#' ion precision.
#' \code{stat_R} function for descriptive and predictive statistics on isotope
#' ratios (R) precision with appropriate error propagation.
#'
#' These functions are a convenient wrapper to calculate the statistics
#' pertaining to the precision of pulsed ion count data (e.g. secondary ion mass
#' spectrometry). The statistics can either be calculated for single ions or
#' isotope ratios, and include observed and predicted (Poisson) statistics.
#' Calculations for isotope ratios include proper error propagation. For more
#' information on the usage as well as the mathematics behind these
#' functions see \code{vignette("IC-precision", package = "point")}.
#'
#' @param .IC A tibble containing processed ion count data.
#' @param .ion1 A character string constituting the heavy isotope ("13C").
#' @param .ion2 A character string constituting the light isotope ("12C").
#' @param ... Variables for grouping.
#' @param .nest A variable hat identifies a series of analyses to calculate
#' external precision.
#' @param .X A variable constituting the ion count rate (defaults to
#' variables generated with \code{read_IC()}.)
#' @param .N A variable constituting the ion counts (defaults to variables
#' generated with \code{read_IC()}.).
#' @param .species A variable constituting the species analysed (defaults to
#' variables generated with \code{read_IC()}.).
#' @param .t A variable constituting the time of the analyses (defaults to
#' variables generated with \code{read_IC()}.).
#' @param .stat Select statistics (e.g. \code{c("M", "RS"}), see the tables
#' \code{point::names_stat_X} and \code{point::names_stat_R} for the full
#' selection of statistics available (default uses all statistic
#' transformations).
#' @param .label A character string indicating whether variable names are latex
#' (\code{"latex"}) or webtex (\code{"webtex"}) compatible. Will be extended in
#' the future \code{default = NULL}.
#' @param .meta Logical whether to preserve the metadata as an attribute
#' (defaults to TRUE).
#' @param .output A character string for output as summary statistics ("sum");
#' statistics only ("stat"); and statistics with the original data ("complete")
#' \code{default = "sum"}.
#' @param .zero A character string that determines whether analyses with zero
#' count measurements will be removed from the calculations.
#'
#' @return A \code{tibble::\link[tibble:tibble]{tibble}} containing descriptive
#' and predictive statistics for ion counts and isotope ratios. The naming
#' convention depends on the argument \code{latex}; if set to \code{FALSE},
#' variable names concerning statistics will consist of an abbreviation pasted
#' together with the input variable names of \code{Xt}.
#'
#' @export
#' @examples
#' # Use point_example() to access the examples bundled with this package
#'
#' # raw data containing 13C and 12C counts on carbonate
#' tb_rw <- read_IC(point_example("2018-01-19-GLENDON"), meta = TRUE)
#'
#' # Processing raw ion count data
#' tb_pr <- cor_IC(tb_rw)
#'
#' # Single ion descriptive an predictive statistics for all measured ions
#' stat_X(tb_pr, file.nm)
#'
#' # Descriptive an predictive statistics for 13C/12C ratios
#' stat_R(tb_pr, "13C", "12C", file.nm, .zero = TRUE)
#'
#' # Descriptive an predictive statistics for 13C/12C ratios (external)
#' stat_R(tb_pr, "13C", "12C", sample.nm, file.nm, .nest = file.nm,
#' .zero = TRUE)
stat_X <- function(.IC, ..., .X = NULL, .N = NULL, .species = NULL,
.t = NULL, .stat = point::names_stat_X$name,
.label = "none", .meta = FALSE, .output = "sum") {
# Checks
stat_validator(.IC, .stat, point::names_stat_X$name, .output, .label)
# Metadata
if(.meta) meta <- unfold(.IC, merge = FALSE)
# Quoting the call (user-supplied expressions) and complete if NULL
args <- inject_args(
.IC,
enquos(.X = .X, .N = .N, .species = .species, .t = .t),
type = c("processed", "group")
)
# Argument check
argument_check(.IC, args, "processed")
# Grouping
gr_by <- enquos(...)
# New quosures
new_args <- arg_builder(args, "X")
args <- append(args, new_args)
# The statistics
calcs <- rlang::quos(
# number of measurements
dplyr::n(),
# sum of ion counts (important for external reproducibility calcs)
sum(!! args[[".N"]]),
# mean ion count rate
mean(!! args[[".X"]]),
# standard deviation (SD) count rate
sd(!! args[[".X"]]),
# RSD ion count rate
(!! args[["S_X"]] / !! args[["M_X"]]) * 100,
# standard error of the mean (SE) count rate
sd(!! args[[".X"]]) / sqrt(dplyr::n()),
# predicted SD count rate
sqrt(mean(!! args[[".N"]])),
# predicted RSD count rate
(1 / sqrt(mean(!! args[[".N"]]))) * 100,
# predicted SE count rate
sqrt(mean(!! args[[".N"]]) / dplyr::n()),
# reduced chi squared
(!! args[["SeM_X"]] / !! args[["hat_SeM_N"]]) ^ 2
)
# The statistic names (depend on user-supplied expression)
ls_nm <- sapply(new_args, as_name)
# Set statistic names
calcs <- rlang::set_names(calcs, nm = ls_nm)
# Stat selection
vars <- stat_selector(.stat, ls_nm)
# Render latex variable names
if (.label == "latex" | .label == "webtex") {
ls_latex <- rlang::set_names(
vars$pos,
tex_labeller(point::names_stat_X, .stat, .label)
)
}
# Evaluate expressions
IC <- dplyr::group_by(.IC, !!! gr_by, !! args[[".species"]]) %>%
rlang::eval_tidy(
expr =
rlang::call2(mod_call[[.output]], rlang::expr(.), !!! calcs,
.ns = "dplyr")
) %>%
dplyr::ungroup() %>%
dplyr::select(-dplyr::any_of(vars$neg))
# Return metadata
if (.meta) IC <- fold(IC, type = ".mt", meta = meta)
# Output (Rmarkdown latex labels or just print in console?)
if (.label == "latex" | .label == "webtex") {
IC <- dplyr::mutate(
IC,
!! args[[".species"]] :=
purrr::map_chr(!! args[[".species"]], ion_labeller, label = .label)
) %>%
dplyr::select(!!! gr_by, !! args[[".species"]], !!! ls_latex)
}
# Return
IC
}
#' @rdname stat_X
#'
#' @export
stat_R <- function(.IC, .ion1, .ion2, ..., .nest = NULL, .X = NULL, .N = NULL,
.species = NULL, .t = NULL,
.stat = point::names_stat_R$name, .label = "none",
.output = "sum", .zero = FALSE){
# Checks
stat_validator(.IC, .stat, point::names_stat_R$name, .output, .label)
# Quoting the call (user-supplied expressions)
args <- inject_args(
.IC,
enquos(.X = .X, .N = .N, .species = .species, .t = .t),
type = c("processed", "group")
)
# Argument check
argument_check(.IC, args, "processed")
# Grouping
gr_by <- enquos(...)
# Nesting
nest <- enquo(.nest)
# External precision
if (!is.null(rlang::quo_get_expr(nest))) {
# Calculate single ion stat to obtain mean and total ion counts
.IC <- rlang::call2("stat_X", .IC, !!! gr_by, !!! args, .ns = "point") %>%
rlang::eval_tidy()
# Updated quotes
args[[".X"]] <- quo_updt(args[[".X"]], pre = "M")
args[[".N"]] <- quo_updt(args[[".N"]], pre = "tot")
# Updated grouping
gr_by <- gr_by[!sapply(gr_by, as_name) %in% as_name(nest)]
# Check whether groups have enough observations
.IC <- dplyr::add_count(.IC, !!! gr_by, !! args[[".species"]])
if (any(.IC$n <= 1)) {
warning(
paste0("Some groups have too few observations for a reliable",
" estimation of the external precision. These groups have been",
" omitted."),
call. = FALSE
)
.IC <- dplyr::filter(.IC, .data$n > 1)
}
# default to `TRUE`
nest <- TRUE
}
# If t column (time) is empty create a manual time increment (important for
# external precision)
if (!(as_name(args[[".t"]]) %in% colnames(.IC))) {
.IC <- dplyr::group_by(.IC, !!! gr_by, !! args[[".species"]]) %>%
dplyr::mutate(!! args[[".t"]] := dplyr::row_number()) %>%
dplyr::ungroup()
}
# Remove white space in ion names and add underscore for polyatomic species
.ion1 <- ion_trim(.ion1)
.ion2 <- ion_trim(.ion2)
# Update quosures (rare and common isotope)
args <- rlang::list2(
!!! args,
# Update quosures (rare isotope)
X1 = quo_updt(args[[".X"]], post = .ion1), # count rate
N1 = quo_updt(args[[".N"]], post = .ion1), # counts
# Update quosures (common isotope)
X2 = quo_updt(args[[".X"]], post = .ion2), # count rate
N2 = quo_updt(args[[".N"]], post = .ion2) # counts
)
# New quosures
new_args <- arg_builder(args, "R")
args <- append(args, new_args)
# The statistics
calcs <- rlang::quos(
# number of measurements
dplyr::n(),
# mean isotope ratio
mean(!! args[["X1"]]) / mean(!! args[["X2"]]),
# SD isotope ratio
stat_SDprop(!! args[["X1"]], !! args[["X2"]]),
# RSD isotope ratio
(!! args[["S_R"]] / !! args[["M_R"]]) * 1000,
# SE isotope ratio
!! args[["S_R"]] / sqrt(dplyr::n()),
# RSE isotope ratio
(!! args[["SeM_R"]] / !! args[["M_R"]]) * 1000,
# predictive SD isotope ratio
stat_SDprop(!! args[["N1"]], !! args[["N2"]], predicted = TRUE),
# predictive RSD isotope ratio
!! args[["hat_S_R"]] / !! args[["M_R"]] * 1000,
# predictive SE isotope ratio
!! args[["hat_S_R"]] / sqrt(dplyr::n()),
# predictive RSE isotope ratio
!! args[["hat_SeM_R"]] / !! args[["M_R"]] * 1000,
# reduced chi squared
(!! args[["SeM_R"]] / !! args[["hat_SeM_R"]]) ^ 2
)
# The statistic names (depend on user-supplied expression)
ls_nm <- sapply(new_args, as_name)
# Set statistic names
calcs <- rlang::set_names(calcs, nm = ls_nm)
# Extra arg in case of mutate and transmute (isotope ratio for each time step)
if (.output != "sum") {
R <- paste("R", as_name(args[[".X"]]), sep = "_")
calcs[[R]] <- rlang::quo(!! args[["X1"]] / !! args[["X2"]])
}
# Stat selection
vars <- stat_selector(.stat, ls_nm)
# Render latex variable names
if (.label == "latex" | .label == "webtex") {
tb_tex <- point::names_stat_R
if (isTRUE(nest)) {
tb_tex <- dplyr::mutate(
tb_tex,
origin = dplyr::case_when(origin == "X" ~ "M", origin == "N" ~ "Ntot")
)
}
ls_latex <- rlang::set_names(
vars$pos,
tex_labeller(tb_tex, .stat, .label)
)
}
# Evaluate expressions and calls
IC <- rlang::eval_tidy(zeroCt_call(.zero, .IC, .ion1, .ion2, gr_by, args)) %>%
cov_R(c(.ion1, .ion2), !!! gr_by, .species = !! args[[".species"]],
.t = !! args[[".t"]]) %>%
dplyr::group_by(!!! gr_by) %>%
rlang::eval_tidy(
expr = rlang::call2(mod_call[[.output]], rlang::expr(.), !!! calcs,
.ns = "dplyr")
) %>%
dplyr::ungroup() %>%
dplyr::select(-dplyr::any_of(vars$neg))
# Output
if (.label == "latex" | .label == "webtex") {
IC <- dplyr::mutate(
IC,
ratio.nm = R_labeller(.ion1, .ion2, label = .label)
) %>%
dplyr::select(!!! gr_by, .data$ratio.nm, !!! ls_latex)
}
# Return
dplyr::mutate(IC, ratio.nm = paste(.ion1, .ion2, sep = "/"))
}
#' Propagation of errors for isotope ratios
#'
#' \code{stat_SDprop} function for propagation of descriptive and predictive
#' (Poisson) error statistics for isotope ratios (R).
#'
#' Isotope ratios are based on two measured variables, i.e., two isotopes of a
#' single chemical species. The combined variable compounds the uncertainty
#' associated with the individual measurement. Hence error propagation is
#' required to obtain a reliable estimate of the uncertainty associated
#' with the compounded variable (i.e. isotope ratio).
#'
#' @param ion1 A numeric vector constituting the single ion count rate of the
#' heavy isotope if argument predicted is set to \code{FALSE}, otherwise counts
#' are required.
#' @param ion2 A numeric vector constituting the single ion count rate of the
#' light isotope if argument predicted is set to \code{FALSE}, otherwise counts
#' are required.
#' @param type A character string for the type of uncertainty estimate:
#' \code{"sd"}, standard deviation; \code{"rsd"}, relative standard deviation in
#' per mille; \code{"se"}, standard error of the mean; \code{"rse"}, relative
#' standard error of the mean in per mille.
#' @param predicted Logical indicating whether sd is descriptive \code{FALSE},
#' or based on predicted Poisson value \code{TRUE}.
#'
#' @return A numeric vector containing the propagated uncertainty of the isotope
#' ratio.
#' @export
#' @examples
#
#' # Light isotope count rates
#' `32S` <- c(22318.19, 22151.20, 22429.52, 22143.78, 22574.25, 22455.50)
#'
#' # Heavy isotope count rate
#' `34S` <- c(231.4838, 220.3725, 255.5584, 237.0395, 244.4471, 238.8914)
#'
#' # Propagation
#' stat_SDprop(`34S`, `32S`, "rse")
stat_SDprop <- function(ion1, ion2, type = "sd", predicted = FALSE){
M_ion1 <- mean(ion1) # heavy
M_ion2 <- mean(ion2) # light
M_R <- M_ion1 / M_ion2 # R
n_ion1 <- sum(is.finite(ion1)) # observations heavy
n_ion2 <- sum(is.finite(ion2)) # observations light
if (n_ion1 != n_ion2) {
stop("Unequal number of measurements between isotopes.", call. = FALSE)
}
if (isTRUE(predicted)) {
hat_sd <- sqrt((1 / sum(ion1)) + (1 / sum(ion2)))
if (type == "sd") return(hat_sd * M_R * sqrt(n_ion1))
if (type == "rsd") return(hat_sd * sqrt(n_ion1) * 1000)
if (type == "se") return(hat_sd * M_R / sqrt(n_ion1))
if (type == "rse") return((hat_sd / sqrt(n_ion1)) * 1000)
} else {
S_ion1 <- sd(ion1)
S_ion2 <- sd(ion2)
sd <- sqrt(
((S_ion1 / M_ion1) ^ 2) + ((S_ion2 / M_ion2) ^ 2) -
(2 * (cov(ion1, ion2, method = "pearson", use = "everything") /
(M_ion1 * M_ion2))
)
)
if (type == "sd") return(sd * M_R)
if (type == "rsd") return(sd * 1000)
if (type == "se") return(sd * M_R / sqrt(n_ion1))
if (type == "rse") return((sd / sqrt(n_ion1)) * 1000)
}
}
#-------------------------------------------------------------------------------
# Helper functions for parsing, testing and validation
#-------------------------------------------------------------------------------
# Switch output complete dataset, stats or summary stats
mod_call <- c(complete = "mutate", stat = "transmute", sum = "summarize")
# Call specific to removal of zero count measurements with ZeroCt
zeroCt_call <- function(zero, IC, .ion1, .ion2, gr_by, args) {
if (isTRUE(zero)) {
rlang::call2(
"zeroCt",
IC,
.ion1 = .ion1,
.ion2 = .ion2,!!!gr_by,
.N = args[[".N"]],
.species = args[[".species"]],
.warn = TRUE,
.ns = "point"
)
} else {
rlang::call2("invisible", IC)
}
}
# Build new quosures and names for calcs
arg_builder <- function(args, stat, ion = NULL, append = NULL) {
if (stat == "X") arg_names <- point::names_stat_X
if (stat == "R") arg_names <- point::names_stat_R
if (stat == "model") {
arg_names <- point::names_model
pre <- NULL
} else {
pre <- "."
}
# no origin of variable names
if (!"origin" %in% colnames(arg_names)) arg_names$origin <- NA_character_
arg_names <- dplyr::mutate(
arg_names,
origin = dplyr::if_else(is.na(.data$origin), .data$derived, .data$origin),
label =
dplyr::if_else(
.data$origin == .data$derived,
paste0(paste(.data$name, .data$origin, sep = "_"), append),
paste0(paste(.data$name, .data$derived, sep = "_"), append)
),
name =
dplyr::if_else(
.data$origin == .data$derived,
.data$name,
paste(.data$name, .data$derived, sep = "_")
)
)
# quosure update
args <- purrr::map2(
arg_names$origin,
arg_names$name,
~quo_updt(args[[paste0(pre, .x)]], pre = .y)
)
# wide format with ions
if (!is.null(ion)) args <- purrr::map(args, ~quo_updt(.x, post = ion))
# set names
rlang::set_names(args, nm = arg_names$label)
}
# latex labeller function
tex_labeller <- function(vars, stat, label) {
if (!"origin" %in% colnames(vars)) vars$origin <- vars$derived
names_vars <- dplyr::filter(vars, .data$name %in% stat) %>%
# if variable has a stat component
dplyr::mutate(
derived =
dplyr::if_else(
stringr::str_detect(.data$derived, "[[:punct:]]"),
stringr::str_extract("M_R", "(?<=[[:punct:]])[[:alpha:]]"),
.data$derived
)
)
purrr::pmap_chr(
list(
var = names_vars$derived,
org = names_vars$origin,
stat = names_vars$name
),
stat_labeller,
label = label
)
}
# consistency checks
stat_validator <- function(IC, stat_in = NULL, stat_def = NULL,
output = "sum", label = "none") {
if (!tibble::is_tibble(IC)) {
stop("Ion count dataset should be a tibble object.", call. = FALSE)
}
if (!all(stat_in %in% stat_def)) {
stop("Unkown statistic.", call. = FALSE)
}
if (output != "sum" & label != "none") {
stop("Latex labels is not supported for complete datasets.", call. = FALSE)
}
}
# Stat selection function
stat_selector <- function(stat, vars) {
str_stat <- stringr::str_c("^", stat, "_", collapse = "|")
ls_vars <- list()
ls_vars$pos <- purrr::keep(vars, ~stringr::str_detect(., str_stat))
ls_vars$neg <- purrr::discard(vars, ~stringr::str_detect(., str_stat))
ls_vars
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.