Nothing
#' Verify the data type of a value
#'
#' Function to verify the data type of a value.
#'
#' @param x the value
#' @param type expected data type
#' @param check_convertible [logical] also try, if a conversion to the
#' declared data type would work.
#' @param threshold_value [numeric] from=0 to=100. percentage of failing
#' conversions allowed.
#' @param return_percentages [logical] return the percentage of
#' mismatches.
#' @param check_conversion_stable [logical] do not distinguish convertible
#' from convertible, but with issues
#' @param robust_na [logical] treat white-space-only-values as `NA`
#'
#' @return if `return_percentages`: if not `check_convertible`, the percentage
#' of mismatches instead of logical value,
#' if `check_convertible`, return a named
#' vector with the percentages of all cases
#' (names of the vector are
#' `match`, `convertible_mismatch_stable`,
#' `convertible_mismatch_unstable`,
#' `nonconvertible_mismatch`)
#' if not `return_percentages`: if `check_convertible` is `FALSE`,
#' [logical] whether `x` is of the expected type
#' if `check_convertible` is `TRUE`
#' [integer] with the states `0, 1, 2, 3`: 0 = Mismatch, not convertible
#' 1 = Match
#' 2 = Mismatch, but convertible
#' 3 = Mismatch, convertible,
#' but with issues (e.g.,
#' loss of decimal places)
#' @importFrom stats setNames
#'
#' @family data_management
#' @concept data_management
#' @keywords internal
util_check_data_type <- function(x, type, check_convertible = FALSE,
threshold_value = 0, return_percentages =
FALSE, check_conversion_stable = FALSE,
robust_na = FALSE) {
# FIXME: SLOW!!
hash_id <-
rlang::hash(list(x,
type,
check_convertible,
check_conversion_stable,
threshold_value,
robust_na,
return_percentages))
# HINT: if dep on dataframe env: as.list(.dataframe_environment), also remind
# global options(). Also remind possible side effects
if (exists(hash_id, .cache[[".cache"]])) {
return(get(hash_id, .cache[[".cache"]]))
}
if (robust_na) {
empty <- util_empty
} else {
empty <- is.na
}
checks <- setNames(
list(
util_is_integer,
is.numeric,
is.character,
lubridate::is.timepoint
),
nm = c(
DATA_TYPES$INTEGER,
DATA_TYPES$FLOAT,
DATA_TYPES$STRING,
DATA_TYPES$DATETIME
)
)
# function to check whether the data type matches
.is <- try(checks[[type]], silent = TRUE)
if (length(.is) != 1 || inherits(.is, "try-error")) {
util_error("%s is not a known data type.", dQuote(type),
applicability_problem = TRUE)
}
# function to check whether the data type matches (disregarding `NA`s)
.is_or_na <- function(...) all(empty(...) | .is(...))
mismatches <- !vapply(x, .is_or_na, FUN.VALUE = logical(1))
if (length(mismatches) < 1) {
pct_mismatches <- 0
} else {
pct_mismatches <- sum(mismatches) / length(mismatches) * 100
}
if (check_convertible) {
# perform data type conversion
x2 <- util_data_type_conversion(x, type)
if (check_conversion_stable) {
stable <- util_conversion_stable(x, type)
} else {
stable <- rep(TRUE, length(x))
}
convertible_mismatch_all <-
mismatches &
vapply(x, empty, FUN.VALUE = logical(1)) ==
vapply(x2, empty, FUN.VALUE = logical(1))
convertible_mismatch_unstable <- convertible_mismatch_all & !stable
convertible_mismatch_stable <- convertible_mismatch_all & stable
pct_convertible_mismatch_stable <-
sum(convertible_mismatch_stable) /
length(convertible_mismatch_stable) * 100
pct_convertible_mismatch_unstable <-
sum(convertible_mismatch_unstable) /
length(convertible_mismatch_unstable) * 100
nonconvertible_mismatch <-
vapply(x, empty, FUN.VALUE = logical(1)) !=
vapply(x2, empty, FUN.VALUE = logical(1))
if (length(nonconvertible_mismatch) < 1) {
pct_nonconvertible_mismatch <- 0
} else {
pct_nonconvertible_mismatch <-
sum(nonconvertible_mismatch) /
length(nonconvertible_mismatch) * 100
}
if (return_percentages) {
result <- c(
match = 100 - pct_mismatches,
convertible_mismatch_stable = pct_convertible_mismatch_stable,
convertible_mismatch_unstable = pct_convertible_mismatch_unstable,
nonconvertible_mismatch = pct_nonconvertible_mismatch
)
} else {
if (pct_nonconvertible_mismatch > threshold_value)
{ # NB: Order does matter
# 0 = Mismatch, not convertible
result <- 0L
} else if (pct_mismatches <= threshold_value) {
# 1 = Match
result <- 1L
} else if (pct_nonconvertible_mismatch <= threshold_value) {
# 2 = Mismatch, but convertible
result <- 2L
} else {
util_error(
"Internal error in util_check_data_type, sorry, and please report")
}
}
} else {
if (return_percentages) {
result <- pct_mismatches
} else {
result <- pct_mismatches <= threshold_value
}
}
assign(hash_id, value = result, envir = .cache[[".cache"]])
return(result)
}
.cache <- new.env(parent = emptyenv()) # HINT: This is exported by dq_reoprt2, but not yet for parallel rendering, which is complicated an not fully working anyways.
util_reset_cache <- function() {
assign(x = ".cache", value = new.env(parent = emptyenv()), envir = .cache)
}
util_reset_cache()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.