# utils_rules.R
# ::rtemis::
# EDG rtemis.org
#' Match Rules to Cases
#'
#' @param x Matrix / data frame: Input features
#' @param rules Character vector: Rules
#' @param verbosity Integer: Verbosity level.
#'
#' @author EDG
#'
#' @return cases-by-rules matrix (binary; 1: match, 0: no match)
#' @keywords internal
#' @noRd
match_cases_by_rules <- function(x, rules, prefix = "Rule_", verbosity = 1L) {
n_cases <- NROW(x)
n_rules <- length(rules)
if (!is.data.table(x)) {
# {data.table}
x <- data.table::as.data.table(x)
} else {
# Either make copy, or set ID to NULL before exit
# x <- copy(x)
on.exit(x[, ID := NULL])
}
# appease R CMD check
ID <- NULL
x[, ID := seq_len(n_cases)]
cxr <- matrix(0, n_cases, n_rules)
if (verbosity > 0L) {
msg2start(
"Matching",
hilite(n_rules),
"rules to",
hilite(n_cases),
"cases..."
)
}
for (i in seq_along(rules)) {
match <- x[eval(parse(text = rules[i])), ID]
cxr[match, i] <- 1
}
if (!is.null(prefix)) {
colnames(cxr) <- paste0(prefix, seq_len(n_rules))
}
if (verbosity > 0L) {
msg2done()
}
cxr
} # rtemis::match_cases_by_rules
#' Index cases by rules
#'
#' Get an index of which cases match which rule - meant for cases where each case matches one rule
#' and one rule only
#'
#' @inheritParams match_cases_by_rules
#'
#' @author EDG
#' @keywords internal
#' @noRd
index_cases_by_rules <- function(x, rules, verbosity = 1L) {
cxr <- match_cases_by_rules(x, rules, verbosity)
apply(cxr, 1, \(i) which(i == 1))
}
#' Prune a rule to a maximum length
#'
#' @param rule Character: A rule.
#' @param max_length Integer: The maximum number of conditions to keep.
#' @param sep Character: The separator between conditions.
#'
#' @return Character: The pruned rule.
#' @author EDG
#' @keywords internal
#' @noRd
simple_prune_ <- function(rule, max_length, sep = " & ") {
conditions <- strsplit(rule, sep)[[1]]
if (length(conditions) > max_length) {
conditions <- conditions[1:max_length]
paste(conditions, collapse = sep)
} else {
rule
}
} # rtemis::simple_prune_
#' Prune rules to a maximum length
#'
#' @param rule Character vector: Rules.
#' @param max_length Integer: The maximum number of conditions to keep.
#' @param sep Character: The separator between conditions.
#'
#' @return Character: The pruned rule.
#'
#' @author EDG
#' @keywords internal
#' @noRd
simple_prune <- function(rules, max_length, sep = " & ") {
rules <- sapply(
rules,
simple_prune_,
max_length = max_length,
sep = sep,
USE.NAMES = FALSE
)
rules
} # rtemis::simple_prune
#' Extract variable names from rules
#'
#' @param rules Character vector: Rules.
#' @param unique Logical: If TRUE, return only unique variables.
#'
#' @return Character vector: Variable names.
#'
#' @author EDG
#' @export
get_vars_from_rules <- function(rules, unique = FALSE) {
# Extract variables from rules
vars <- unique(unlist(strsplit(rules, " & ")))
# Get string up to first "<", ">", "=", "!", or "%in%"
vars <- gsub("(<|>|=|!|%in%).*", "", vars)
vars <- gsub(" .*", "", vars)
if (unique) {
vars <- unique(vars)
}
vars
}
#' Format rules
#'
#' Converts R-executable logical expressions to a more human-friendly format
#'
#' @param x Vector, string: Logical expressions
#' @param space_after_comma Logical: If TRUE, place spaces after commas.
#' @param decimal_places Integer: Limit all floats (numbers of the form 9.9) to this many
#' decimal places
#'
#' @return Character vector: Formatted rules.
#'
#' @author EDG
#' @keywords internal
#' @noRd
format_rules <- function(x, space_after_comma = FALSE, decimal_places = NULL) {
x <- gsub("[&+]", "AND", x)
x <- gsub(">", " > ", x)
x <- gsub("<=", " <= ", x)
x <- gsub("%in%", "IN", x)
x <- gsub("c\\(", "{", x)
x <- gsub("\\)", "}", x)
x <- gsub("'", "", x)
if (space_after_comma) {
x <- gsub(",", ", ", x)
}
if (!is.null(decimal_places)) {
x <- gsubfn::gsubfn(
"([0-9.]+[0-9])",
function(i) ddSci(i, decimal_places = decimal_places),
x,
engine = "R"
)
}
x
} # rtemis::format_rules
#' Format LightRuleFit rules
#'
#' Converts R-executable logical expressions to a more human-friendly format
#'
#' @param x Vector, string: Logical expressions
#' @param space_after_comma Logical: If TRUE, place spaces after commas.
#' @param decimal_places Integer: Limit all floats (numbers of the form 9.9) to this many
#' decimal places
#'
#' @return Character vector: Formatted rules.
#'
#' @author EDG
#' @keywords internal
#' @noRd
format_LightRuleFit_rules <- function(
x,
space_after_comma = FALSE,
decimal_places = NULL
) {
x <- gsub("[&+]", "AND", x)
x <- gsub(">", " > ", x)
x <- gsub("<=", " <= ", x)
x <- gsub("%in%", "IN", x)
x <- gsub("%notin%", "NOT IN", x)
x <- gsub("c\\(", "{", x)
x <- gsub("\\)", "}", x)
x <- gsub("'", "", x)
if (space_after_comma) {
x <- gsub(",", ", ", x)
}
if (!is.null(decimal_places)) {
x <- gsubfn::gsubfn(
"([0-9.]+[0-9])",
function(i) ddSci(i, decimal_places = decimal_places),
x,
engine = "R"
)
}
gsub(" ", " ", x)
} # rtemis::format_LightRuleFit_rules
# rules2medmod
# ::rtemis::
# 2018 EDG rtemis.org
#' Convert rules from cutoffs to median/mode and range
#'
#' Convert rules from cutoffs to `median (range)` and `mode (range)` format
#'
#' @param rules Character, vector: Input rules
#' @param x Data frame: Data to evaluate rules
#' @param .ddSci Logical: If TRUE, format all continuous variables using
#' [ddSci], which will give either 2 decimal places, or scientific
#' notation if two decimal places result in 0.00
#' @param verbosity Integer: Verbosity level.
#'
#' @return Character vector.
#'
#' @author EDG
#' @export
rules2medmod <- function(rules, x, .ddSci = TRUE, verbosity = 1L) {
cxr <- match_cases_by_rules(x, rules, verbosity = verbosity)
nrules <- length(rules)
rules_f <- vector("character", nrules)
frmt <- if (.ddSci) ddSci else I
if (verbosity > 0L) {
msg2("Converting rules...")
}
for (i in seq(rules)) {
if (verbosity > 1L) {
cat("#", i, "/", nrules, "...\n", sep = "")
}
dat <- x[cxr[, i] == 1, ]
sub <- strsplit(rules[i], "&")[[1]]
rule <- character()
for (j in seq(sub)) {
categorical <- grepl("%in%", sub[j])
if (categorical) {
var <- gsub("\\s", "", strsplit(sub[j], "%in%")[[1]][1])
vals <- dat[[var]]
value <- paste0(
get_mode(vals),
" (",
paste(levels(droplevels(vals)), collapse = ", "),
")"
)
rule[j] <- paste0(var, " = ", value)
} else {
sub[j] <- gsub(">|>=|<|<=", "@", sub[j])
var <- gsub("\\s", "", strsplit(sub[j], "@")[[1]][1])
vals <- dat[[var]]
value <- paste0(
frmt(median(vals)),
" (",
frmt(min(vals)),
":",
frmt(max(vals)),
")"
)
rule[j] <- paste0(var, " = ", value)
}
} # /loop through each rule's conditions
# This consolidates conditions like a > 3 & a > 5 to one
rules_f[i] <- paste(unique(rule), collapse = " & ")
} # /loop through rules
if (verbosity > 0L) {
msg2("Done")
}
rules_f
} # rtemis::rules2medmod
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.