#' TURF Analysis
#'
#' Runs Total Unduplicated Reach & Frequency with options for case weights,
#' constraints on combinations, item weights, and three methods of a greedy
#' algorithm.
#'
#' @details
#' Need some dang details here.
#'
#' @param data A data frame.
#' @param items Columns on which to run TURF. Must contain only ones, zeros, or
#' `NA`. Suggest using [is_onezero][onezero::is_onezero] ahead of time to check.
#' @param case_weights An optional column of case weights to use in reach
#' calculations. Rows with `NA` will be removed from the base.
#' @param item_weights An optional named vector of non-zero weights to associate
#' with each item. Items not specified will be given a default weight of 1.
#'
#' Common examples are profit, revenue, or simply relative importance weights.
#'
#' @param k Set size, number of `items` to choose in a combination. Can be a vector of
#' values from 1 to number of `items`. Values outside of that range will be
#' silently ignored. [floor][base::floor] is used to override accidental use of
#' decimals. Default is 1.
#' @param depth Number of `items` needed in order to be considered "reached."
#' Can be any number between 1 to number of `items`. Default is 1.
#' @param force_in,force_in_together,force_out,force_out_together Options for
#' reducing the number of combinations by adding constraints.
#'
#' `force_in` and `force_out` accept a single tidyselect expression. Items
#' specified here will force every combination to include or exclude those items.
#'
#' Use `force_in_together` or `force_out_together` to make items appear together
#' or not appear together in the combinations. Pass an arbitrary number of
#' tidyselect expressions to [together][onezero::together]. Duplicates and
#' inclusions/exclusions containing only one item will be silently dropped.
#'
#' @param greedy_begin Set size at which the greedy algorithm will kick in. Default
#' is `Inf`.
#' @param greedy_entry Method for entering variables into greedy algorithm.
#' Options are `"shapley"` (default) which uses approximated Shapley Values,
#' `"reach"` or `"freq"` for using the combination with the highest reach or
#' frequency, respectively, from the `k[i-1]` set size.
#' @param progress Display progress? Default is `FALSE`. Adds a slight processing
#' overhead, but not much. Useful when number of `items` exceeds 20.
#'
#' @importFrom rlang enquo abort warn arg_match
#' @importFrom glue glue
#' @importFrom tidyselect eval_select
#' @importFrom purrr map discard map_int map_lgl
#' @importFrom dplyr setdiff select all_of %>% between arrange rename matches
#' filter pull as_tibble bind_cols mutate desc distinct ungroup group_by
#' case_when
#' @importFrom tibble rowid_to_column enframe tibble deframe
#' @importFrom collapse dapply roworderv fmean seqid
#' @importFrom stringr str_pad
#' @importFrom cli cat_line rule col_yellow
#' @importFrom arrangements combinations
#' @importFrom Rfast rowsums
#' @importFrom readr parse_number
#' @importFrom utils head
#' @importFrom scales percent
#' @importFrom forcats fct_inorder
#'
#' @examples
#' library(dplyr)
#'
#' # Simple 10-item TURF
#' x <- turf(FoodSample, Bisque:Ribeye, k = 1:10)
#'
#' # With items forced in and out
#' # Forcing in "Ribeye"
#' # Forcing out items with an individual reach of < 10%
#' turf(
#' data = FoodSample,
#' items = Bisque:Ribeye,
#' k = 1:10,
#' force_in = Ribeye,
#' force_out = where(~mean(.x, na.rm = TRUE) < 0.1)
#' )
#'
#' # Forcing items in and out together
#' turf(
#' data = FoodSample,
#' items = Bisque:Ribeye,
#' case_weights = weight,
#' k = c(1:4, 6:10),
#' force_in_together = together(
#' c(Chicken, Salmon),
#' c(Chili, Tofu, Turkey)
#' ),
#' force_out_together = together(
#' matches("eye"),
#' c(2, 10)
#' ),
#' greedy_begin = 10,
#' greedy_entry = "reach"
#' )
#'
#' # Item weights
#' turf(
#' data = FoodSample,
#' items = 2:6,
#' k = 1:6,
#' item_weights = c(
#' Bisque = 1.2,
#' Chicken = 2.5,
#' Tofu = 2.9,
#' Chili = 1.7,
#' PorkChop = 3.0
#' )
#' )
#'
#' @export
turf <- function(
data,
items,
case_weights,
item_weights,
k = 1,
depth = 1,
force_in,
force_in_together,
force_out,
force_out_together,
greedy_begin = Inf,
greedy_entry = "shapley",
progress = FALSE
) {
# Preliminary error checks ------------------------------------------------
# Start total clock
total.clock1 <- Sys.time()
if (!is.data.frame(data)) {
abort("Input to `data` must be a data frame.")
}
if (!is.numeric(k)) {
abort("Input to `k` must be one or more numeric values.")
}
if (!is.numeric(depth) | length(depth) != 1) {
abort("Input to `depth` must be a single numeric value.")
}
depth <- floor(depth)
if (!is.numeric(greedy_begin) | length(greedy_begin) != 1) {
abort("Input to `greedy_begin` must be a single numeric value.")
}
greedy_begin <- floor(greedy_begin)
rlang::arg_match(
arg = greedy_entry,
values = c("shapley", "reach", "freq")
)
if (greedy_entry == "shapley" & !missing(force_out_together)) {
abort("'shapley' greedy entry cannot be combined with `force_out_together`.")
}
# Get names of things -----------------------------------------------------
# `items`
item.names <- names(eval_select(expr = enquo(items), data = data))
# `case_weights`
has.weights <- FALSE
if (!missing(case_weights)) {
case.weights.names <- names(
eval_select(
expr = enquo(case_weights),
data = data
)
)
has.weights <- TRUE
}
# `force_in`
do.force.in <- FALSE
force.in.names <- character()
if (!missing(force_in)) {
force.in.names <- names(
eval_select(
expr = enquo(force_in),
data = data
)
)
do.force.in <- TRUE
}
# `force_in_together`
do.force.in.together <- FALSE
force.in.together.names <- list()
if (!missing(force_in_together)) {
force.in.together.names <-
map(
.x = force_in_together,
.f = ~eval_select(
expr = .x,
data = data
)
) %>%
map(names) %>%
discard(~length(.x) <= 1) %>%
# this removes accidental duplicates
map(sort) %>%
unique()
do.force.in.together <- length(force.in.together.names) > 0
}
# `force_out`
do.force.out <- FALSE
force.out.names <- character()
if (!missing(force_out)) {
force.out.names <- names(
eval_select(
expr = enquo(force_out),
data = data
)
)
do.force.out <- TRUE
}
# `force_out_together`
do.force.out.together <- FALSE
force.out.together.names <- list()
# `force_out_together`
if (!missing(force_out_together)) {
force.out.together.names <-
map(
.x = force_out_together,
.f = ~eval_select(
expr = .x,
data = data
)
) %>%
map(names) %>%
discard(~length(.x) <= 1) %>%
# this removes accidental duplicates
map(sort) %>%
unique()
do.force.out.together <- length(force.out.together.names) > 0
}
# Set up weights ----------------------------------------------------------
if (missing(case_weights)) {
wgt.vec <- rep(1, times = nrow(data))
} else {
if (length(case.weights.names) > 1) {
abort("Can only provide one column of weights to `case_weights`.")
}
if (case.weights.names %in% item.names) {
abort(glue("Column '{case.weights.names}' cannot used both in `items` and `case_weights`."))
}
wgt.vec <- data[[case.weights.names]]
if (!is.numeric(wgt.vec)) {
abort("Input to `case_weights` must be a numeric column.")
}
}
# Use this as the denominator for reach calculations later
wgt.base <- sum(wgt.vec, na.rm = TRUE)
# Validate: all `force_in` in `items` -------------------------------------
if (do.force.in) {
# any items in `force_in` that are not in `items`?
bad.names <- setdiff(force.in.names, item.names)
fail <- length(bad.names) > 0
if (fail) {
# warn that mismatches will be ignored
bad.names.string <- paste(glue("{bad.names}"), collapse = ", ")
msg <- glue("All items in `force_in` should appear in `items`. The following items are missing in `items` and will be ignored:\n{bad.names.string}")
warn(msg)
# update the vector of names
force.in.names <- force.in.names[!force.in.names %in% bad.names]
# set it to false if there is nothing left
if (length(force.in.names) == 0) {
do.force.in <- FALSE
}
}
}
# Validate: all `force_out` in `items` ------------------------------------
if (do.force.out) {
# any items in `force_out` that are not in `items`?
bad.names <- setdiff(force.out.names, item.names)
fail <- length(bad.names) > 0
if (fail) {
# warn that mismatches will be ignored
bad.names.string <- paste(glue("{bad.names}"), collapse = ", ")
msg <- glue("All items in `force_out` should appear in `items`. The following items are missing in `items` and will be ignored:\n{bad.names.string}")
warn(msg)
# update the vector of names
force.out.names <- force.out.names[!force.out.names %in% bad.names]
# set it to false if there is nothing left
if (length(force.out.names) == 0) {
do.force.out <- FALSE
}
}
}
# Validate: no overlap between `force_in` and `force_out` -----------------
if (do.force.in & do.force.out) {
# any items overlap?
bad.names <- intersect(force.in.names, force.out.names)
fail <- length(bad.names) > 0
if (fail) {
bad.names.string <- paste(glue("{bad.names}"), collapse = ", ")
msg <- glue("Cannot have items that appear both in `force_in` and `force_out`. The following variables are in both:\n{bad.names.string}")
abort(msg)
}
}
# Validate: all `force_in_together` in `items` ----------------------------
if (do.force.in.together) {
# index for dropping them if their lengths become 1
drop.fi <- numeric()
for (fi in seq_along(force.in.together.names)) {
bad.names <- setdiff(force.in.together.names[[fi]], item.names)
fail <- length(bad.names) > 0
if (fail) {
bad.names.string <- paste(glue("{bad.names}"), collapse = ", ")
msg <- glue("All items in `force_in_together` should appear in `items`. The following items are missing in `items` and will be ignored:\n{bad.names.string}")
warn(msg)
# drop them
force.in.together.names[[fi]] <-
force.in.together.names[[fi]][!force.in.together.names[[fi]] %in% bad.names]
if (length(force.in.together.names[[fi]]) < 2) {
drop.fi <- c(drop.fi, fi)
}
}
}
# drop the combos if necessary
if (length(drop.fi) > 0) force.in.together.names <- force.in.together.names[-drop.fi]
# reset the "do" things if necessary
if (length(force.in.together.names) == 0) do.force.in.together <- FALSE
}
# Validate: all `force_in_together` in `items` ----------------------------
if (do.force.out.together) {
# index for dropping them if their lengths become 1
drop.fo <- numeric()
for (fo in seq_along(force.out.together.names)) {
bad.names <- setdiff(force.out.together.names[[fo]], item.names)
fail <- length(bad.names) > 0
if (fail) {
bad.names.string <- paste(glue("{bad.names}"), collapse = ", ")
msg <- glue("All items in `force_in_together` should appear in `items`. The following items are missing in `items` and will be ignored:\n{bad.names.string}")
warn(msg)
# drop them
force.out.together.names[[fo]] <-
force.out.together.names[[fo]][!force.out.together.names[[fo]] %in% bad.names]
if (length(force.out.together.names[[fo]]) < 2) {
drop.fo <- c(drop.fo, fo)
}
}
}
# drop the combos if necessary
if (length(drop.fo) > 0) force.out.together.names <- force.out.together.names[-drop.fo]
# reset the "do" things if necessary
if (length(force.out.together.names) == 0) do.force.out.together <- FALSE
}
# Validate: no overlap btw `force_in_together` & `force_out_together -----
if (do.force.in.together & do.force.out.together) {
# index of list items to drop
drop.fi <- numeric()
drop.fo <- numeric()
for (fi in seq_along(force.in.together.names)) {
for (fo in seq_along(force.out.together.names)) {
bad.names <- intersect(
force.in.together.names[[fi]],
force.out.together.names[[fo]]
)
# this fails if intersection is more than 1
fail <- length(bad.names) >= 2
if (fail) {
bad.names.string <- paste(glue("{bad.names}"), collapse = ", ")
msg <- glue("The following items appeared both in `force_in_together` and `force_out_together`, constraints with these items will be ignored:\n{bad.names.string}")
warn(msg)
# update index of list items to drop
drop.fi <- c(drop.fi, fi)
drop.fo <- c(drop.fo, fo)
}
}
}
# drop the combos if necessary
if (length(drop.fi) > 0) force.in.together.names <- force.in.together.names[-drop.fi]
if (length(drop.fo) > 0) force.out.together.names <- force.out.together.names[-drop.fo]
# reset the "do" things if necessary
if (length(force.in.together.names) == 0) do.force.in.together <- FALSE
if (length(force.out.together.names) == 0) do.force.out.together <- FALSE
}
# Build constraints list --------------------------------------------------
constraints <- list(
"force_in" = force.in.names,
"force_in_together" = force.in.together.names,
"force_out" = force.out.names,
"force_out_together" = force.out.together.names
)
# Get the turf data -------------------------------------------------------
# Just grab the columns to be turf'd
item.mat <- select(data, all_of(item.names))
# Validate `k` ------------------------------------------------------------
# Make sure user is only requesting sets of `k` that make sense.
# These are the number of items before any forced exclusions take place
n.items <- ncol(item.mat)
# Establish final `k` --
# Setting to floor will force into an integer
# Need to sort to make sure it cycles thru the set sizes in an order
# that makes sense
# Drop duplicates, no need to run set sizes more than once
# Subset `k` to be between 1 and the number of items
k <-
floor(k) %>%
sort() %>%
unique() %>%
.[between(., 1, n.items)]
if (length(k) == 0) {
abort(glue(
"Input to `k` must contain integers between 1 and number of columns in `items` ({n.items})."
))
}
# Validate `depth` --------------------------------------------------------
if (!between(depth, 1, n.items)) {
abort("Input to `depth` must be a value between 1 and number of `items` ({n.items}).")
}
# Drop items due to exclusions --------------------------------------------
# Doing this here because it will save from having to create unnecessary
# combinations with {arrangements}
if (do.force.out) {
item.mat <- select(item.mat, -all_of(force.out.names))
}
# Finally prep turf data --------------------------------------------------
# Rfast::rowsums() needs the data to be in a matrix
item.mat <- as.matrix(item.mat)
# this needs to be updated if there are any forced exclusions
item.names <- colnames(item.mat)
n.items <- ncol(item.mat)
# Check and make sure that all of the columns contain only 1/0/NA
oz.check <- dapply(item.mat, is_onezero)
oz.fail <- any(!oz.check)
if (oz.fail) {
bad.names <- names(oz.check[!oz.check])
bad.names.string <- paste(bad.names, sep = ", ")
msg <- glue(
"All variables in `items` must contain only 0/1 data, the following do not:\n{bad.names.string}"
)
abort(msg)
}
# Check and see if any rows have 100% missing data
all.miss <-
dapply(
X = item.mat,
FUN = function(x) all(is.na(x)),
MARGIN = 1
) %>%
which()
if (length(all.miss) > 0) {
all.miss.string <- paste(all.miss, collapse = ", ")
msg <- glue(
"{length(all.miss)} rows in `data` have 100% missing values for the items specified in `items`. They will still be retained in the analysis and treated as \"unreached\". If you do not want those rows in the TURF analysis, please remove them ahead of time."
)
warn(msg)
}
# Replace NA with zero, makes sense since we are operating row-wise
# for reach. This radically improves the speed of Rfast::rowsums().
item.mat[is.na(item.mat)] <- 0
# Initialize objects for storing and displaying results -------------------
# For reach values
reach.list <- vector(
mode = "list",
length = length(k)
)
# how long does it take to make and constrain the combos?
combo.clock <- vector("double", length = length(k))
# how long does turf calculation take place?
turf.clock <- vector("double", length = length(k))
# Used for string padding
# This gets used for setting name of reach list as well as the item number
# identifiers in the data
pad <- max(nchar(k))
names(reach.list) <- paste0("k_", str_pad(k, width = pad, side = "left", pad = "0"))
# Item weights ------------------------------------------------------------
do.item.weights <- FALSE
if (!missing(item_weights)) {
item.wgt.names <- names(item_weights)
if (is.null(item.wgt.names)) {
abort("Input to `item_weights` must be a named vector.")
}
if (any(item.wgt.names == "")) {
abort("Cannot have empty characters as names in `item_weights`.")
}
if (any(is.na(item_weights))) {
abort("Every element of `item_weights` must be named.")
}
if (length(unique(item.wgt.names)) != length(item.wgt.names)) {
abort("There cannot be duplicate names in the names of `item_weights`.")
}
pos.check <- all(sign(item_weights) == 1)
if (!pos.check) {
abort("All `item_weights` must be positive and non-zero.")
}
bad <- setdiff(item.wgt.names, item.names)
if (length(bad) > 0) {
bad.string <- paste(bad, collapse = ", ")
msg <- glue(
"The following items specified in `item_weights` were not included in `items` and will be ignored:\n{bad.string}"
)
warn(msg)
item_weights <- item_weights[names(item_weights) %in% item.names]
}
item.wgt <- rep(1, times = n.items)
names(item.wgt) <- item.names
item.wgt.default <- enframe(
x = item.wgt,
name = "item",
value = "default"
)
item.wgt.new <- enframe(item_weights, name = "item", value = "new")
item.wgt <-
item.wgt.default %>%
left_join(item.wgt.new, by = "item") %>%
mutate(wgt = coalesce(new, default)) %>%
pull(wgt, name = item)
# if (all(item.wgt == 1)) {
# warn("All `item_weights` are 1 and will not be used.")
# do.item.weights <- FALSE
# } else {
# do.item.weights <- TRUE
# }
if (all(item.wgt == 1)) {
warn("All `item_weights` have a value of 1.")
}
do.item.weights <- TRUE
} else {
# item.wgt <- rep(1, times = n.items)
# names(item.wgt) <- item.names
item.wgt <- NULL
}
# Shapley approximation ---------------------------------------------------
# Shapley approximation used for shapley greedy entry
# Only need if using that method and greedy will actually kick in
# if (greedy_entry == "shapley" & greedy_begin <= max(k)) {
if (missing(case_weights)) {
shap.approx <-
shapley_approx(
data = data,
items = all_of(item.names),
item_weights = item_weights,
depth = depth,
return = "tibble"
) %>%
arrange(desc(shapley_value))
} else {
shap.approx <-
shapley_approx(
data = data,
items = all_of(item.names),
case_weights = {{case_weights}},
item_weights = item_weights,
depth = depth,
return = "tibble"
) %>%
arrange(desc(shapley_value))
}
# }
# Rearrange the combo matrix ----------------------------------------------
# Rearrange in order of reach
# TODO
# Do TURF ----------------------------------------------------------------
cat_line(rule("TURF", line = 2))
# string padding item names
pad <- nchar(n.items)
shap.keep <- 0
# Begin iteratin'
for (i in seq_along(k)) {
cat_line(glue("Running combinations of {k[i]}\n"))
# k cannot exceed number of items - this happens when there are
# forced exclusions and running all k from 1 to # orig items
if (k[i] > n.items) {
cat_line(
col_yellow(
"i Skipping this set size, cannot perform due to forced exclusions"
)
)
next
}
# depth cannot exceed k
if (depth > k[i]) {
cat_line(
col_yellow(
glue(
"i Skipping this set size, `depth` cannot exceed `k`"
)
)
)
next
}
# start combo clock
combo.clock1 <- Sys.time()
# generate full set of combinations
combos <- combinations(
x = item.names,
k = k[i]
)
colnames(combos) <- paste0("i_", str_pad(1:k[i], width = pad, side = "left", pad = "0"))
# reduce by forced inclusions
if (do.force.in) {
# index of rows to keep
keep.combos <- dapply(
X = combos,
FUN = function(x) all(force.in.names %in% x),
MARGIN = 1
)
# subset the combos
combos <- combos[keep.combos, , drop = FALSE]
# check and make sure rows remain
if (nrow(combos) == 0) {
cat_line(
col_yellow(
"i Skipping this set size, no combinations remain after forced inclusions"
)
)
next
}
}
# reduce by forced exclusions
if (do.force.out) {
# index of rows to keep
# can't have any of the exclusions
keep.combos <- dapply(
X = combos,
FUN = function(x) !any(force.out.names %in% x),
MARGIN = 1
)
# subset the combos
combos <- combos[keep.combos, , drop = FALSE]
# check and make sure rows remain
if (nrow(combos) == 0) {
cat_line(
col_yellow(
"i Skipping this set size, no combinations remain after forced exclusions"
)
)
next
}
}
# reduce by forced in together
if (do.force.in.together) {
# loop thru the list
for (j in seq_along(force.in.together.names)) {
keep.names <- force.in.together.names[[j]]
# no need to continue if the number of forced mutual
# incluions exceeds the current set size
if (length(keep.names) > k[i]) {
next
}
# if any are present, all have to be present
keep.combos <- dapply(
X = combos,
FUN = function(x) {
chk1 <- ifelse(any(x %in% keep.names), 1, -1)
chk2 <- ifelse(all(keep.names %in% x), 1, -1)
(chk1 * chk2) == 1
},
MARGIN = 1
)
combos <- combos[keep.combos, , drop = FALSE]
if (nrow(combos) == 0) {
cat_line(
col_yellow(
"i Skipping this set size, no combinations remain after items forced in together"
)
)
break # completely exit this part of the for loop
}
}
}
# the forced mutual inclusions will break out of its for loop
# skip over to the next set size if there are no combinations left
if (nrow(combos) == 0) next
# reduce by forced out together
if (do.force.out.together) {
# loop thru the list
for (j in seq_along(force.out.together.names)) {
drop.names <- force.out.together.names[[j]]
# no need to continue if the number of forced mutual
# incluions exceeds the current set size
if (length(drop.names) > k[i]) {
next
}
# keep only comos where all of the drop names aren't in
keep.combos <- dapply(
X = combos,
MARGIN = 1,
FUN = function(x) !all(drop.names %in% x)
)
combos <- combos[keep.combos, , drop = FALSE]
if (nrow(combos) == 0) {
cat_line(
col_yellow(
"i Skipping this set size, no combinations remain after items forced out together"
)
)
break # completely exit this part of the for loop
}
}
}
# the forced mutual inclusions will break out of its for loop
# skip over to the next set size if there are no combinations left
if (nrow(combos) == 0) next
# If greedying
# set size has to be when greedy kicks in
# can't be the first iteration
# the set size has to be larger than the number of force inclusions
if (k[i] >= greedy_begin & i > 1 & k[i] > length(force.in.names)) {
if (greedy_entry == "reach") {
# Find the best combo from k-1
item.keep.greedy <-
reach.list[[i-1]] %>%
roworderv(cols = "reach", decreasing = TRUE) %>%
head(1) %>%
select(matches("i")) %>%
unlist() %>%
unname()
} else if (greedy_entry == "freq") {
item.keep.greedy <-
reach.list[[i-1]] %>%
roworderv(cols = "freq", decreasing = TRUE) %>%
head(1) %>%
select(matches("i")) %>%
unlist() %>%
unname()
} else if (greedy_entry == "shapley") {
shap.keep <- shap.keep + 1
item.keep.greedy <-
shap.approx %>%
# force-ins have to be there already
# this skips them
filter(!item %in% force.in.names) %>%
head(shap.keep) %>%
pull(1)
}
keep.combos <- dapply(
X = combos,
MARGIN = 1,
FUN = function(x) all(item.keep.greedy %in% x)
)
combos <- combos[keep.combos, , drop = FALSE]
}
# placeholder matrix
n.combos <- nrow(combos)
# end combo clock
combo.clock2 <- Sys.time()
combo.clock[i] <- as.numeric(
difftime(combo.clock2, combo.clock1, units = "secs")
)
# this matrix receives the reach and freq calcs
fill <- matrix(
data = NA,
ncol = 2,
nrow = n.combos,
dimnames = list(
NULL,
c("reach", "freq")
)
)
# calculate reach
# pb <- progress_bar$new(total = n.combos)
# It should be noted that time was spent on optimizing this part of
# the calculation. This for-loop is marginally faster than any of the
# apply functions I have tried.
if (progress) prog.seq <- floor(seq(1, n.combos, length.out = 10))
turf.clock1 <- Sys.time()
for (j in 1:nrow(combos)) {
if (progress) {
if (any(prog.seq == j)) {
cat("\r")
cat(" Progress:", percent(j/n.combos))
cat("\r")
}
}
n.reached <- rowsums(item.mat[, combos[j, ], drop = FALSE])
is.reached <- n.reached >= depth
reach <- fmean(x = is.reached, w = wgt.vec)
# denominator is now calculated outside the loop
# since it does not change
freq <- sum(wgt.vec * n.reached) / wgt.base
fill[j, ] <- c(reach, freq)
}
# Join the reach measures to the combinations
reach.stats <-
combos %>%
as_tibble() %>%
bind_cols(as_tibble(fill), .) %>%
rowid_to_column("combo") # %>%
# roworderv(cols = c("reach", "freq"), decreasing = TRUE)
# update the list
reach.list[[i]] <- reach.stats
# log the clock
turf.clock2 <- Sys.time()
turf.clock[i] <- as.numeric(
difftime(turf.clock2, turf.clock1, units = "secs")
)
}
# update clocks and reach output
k.null <- map_lgl(reach.list, is.null)
combo.clock <- combo.clock[!k.null]
turf.clock <- turf.clock[!k.null]
reach.list <- reach.list[!k.null]
total.clock2 <- Sys.time()
total.clock <- as.numeric(
difftime(total.clock2, total.clock1, units = "secs")
)
# Add in item weights -----------------------------------------------------
if (do.item.weights) {
map_item_weights <- function(data, w) {
cn <- data %>% select(matches("i")) %>% names()
wgt.sum <- dapply(
X = data[cn],
MARGIN = 1,
FUN = function(x) {
fsum(w[names(w) %in% x])
}
)
mutate(
.data = data,
weight = wgt.sum,
value = reach * weight,
.before = matches("i_")
)
}
reach.list <- map(
.x = reach.list,
.f = ~map_item_weights(.x, w = item.wgt)
)
}
# Add in shapley values ---------------------------------------------------
map_sv <- function(data, sv) {
cn <- data %>% select(matches("i")) %>% names()
sv.sum <- dapply(
X = data[cn],
MARGIN = 1,
FUN = function(x) {
fsum(sv[names(sv) %in% x])
}
)
mutate(
.data = data,
shap = sv.sum,
.after = freq
)
}
reach.list <- map(
.x = reach.list,
.f = ~map_sv(.x, sv = deframe(shap.approx))
)
# Organize and return output ----------------------------------------------
reach.list <-
reach.list %>%
enframe(name = "k", value = "results") %>%
mutate(k = parse_number(k))
clock.list <- list(
total = total.clock,
by_k = tibble(
k = reach.list$k,
n_combos = map_int(reach.list$results, nrow),
combo_secs = combo.clock,
turf_secs = turf.clock,
combo_per_sec = n_combos / combo_secs,
turf_per_sec = n_combos / turf_secs
)
)
info <- list(
n = wgt.base,
n_items = length(item.names),
n_combos = sum(map_int(reach.list$results, nrow), na.rm = TRUE),
items = item.names,
item_weights = list(
weighted = do.item.weights,
weights = item.wgt
),
k = k,
depth = depth,
case_weights = list(
weighted = has.weights,
name = ifelse(has.weights, case.weights.names, NA_character_)
),
greedy = list(
begin = greedy_begin,
entry = greedy_entry
),
progress = progress
)
out <- list(
turf = reach.list,
info = info,
constraints = constraints,
clock = clock.list
)
class(out) <- "turf"
cat_line()
out
}
utils::globalVariables(c(
".", "item", "shapley_value", "n_combos", "combo_secs", "turf_secs",
"weight"
))
#' @exportS3Method print turf
print.turf <- function(x, ...) {
# Begin messages
cat_line(rule(left = "TURF", line = 2))
# Build string for `k`
k <- x$turf$k
k.seq <- seqid(k)
k.string <-
tibble(k, k.seq) %>%
group_by(k.seq) %>%
mutate(min = min(k), max = max(k)) %>%
ungroup() %>%
mutate(
string = case_when(
min == max ~ as.character(k),
.default = paste0(min,"-",max)
),
string = fct_inorder(string)
) %>%
distinct(string) %>%
pull() %>%
paste(collapse = ", ")
# Build string for the total runtime
runtime <- x$clock$total
runtime <- ifelse(
runtime <= 2*60,
glue("{round(runtime, 2)} secs"),
glue("{round(runtime / 60, 2)} mins")
)
# Print out summary messages
cat_line(glue("# Items {x$info$n_items}"))
cat_line(glue("# Combos {scales::comma(x$info$n_combos)}"))
cat_line(glue("Set Sizes {k.string}"))
cat_line(glue("Sample Size {x$info$n}"))
cat_line(glue("Total Runtime {runtime}"))
cat_line()
# Item weights and case weights
c.wgt <- x$info$case_weights$weighted
i.wgt <- x$info$item_weights$weighted
if (any(c.wgt, i.wgt)) {
cat_line(rule(left = "Weights", line = 1))
cat_line(glue("Case Weights {ifelse(c.wgt, 'Yes', 'No')}"))
cat_line(glue("Item Weights {ifelse(i.wgt, 'Yes', 'No')}"))
cat_line()
}
# Constraints
con.in <- length(x$constraints$force_in) > 0
con.out <- length(x$constraints$force_out) > 0
con.in2 <- length(x$constraints$force_in_together) > 0
con.out2 <- length(x$constraints$force_out_together) > 0
if (any(con.in, con.out, con.in2, con.out2)) {
cat_line(rule(left = "Constraints", line = 1))
if (con.in) {
con.in.str <-
x$constraints$force_in %>%
paste(collapse = ", ") %>%
paste("\U2022", .)
cat_line("Items Forced In")
cat_line(con.in.str)
cat_line()
}
if (con.out) {
con.out.str <-
x$constraints$force_out %>%
paste(collapse = ", ") %>%
paste("\U2022", .)
cat_line("Items Forced Out")
cat_line(con.out.str)
cat_line()
}
if (con.in2) {
con.in2.str <-
x$constraints$force_in_together %>%
map(paste, collapse = ", ") %>%
map(~paste("\U2022", .x)) %>%
paste(collapse = "\n")
cat_line("Items Forced In Together")
cat_line(con.in2.str)
cat_line()
}
if (con.out2) {
con.out2.str <-
x$constraints$force_out_together %>%
map(paste, collapse = ", ") %>%
map(~paste("\U2022", .x)) %>%
paste(collapse = "\n")
cat_line("Items Forced Out Together")
cat_line(con.out2.str)
cat_line()
}
}
# Greedy?
did.greedy <- x$info$greedy$begin < max(x$turf$k)
if (did.greedy) {
cat_line(rule("Greedy"))
cat_line(glue("Begins at size {x$info$greedy$begin}"))
cat_line(glue("Entry method {x$info$greedy$entry}"))
}
invisible(x)
}
utils::globalVariables(c(
"string"
))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.