turf_shapley_values <- function(res) {
# Was every k size run? ---------------------------------------------------
item.names <- res$items
n.items <- length(res$items)
# need to have run every set size
fail <- !identical(
x = as.numeric(res$options$k),
y = as.numeric(1:n.items)
)
if (fail) {
abort(glue(
"Shapley values can only be obtained when running TURF from 1 to number of items ({n.items})."
))
}
# Any constraints? --------------------------------------------------------
fail <-
res %>%
pluck("constraints") %>%
map_lgl(~length(.x) > 0) %>%
any()
if (fail) {
abort("TURF must be run without constraints to obtain Shapley values.")
}
# Depth not 1? ------------------------------------------------------------
fail <- res$options$depth != 1
if (fail) {
abort("TURF must be run with `depth` set to 1 to obtain Shapley values.")
}
# Greedy? -----------------------------------------------------------------
fail <- res$options$greedy_begin < max(res$options$k)
if (fail) {
abort("TURF must be run without running greedy algorithm to obtain Shapley values.")
}
# Function to calculate reach with/out item -------------------------------
with_without_reach <- function(
reach.values, just.items, item.names = item.names
) {
BY(
x = reach.values,
g = dapply(
X = just.items,
MARGIN = 1,
FUN = function(x) ifelse(item.names %in% x, "with", "without")
),
FUN = fmean
)
}
# Calculate reach with/out each item --------------------------------------
# pull out the reach list
reach <- res$reach
# to store the results
with.without.reach.list <- vector(
mode = "list",
length = length(reach)
)
# loop thru each set size
for (i in seq_along(reach)) {
# pull out the i'th reach values and item matrix
rv <- reach[[i]][["reach"]]
ji <- reach[[i]] %>% select(matches("i_"))
wwr <-
# map thru each of the item names and calculate
map(
.x = item.names,
.f = ~with_without_reach(
reach.values = rv,
just.items = ji,
item.names = .x
)
) %>%
# bind everything together
do.call(rbind, .) %>%
as_tibble() %>%
# adding in items and set size
add_column(
item = item.names,
.before = 1
) %>%
add_column(
k = i,
.before = 1
)
# the last set size has no "without" need to manually add in
if (i == length(item.names)) {
wwr$without <- 0
}
# store in the list
with.without.reach.list[[i]] <- wwr
}
# mean difference of with/without = shapley
with.without.reach.list %>%
reduce(bind_rows) %>%
fmutate(gap = with - without) %>%
fgroup_by(item) %>%
fsummarise(shapley_value = fmean(gap)) %>%
fungroup() %>%
arrange(desc(shapley_value))
}
turf_shapley_values(out)
# what does this do?
#
turf_incremental_reach <- function(res) {
# Can it be done? ---------------------------------------------------------
reach <- rev(res$reach)
k <- res$options$k
pad <- max(nchar(k))
item.names <- res$items
max.items <- max(k)
item.mat <- matrix(
data = NA_character_,
nrow = length(k),
ncol = max.items,
dimnames = list(
rev(k),
paste0("i_", str_pad(1:max.items, pad, "left", "0"))
)
)
reach.val <- double(length = nrow(item.mat))
top.items <-
reach %>%
head(1) %>%
flatten_df() %>%
slice(1)
top.item.names <-
top.items %>%
select(matches("i")) %>%
unlist() %>%
unname()
lvl <- fct_inorder(top.item.names)
top.item.reach <- top.items$reach
item.mat[1, ] <- top.item.names
reach.val[1] <- top.item.reach
reach <- reach[-1]
for (i in seq_along(reach)) {
# browser()
# go to the item matrix and pull out prior items
keep <-
item.mat[i, , drop = TRUE] %>%
unname()
keep <- keep[!is.na(keep)]
# go to the ith reach list and find rows where all the items
# are in the keeps
top.items <-
reach[[i]] %>%
filter(
if_all(
.cols = matches("i"),
.fns = ~.x %in% keep
)
) %>%
slice(1)
top.item.names <-
top.items %>%
select(matches("i")) %>%
unlist() %>%
unname() %>%
factor(levels = lvl) %>%
sort()
top.item.names <- c(
as.character(top.item.names),
rep(NA, times = ncol(item.mat) - length(top.item.names))
)
top.item.reach <- top.items$reach
item.mat[i+1, ] <- top.item.names
reach.val[i+1] <- top.item.reach
}
# browser()
# rearrange item matrix from bottom to top according to item order entry
item.order <- item.mat[nrow(item.mat), ]
item.order <- item.order[complete.cases(item.order)] %>% unname()
for (i in 1:(nrow(item.mat)-1)) {
row <- nrow(item.mat) - i
items <- item.mat[row, ] %>% unname()
n.na <- sum(is.na(items))
items <- items[!is.na(items)]
s.d <- setdiff(items, item.order)
item.mat[row, ] <- c(item.order, s.d, rep(NA, times = n.na))
item.order <- c(item.order, s.d)
}
bind_cols(item.mat, reach = reach.val) %>%
mutate(gain = reach - lead(reach)) %>%
mutate(k = rev(k), .before = 1)
}
# putting item names in columns
out %>%
turf_incremental_reach() %>%
pivot_longer(
cols = matches("i_")
) %>%
filter(!is.na(value)) %>%
mutate(name = 1) %>%
pivot_wider(names_from = value, values_from = name, values_fill = list(name = 0)) %>%
relocate(c(reach, gain), .after = last_col())
# incremental gain plot
# color the inside/outside by lead/lag whatever.
out %>%
turf_incremental_reach() %>%
pivot_longer(matches("^i"), names_to = "position", values_to = "item") %>%
arrange(k, item) %>%
filter(!is.na(item)) %>%
group_by(item) %>%
mutate(step = min(k)) %>%
arrange(item, step) %>%
filter(row_number() == 1) %>%
ungroup() %>%
arrange(k, item) %>%
group_by(k) %>%
summarise(
item = paste(item, collapse = "\n"),
reach = mean(reach),
gain = mean(gain)
) %>%
mutate(item = fct_reorder(item, reach, .desc = TRUE)) %>%
ggplot(aes(x = reach, y = item)) +
geom_col()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.