test-code/turf_shapley_values.R

turf_shapley_values <- function(res) {

    # Was every k size run? ---------------------------------------------------

    item.names <- res$items
    n.items <- length(res$items)

    # need to have run every set size
    fail <- !identical(
        x = as.numeric(res$options$k),
        y = as.numeric(1:n.items)
    )

    if (fail) {
        abort(glue(
            "Shapley values can only be obtained when running TURF from 1 to number of items ({n.items})."
        ))
    }


    # Any constraints? --------------------------------------------------------

    fail <-
        res %>%
        pluck("constraints") %>%
        map_lgl(~length(.x) > 0) %>%
        any()

    if (fail) {
        abort("TURF must be run without constraints to obtain Shapley values.")
    }


    # Depth not 1? ------------------------------------------------------------

    fail <- res$options$depth != 1

    if (fail) {
        abort("TURF must be run with `depth` set to 1 to obtain Shapley values.")
    }


    # Greedy? -----------------------------------------------------------------

    fail <- res$options$greedy_begin < max(res$options$k)

    if (fail) {
        abort("TURF must be run without running greedy algorithm to obtain Shapley values.")
    }


    # Function to calculate reach with/out item -------------------------------

    with_without_reach <- function(
        reach.values, just.items, item.names = item.names
    ) {

        BY(
            x = reach.values,
            g = dapply(
                X = just.items,
                MARGIN = 1,
                FUN = function(x) ifelse(item.names %in% x, "with", "without")
            ),
            FUN = fmean
        )

    }


    # Calculate reach with/out each item --------------------------------------

    # pull out the reach list
    reach <- res$reach

    # to store the results
    with.without.reach.list <- vector(
        mode = "list",
        length = length(reach)
    )

    # loop thru each set size
    for (i in seq_along(reach)) {

        # pull out the i'th reach values and item matrix
        rv <- reach[[i]][["reach"]]
        ji <- reach[[i]] %>% select(matches("i_"))

        wwr <-

            # map thru each of the item names and calculate
            map(
                .x = item.names,
                .f = ~with_without_reach(
                    reach.values = rv,
                    just.items = ji,
                    item.names = .x
                )
            ) %>%

            # bind everything together
            do.call(rbind, .) %>%
            as_tibble() %>%

            # adding in items and set size
            add_column(
                item = item.names,
                .before = 1
            ) %>%
            add_column(
                k = i,
                .before = 1
            )

        # the last set size has no "without" need to manually add in
        if (i == length(item.names)) {
            wwr$without <- 0
        }

        # store in the list
        with.without.reach.list[[i]] <- wwr

    }

    # mean difference of with/without = shapley
    with.without.reach.list %>%
        reduce(bind_rows) %>%
        fmutate(gap = with - without) %>%
        fgroup_by(item) %>%
        fsummarise(shapley_value = fmean(gap)) %>%
        fungroup() %>%
        arrange(desc(shapley_value))

}

turf_shapley_values(out)


# what does this do?
#
turf_incremental_reach <- function(res) {

    # Can it be done? ---------------------------------------------------------

    reach <- rev(res$reach)

    k <- res$options$k
    pad <- max(nchar(k))
    item.names <- res$items
    max.items <- max(k)

    item.mat <- matrix(
        data = NA_character_,
        nrow = length(k),
        ncol = max.items,
        dimnames = list(
            rev(k),
            paste0("i_", str_pad(1:max.items, pad, "left", "0"))
        )
    )

    reach.val <- double(length = nrow(item.mat))

    top.items <-
        reach %>%
        head(1) %>%
        flatten_df() %>%
        slice(1)

    top.item.names <-
        top.items %>%
        select(matches("i")) %>%
        unlist() %>%
        unname()

    lvl <- fct_inorder(top.item.names)

    top.item.reach <- top.items$reach

    item.mat[1, ] <- top.item.names
    reach.val[1] <- top.item.reach

    reach <- reach[-1]

    for (i in seq_along(reach)) {

        # browser()

        # go to the item matrix and pull out prior items
        keep <-
            item.mat[i, , drop = TRUE] %>%
            unname()

        keep <- keep[!is.na(keep)]

        # go to the ith reach list and find rows where all the items
        # are in the keeps
        top.items <-
            reach[[i]] %>%
            filter(
                if_all(
                    .cols = matches("i"),
                    .fns = ~.x %in% keep
                )
            ) %>%
            slice(1)

        top.item.names <-
            top.items %>%
            select(matches("i")) %>%
            unlist() %>%
            unname() %>%
            factor(levels = lvl) %>%
            sort()

        top.item.names <- c(
            as.character(top.item.names),
            rep(NA, times = ncol(item.mat) - length(top.item.names))
        )

        top.item.reach <- top.items$reach

        item.mat[i+1, ] <- top.item.names
        reach.val[i+1] <- top.item.reach

    }

    # browser()

    # rearrange item matrix from bottom to top according to item order entry
    item.order <- item.mat[nrow(item.mat), ]
    item.order <- item.order[complete.cases(item.order)] %>% unname()

    for (i in 1:(nrow(item.mat)-1)) {

        row <- nrow(item.mat) - i
        items <- item.mat[row, ] %>% unname()
        n.na <- sum(is.na(items))
        items <- items[!is.na(items)]
        s.d <- setdiff(items, item.order)
        item.mat[row, ] <- c(item.order, s.d, rep(NA, times = n.na))
        item.order <- c(item.order, s.d)


    }


    bind_cols(item.mat, reach = reach.val) %>%
        mutate(gain = reach - lead(reach)) %>%
        mutate(k = rev(k), .before = 1)
}

# putting item names in columns

out %>%
    turf_incremental_reach() %>%
    pivot_longer(
        cols = matches("i_")
    ) %>%
    filter(!is.na(value)) %>%
    mutate(name = 1) %>%
    pivot_wider(names_from = value, values_from = name, values_fill = list(name = 0)) %>%
    relocate(c(reach, gain), .after = last_col())

# incremental gain plot
# color the inside/outside by lead/lag whatever.
out %>%
    turf_incremental_reach() %>%
    pivot_longer(matches("^i"), names_to = "position", values_to = "item") %>%
    arrange(k, item) %>%
    filter(!is.na(item)) %>%
    group_by(item) %>%
    mutate(step = min(k)) %>%
    arrange(item, step) %>%
    filter(row_number() == 1) %>%
    ungroup() %>%
    arrange(k, item) %>%
    group_by(k) %>%
    summarise(
        item = paste(item, collapse = "\n"),
        reach = mean(reach),
        gain = mean(gain)
    ) %>%
    mutate(item = fct_reorder(item, reach, .desc = TRUE)) %>%
    ggplot(aes(x = reach, y = item)) +
    geom_col()
ttrodrigz/onezero documentation built on May 9, 2023, 2:59 p.m.