test-code/turf-working/old-attempts/turf-new.R

library(tidyverse)
library(tidyselect)
library(microbenchmark)
library(turfR)
library(arrangements)
library(onezero)
library(crayon)


terf <- function(
    data, cols, weight,
    k, depth = 1, min.prop, force.in, mutual.excl
) {

    # Parse information -------------------------------------------------------

    # Grab the data needed for the analysis
    analysis.df <- select(data, {{cols}})
    analysis.names <- names(analysis.df)

    # Do weights exist? If so, grab them, if not, make them.
    if (missing(weight)) {

        ss <- nrow(data)
        wgt.vec <- rep(1, times = ss)

    } else {

        wgt.df <- select(data, {{weight}})

        if (ncol(wgt.df) > 1) {
            stop("Can only provide one column of weights in `weight` argument.")
        }

        wgt.name <- names(wgt.df)

        if (wgt.name %in% analysis.names) {
            warning(paste0(
                "Column '",
                wgt.name,
                "' was supplied as an input to both `cols` and `weights` arguments, this is likely ill-advised."
            ))
        }

        wgt.vec <- pull(wgt.df, {{weight}})

    }

    # This object will be used a lot later:
    item.names <- colnames(analysis.df)


    # Make sure arguments are specified correctly -----------------------------

    # k can't be bigger than number of columns, or less than 1
    if (!between(k, 1, length(item.names))) {
        stop(paste0(
            "Input to `k` must be a value between 1 and number of columns provided in `cols` (",
            length(item.names),
            ")."
        ))
    }

    # depth can't be bigger than k
    if (depth > k) {
        stop("Input to `depth` cannot exceed `k`. Doing so would result in a reach of zero.")
    }



    # Make sure analysis data is onezero --------------------------------------

    oz.check <- sapply(analysis.df, is_onezero)

    bad.vars <- names(oz.check[!oz.check])

    if (length(bad.vars) > 0) {

        bad.vars.message <- paste0(
            "The following variables do not meet the requirements of `is_onezero`:\n",
            paste(bad.vars, collapse = ", ")
        )

        stop(bad.vars.message)

    }


    # Force in any items? -----------------------------------------------------

    if (!missing(force.in)) {

        .force.in <- enquo(force.in)
        force.index <- eval_select(
            expr = .force.in,
            data = data
        )

        force.names <- names(force.index)

        # if k is less than the number of items forced in then there will be no
        # valid combinations to run
        if (k < length(force.names)) {
            stop(paste0(
                "Input to `k` must be greater than or equal to the number of items being forced in (",
                length(force.names),
                "), otherwise no valid combinations will be available."
            ))
        }

        bad.names <- force.names[!force.names %in% item.names]

        if (length(bad.names) > 0) {
            stop(paste0(
                "Invalid input to `force.in`, columns supplied in `force.in` must also be present in input to `cols`. The following columns must be added to `cols` if you want to force them in:\n",
                paste(bad.names, collapse = ", ")
            ))
        }

    }


    # Should any items be dropped due to low %? -------------------------------

    if (!missing(min.prop)) {
        if (!between(min.prop, 0, 1)) {
            stop("Input to `min.prop` must be a value between 0 and 1.")
        }

        # calculate %
        low.prop.items <- map_lgl(
            .x = analysis.df,
            .f = ~weighted.mean(.x, wgt.vec, na.rm = TRUE) < min.prop
        )

        low.prop.item.names <- names(low.prop.items[low.prop.items])


        # forced in items will override this
        if (length(force.names) > 0) {

        }

    }



    # Get all combos ----------------------------------------------------------

    combos <- combinations(
        x = item.names,
        k = k
    )

    colnames(combos) <- paste0("i", 1:k)

    # subset the combos if force in
    if (!missing(force.in)) {

        keep.combos <- apply(combos, 1, function(x) all(force.names %in% x))

        combos <- combos[keep.combos, , drop = FALSE]

    }

    n.combos <- nrow(combos)


    # Calculate ---------------------------------------------------------------

    # This matrix will receive the reach and frequency for each combination.
    fill <- matrix(
        data = NA,
        ncol = 2,
        nrow = n.combos,
        dimnames = list(
            NULL,
            c("reach", "freq")
        )
    )

    # The `Rfast::rowsums()` function only operates on matrices, and for some
    # reason I also replaced missing values with zero. I don't remember why
    # but the results are equivalent with `base::rowSums()` so I'm sticking
    # to it.

    data.0 <- as.matrix(analysis.df)
    data.0[is.na(data.0)] <- 0


    # Calculate the percent reached and frequency for each combination.
    header <- paste0(
        "-- turf ",
        paste(rep("-", times = 50), collapse = "")
    )

    cat.string <- paste0(
        header, "\n",
        italic("Number of items..... "), length(item.names), "\n",
        italic("Taken at a time..... "), k, "\n",
        italic("Number of combos.... "), formatC(n.combos, big.mark = ",", format = "f", drop0trailing = TRUE),
        "\n"
    )


    if (!missing(force.in)) {
        cat.string <- paste0(
            cat.string,
            italic("Forced inclusions...\n  "),
            paste(
                paste0("\U2713", " ", force.names),
                collapse = "\n  "
            ),
            "\n"
        )
    }

    cat(cat.string, "\n")


    for (i in 1:nrow(combos)) {

        n.reached <- Rfast::rowsums(data.0[, combos[i, ], drop = FALSE])

        is.reached <- n.reached >= depth

        # final versions here
        reach <- weighted_mean_cpp(is.reached, wgt.vec)
        # freq  <- sum(wgt.vec * n.reached) / sum(wgt.vec[is.reached])
        freq  <- sum(wgt.vec * n.reached) / sum(wgt.vec)

        out <- c(reach, freq)

        fill[i, ] <- out

    }


    # Arrange output and return -----------------------------------------------

    out <-
        as_tibble(combos) %>%
        add_column(n = length(item.names), .before = 1) %>%
        add_column(k = k, .before = 2) %>%
        bind_cols(as_tibble(fill)) %>%
        rowid_to_column("combo") %>%
        arrange(desc(reach), desc(freq))

    out

}


# Testing mine vs hack ----------------------------------------------------

# Better turf
terf(
    data = xx,
    cols = matches("item"),
    k = 6,
    depth = 1,
    weight = wgt,
    min.prop = 0.4,
    force.in = c(item_1:item_5)
)

xx <-
    turfR::turf_ex_data %>%
    as_tibble()
ttrodrigz/onezero documentation built on May 9, 2023, 2:59 p.m.