# test-code/time-turf.R

library(tidyverse)
library(onezero)
library(arrangements)
library(broom)


#' Simulate a random binary (0/1) response data set
#'
#' Draws a random number of items (columns) and a random number of rows,
#' then fills every cell with an independent `rbinom(size = 1, prob = 0.5)`
#' draw stored as a double.
#'
#' @param min.item,max.item Inclusive bounds for the number of columns.
#' @param min.n,max.n Inclusive bounds for the number of rows.
#' @return A tibble of doubles with columns named `V1`, `V2`, ...
rando <- function(min.item = 2, max.item = 30, min.n = 50, max.n = 10000) {

    # `sample(lo:hi, 1)` silently samples from `1:lo` when `lo == hi`, because
    # `sample()` treats a single number as the upper end of a range. Indexing
    # with `sample.int()` always picks from the supplied values and consumes
    # the random stream exactly as `sample()` does for longer vectors.
    pick_one <- function(values) {
        values[sample.int(length(values), size = 1L)]
    }

    n_items <- pick_one(min.item:max.item)
    n_rows <- pick_one(min.n:max.n)

    m <- matrix(
        data = rbinom(n_items * n_rows, size = 1, prob = 0.5),
        nrow = n_rows
    )

    # `rbinom()` returns integers; convert in place rather than looping over
    # columns with `sapply()`, whose return shape depends on the input size.
    storage.mode(m) <- "double"

    # Same names `as.data.frame()` would assign to an unnamed matrix.
    colnames(m) <- paste0("V", seq_len(n_items))

    as_tibble(m)

}

# Benchmark `turf()` on simulated data sets of random size. For every
# iteration the first stored element is `res$info$n` and the second is
# `res$clock`; code further down reads `$total` and `$by_k` from the latter.
n_iter <- 100

# Preallocate the result list instead of growing it inside the loop.
store <- vector("list", n_iter)
a <- Sys.time()
set.seed(4)
for (i in seq_len(n_iter)) {
    r <- rando(min.item = 2, max.item = 25)
    ni <- ncol(r)
    nr <- nrow(r)
    cat("\niteration:", i, "| items:", ni, "| rows:", nr, "\n")
    res <- turf(
        data = r,
        items = everything(),
        k = seq_len(ni)
    )
    store[[i]] <- list(res$info$n, res$clock)
    cat("\r")
}

# Wall-clock duration of the whole benchmark, in seconds.
b <- difftime(Sys.time(), a, units = "secs")

# Persist the raw results and the total elapsed time.
write_rds(store, "turf-test.rds")
write_rds(b, "time.rds")


# Flatten the benchmark results: one row per element of each run's `by_k`
# table, carrying the run index (`i`), the run's first stored value (`ss`)
# and its total time alongside.
times <- enframe(store, name = "i", value = "data") %>%
    mutate(
        ss = map_dbl(data, function(run) run[[1]]),
        total_time = map_dbl(data, function(run) run[[2]]$total),
        combo = map(data, function(run) run[[2]]$by_k)
    ) %>%
    select(-data) %>%
    unnest(combo)

# Regress per-k run time on the number of combinations, `ss`, and their
# interaction.
mod <- lm(turf_secs ~ n_combos * ss, data = times)

# Coefficient table, model-level fit statistics, and per-row diagnostics.
tidy(mod)
glance(mod)
augment(mod)

# In-sample predictions, raw residuals, and predicted/observed correlation.
pred <- predict(mod, times)
resid <- times$turf_secs - pred
cor(pred, times$turf_secs)

summary(times)


# Slowest timing records first.
arrange(times, desc(turf_secs))

# Correlation between a run's total time and its per-k times.
cor(select(times, total_time, turf_secs))

# Run time in minutes against combination count, coloured by `ss`.
ggplot(
    mutate(times, min = turf_secs / 60),
    aes(x = n_combos, y = min, color = ss)
) +
    geom_point()

# Distribution of the model residuals.
ggplot(tibble(resid), aes(x = resid)) +
    geom_histogram(bins = 50)

#' Estimate how long a `turf()` run will take
#'
#' Predicts run time from a timing regression with predictors `n_combos` and
#' `ss`, returning the point estimate together with a confidence interval.
#'
#' @param num_items Number of items; only used when `num_combos` is missing.
#' @param k Set sizes to evaluate; only used when `num_combos` is missing.
#' @param num_combos Total number of combinations. When missing it is computed
#'   as `sum(choose(num_items, k))`.
#' @param num_rows Number of rows in the data (the model's `ss` predictor).
#' @param level Confidence level passed to `predict()`.
#' @param units Unit of the returned times: `"s"` (seconds), `"m"` (minutes)
#'   or `"h"` (hours). Any other value returns seconds, with a warning.
#' @param model Fitted model used for prediction. Defaults to the `mod`
#'   object in the calling environment chain, as the original code assumed.
#' @return A tibble with columns `pred`, `upper` and `lower`.
estimate_turf <- function(num_items, k, num_combos, num_rows, level = 0.95, units = "s", model = mod) {

    if (missing(num_combos)) {

        # `choose()` is vectorised and returns doubles, so the total cannot
        # overflow integer range for large item counts.
        num_combos <- sum(choose(num_items, k))

    }

    d <- data.frame(
        n_combos = num_combos,
        ss = num_rows
    )

    # Unknown units used to fall through to seconds silently; keep that
    # fallback for compatibility but tell the caller about it.
    secs_per_unit <- switch(
        units,
        s = 1,
        m = 60,
        h = 3600,
        {
            warning(
                "Unknown `units` value '", units, "'; returning seconds.",
                call. = FALSE
            )
            1
        }
    )

    # Honour the caller's `level` rather than a hard-coded 0.95.
    ci <- predict(model, d, interval = "confidence", level = level)
    ci <- ci / secs_per_unit

    # `predict()` names its columns fit / lwr / upr. Select by name so the
    # "upper" and "lower" labels are attached to the correct bounds while the
    # historical column order (pred, upper, lower) is kept.
    out <- as_tibble(ci[, c("fit", "upr", "lwr"), drop = FALSE])
    colnames(out) <- c("pred", "upper", "lower")

    out

}

# Predicted run time, in minutes, for 25 items with k = 1..25 on 9,395 rows.
estimate_turf(num_items = 25, k = 1:25, num_rows = 9395, units = "m")

# Observed totals (in minutes) for the runs that contributed exactly 25
# timing rows.
times %>%
    group_by(i) %>%
    filter(n() == 25) %>%
    summarise(
        time = sum(turf_secs) / 60,
        ss = mean(ss)
    )


# Grid of `ss` values crossed with every valid (ni, k) pair, keeping the
# first row for each distinct (ss, n_combos) combination.
x <- expand_grid(
    ss = seq(0, 10000, length.out = 101),
    ni = 1:30,
    k = 1:30
) %>%
    filter(k <= ni) %>%
    mutate(
        n_combos = map2_int(
            ni,
            k,
            function(n_items, set_size) ncombinations(n_items, set_size)
        )
    ) %>%
    distinct(ss, n_combos, .keep_all = TRUE)
# ttrodrigz/onezero documentation built on May 9, 2023, 2:59 p.m.