library(tidyverse)
library(onezero)
library(arrangements)
library(broom)
#' Simulate a random binary (0/1) data set
#'
#' Draws a random number of items (columns) and a random number of rows, then
#' fills the table with independent Bernoulli(0.5) draws.
#'
#' @param min.item,max.item Inclusive range for the number of columns.
#' @param min.n,max.n Inclusive range for the number of rows.
#' @return A tibble of doubles (0 or 1) with columns named `V1`, `V2`, ...
rando <- function(min.item = 2, max.item = 30, min.n = 50, max.n = 10000) {
  # sample(x, size = 1) treats a length-one numeric `x` as 1:x, so a range
  # such as 5:5 would return anything in 1..5. Indexing via sample.int() is
  # what sample() does internally for longer vectors (same random stream)
  # and stays correct when the range collapses to a single value.
  pick_one <- function(lo, hi) {
    pool <- lo:hi
    pool[sample.int(length(pool), size = 1)]
  }

  i <- pick_one(min.item, max.item)
  ss <- pick_one(min.n, max.n)

  # Build the matrix as doubles with explicit dimensions and column names so
  # the shape survives the one-row case (sapply() over a one-row data frame
  # collapses to a plain vector).
  m <- matrix(
    data = as.double(rbinom(i * ss, size = 1, prob = 0.5)),
    nrow = ss,
    ncol = i,
    dimnames = list(NULL, paste0("V", seq_len(i)))
  )
  as_tibble(m)
}
# Benchmark ----
# Run turf() on simulated data sets of random size and keep, per iteration,
# the reported sample size (res$info$n) and the timing object (res$clock).
n_iter <- 100L
# Preallocate the result list instead of growing it inside the loop.
store <- vector("list", n_iter)
a <- Sys.time()
set.seed(4)
for (i in seq_len(n_iter)) {
  r <- rando(min.item = 2, max.item = 25)
  ni <- ncol(r)
  nr <- nrow(r)
  cat("\niteration:", i, "| items:", ni, "| rows:", nr, "\n")
  # Evaluate every combination size from 1 up to the number of items.
  res <- turf(
    data = r,
    items = everything(),
    k = seq_len(ni)
  )
  store[[i]] <- list(res$info$n, res$clock)
  cat("\r")
}
# Total wall-clock time of the whole benchmark, in seconds.
b <- difftime(Sys.time(), a, units = "secs")
# Persist the raw results and the total run time for later analysis.
write_rds(store, "turf-test.rds")
write_rds(b, "time.rds")
# Timing table ----
# One row per benchmark iteration and combination size: the first element of
# each stored pair is the sample size, the second is the clock object whose
# `total` and `by_k` components are pulled out here.
times <- enframe(store, name = "i", value = "data") %>%
  mutate(
    ss = map_dbl(data, function(run) run[[1]]),
    total_time = map_dbl(data, function(run) run[[2]]$total),
    combo = map(data, function(run) run[[2]]$by_k)
  ) %>%
  unnest(combo) %>%
  select(-data)
# Model ----
# Regress the per-k run time on the number of combinations, the sample size,
# and their interaction.
mod <- lm(formula = turf_secs ~ n_combos * ss, data = times)
tidy(mod)
glance(mod)
augment(mod)
# In-sample predictions and residuals, plus the predicted/observed correlation.
pred <- predict(mod, newdata = times)
resid <- times[["turf_secs"]] - pred
cor(x = pred, y = times[["turf_secs"]])
# Exploration ----
summary(times)
# Slowest runs first.
arrange(times, desc(turf_secs))
# Correlation between the total run time and the per-k run time.
cor(select(times, total_time, turf_secs))
# Run time in minutes against number of combinations, coloured by sample size.
ggplot(
  data = mutate(times, min = turf_secs / 60),
  mapping = aes(x = n_combos, y = min, color = ss)
) +
  geom_point()
# Distribution of the model residuals.
ggplot(data = tibble(resid), mapping = aes(x = resid)) +
  geom_histogram(bins = 50)
#' Predict TURF run time from a fitted timing model
#'
#' @param num_items Number of items; only used when `num_combos` is missing.
#' @param k Combination sizes; only used when `num_combos` is missing.
#' @param num_combos Total number of combinations. When missing it is computed
#'   as the sum of `ncombinations(num_items, k)` over the supplied `k`.
#' @param num_rows Number of rows (sample size) of the data set.
#' @param level Confidence level for the interval around the prediction.
#' @param units Time unit of the result: `"m"` for minutes, `"h"` for hours;
#'   any other value leaves the result in the model's own unit.
#' @param model Fitted `lm` with predictors `n_combos` and `ss`. Defaults to
#'   the script-level object `mod`.
#' @return A tibble with columns `pred`, `upper` and `lower`.
estimate_turf <- function(num_items, k, num_combos, num_rows,
                          level = 0.95, units = "s", model = mod) {
  if (missing(num_combos)) {
    # Accumulate as doubles: summing integer counts overflows to NA once the
    # total exceeds .Machine$integer.max.
    num_combos <- sum(
      map2_dbl(
        .x = num_items,
        .y = k,
        .f = function(n_items, size) {
          as.numeric(arrangements::ncombinations(n = n_items, k = size))
        }
      )
    )
  }

  d <- tibble(
    n_combos = num_combos,
    ss = num_rows
  )

  # Honour the caller's `level` (it used to be hard-coded to 0.95).
  ci <- predict(model, newdata = d, interval = "confidence", level = level)

  if (units == "m") {
    ci <- ci / 60
  } else if (units == "h") {
    ci <- ci / 60 / 60
  }

  # predict.lm() names its columns fit, lwr, upr. Select them by name so that
  # `upper` really holds the upper bound and `lower` the lower bound, while
  # keeping the output column order (pred, upper, lower) unchanged.
  tibble(
    pred = unname(ci[, "fit"]),
    upper = unname(ci[, "upr"]),
    lower = unname(ci[, "lwr"])
  )
}
# Predicted run time (minutes) for 25 items, all k, 9395 rows.
estimate_turf(num_items = 25, k = 1:25, num_rows = 9395, units = "m")
# Observed total run time (minutes) for the iterations that have exactly 25
# timing rows, for comparison with the prediction above.
times %>%
  group_by(i) %>%
  filter(n() == 25) %>%
  summarise(
    time = sum(turf_secs) / 60,
    ss = mean(ss)
  )
# Prediction grid ----
# Every (sample size, items, k) combination with k <= items, reduced to the
# first row for each distinct (sample size, number of combinations) pair.
x <- expand_grid(
  ss = seq(0, 10000, length.out = 101),
  ni = 1:30,
  k = 1:30
) %>%
  filter(k <= ni) %>%
  mutate(
    n_combos = map2_int(ni, k, function(n_items, size) {
      ncombinations(n_items, size)
    })
  ) %>%
  distinct(ss, n_combos, .keep_all = TRUE)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.