library(tidyverse)
library(tidyselect)
library(microbenchmark)
library(turfR)
library(arrangements)
library(onezero)
library(progress)
library(profvis)
"https://stackoverflow.com/questions/51774646/efficiency-of-matrix-rowsums-vs-colsums-in-r-vs-rcpp-vs-armadillo"
x <-
turfR::turf_ex_data %>%
as_tibble()
terf2 <- function(
data, cols, k, need = 1, force.in, weight, fast
) {
# Parse information -------------------------------------------------------
# temp data for tidyeval
tmp <- slice(data, 1)
# Handle weights --
# If no weights, create a vector of 1s
if (missing(weight)) {
wgt.vec <- rep(1, times = nrow(data))
} else {
.weight <- enquo(weight)
weight.index <- eval_select(
expr = .weight,
data = tmp
)
weight.name <- names(weight.index)
if (length(weight.name) > 1) {
stop("Can only pass one column of weights to `weight` input.")
}
wgt.vec <- data[[weight.name]]
data <- select(data, -all_of(weight.name))
}
# Get columns used in the analysis --
.cols <- enquo(cols)
col.index <- eval_select(
expr = .cols,
data = tmp
)
col.names <- names(col.index)
if (length(col.names) < k) {
stop("Value of `k` cannot exceed number of columns specified in `cols`.")
}
data <- data[col.names]
# Identify names of variables forced in --
if (!missing(force.in)) {
.force.in <- enquo(force.in)
force.index <- eval_select(
expr = .force.in,
data = tmp
)
force.names <- names(force.index)
bad.names <- force.names[!force.names %in% col.names]
if (length(bad.names) > 0) {
stop(paste0(
"Tried to force in columns that are not included in `cols` argument:\n",
paste(bad.names, collapse = ", ")
))
}
}
# Get all combos ----------------------------------------------------------
combos <- combinations(
x = col.names,
k = k
)
colnames(combos) <- paste0("v", 1:k)
# subset the combos if force in
if (!missing(force.in)) {
keep.combos <- apply(combos, 1, function(x) all(force.names %in% x))
combos <- combos[keep.combos, , drop = FALSE]
}
n.combos <- nrow(combos)
# Calculate ---------------------------------------------------------------
fill <- matrix(
data = NA,
ncol = 2,
nrow = n.combos,
dimnames = list(
NULL,
c("reach", "freq")
)
)
if (fast %in% c("Rfast", "cpp")) {
data.0 <- as.matrix(data)
data.0[is.na(data.0)] <- 0
}
for (i in 1:nrow(combos)) {
if (n.combos > 1500) {
cat('\r', paste0("Combination ", i, "/", n.combos, " (", round(100*(i/n.combos)), "%)"))
flush.console()
}
if (fast == "Rfast") {
n.reached <- Rfast::rowsums(data.0[, combos[i, ]])
}
if (fast == "cpp") {
n.reached <- row_sums_cpp(data.0[, combos[i, ]])
}
if (fast == "no") {
n.reached <- rowSums(data[combos[i, ]], na.rm = TRUE)
}
is.reached <- n.reached >= need
# final versions here
reach <- weighted_mean_cpp(is.reached, wgt.vec)
freq <- sum(wgt.vec * n.reached) / sum(wgt.vec[is.reached])
out <- c(reach, freq)
fill[i, ] <- out
}
reach.compare <- matrix(
nrow = 3,
ncol = length(col.names),
dimnames = list(
c("in", "out", "net"),
col.names
)
)
# for (i in seq_along(col.names)) {
#
# item.i <- col.names[i]
#
# in.out <- apply(combos, 1, function(x) ifelse(any(item.i %in% x), "in", "out"))
#
# in.out.means <- aggregate(
# fill[, "reach"],
# by = list("in_out" = in.out),
# FUN = mean,
# simplify = TRUE
# )
#
# reach.compare[-3, i] <- in.out.means$x
#
# }
#
# reach.compare[3, ] <- reach.compare[1, ] - reach.compare[2, ]
out <-
as_tibble(combos) %>%
bind_cols(as_tibble(fill)) %>%
rowid_to_column("combo") %>%
arrange(desc(reach), desc(freq))
cat("\n\n")
out
# list(
# reach.compare = reach.compare,
# turf = out
# )
}
identical(
terf2(
data = pets,
cols = dog:bird,
weight = weight,
k = 2,
fast = "Rfast"
),
terf2(
data = pets,
cols = dog:bird,
weight = weight,
k = 2,
fast = "no"
)
)
# Compare testing ---------------------------------------------------------
make_data <- function(n, m) {
matrix(
data = sample(c(1, 0, NA), n * m, T, c(0.45, 0.45, 0.00)),
nrow = n,
ncol = m,
dimnames = list(
NULL, paste0("i", 1:m)
)
) %>%
as_tibble() %>%
rowid_to_column() %>%
add_column(weight = 1, .after = 1)
}
d1 <- make_data(n = 1000, m = 12)
microbenchmark(
"rowSums" = terf(
data = d1,
cols = matches("^i"),
k = 6,
need = 1,
weight = weight,
fast = "no"
),
"Rfast::rowsums" = terf(
data = d1,
cols = matches("^i"),
k = 6,
need = 1,
weight = weight,
fast = "Rfast"
),
"row_sums_cpp" = terf(
data = d1,
cols = matches("^i"),
k = 6,
need = 1,
weight = weight,
fast = "cpp"
),
"hack" = turf(
data = d1,
n = 12,
k = 6,
depth = 1
),
times = 30,
unit = "s"
)
microbenchmark(
"mine" = terf(
data = d1,
cols = matches("^i"),
k = 6,
need = 1,
weight = weight,
fast = "Rfast"
),
"hack" = turf(
data = d1,
n = 12,
k = 6,
depth = 1
),
times = 10,
unit = "s"
)
N <- 1000
m <- matrix(
data = sample(c(1, 0, NA), N * N, T, c(0.45, 0.45, 0.1)),
ncol = N,
nrow = N
)
mrf <- function(x) {
Rfast::rowsums(
x = x
)
}
microbenchmark::microbenchmark(
"base" = rowSums(m, na.rm = TRUE),
"new" = row_sums_cpp(m),
"rfast" = mrf(m)
)
identical(
rowSums(m, na.rm = TRUE),
mrf(m)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.