inst/doc/cutpointr_benchmarks.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(fig.width = 6, fig.height = 5, fig.align = "center")
options(rmarkdown.html_vignette.check_title = FALSE)
load("vignettedata/vignettedata.Rdata")

## ----eval = FALSE-------------------------------------------------------------
# # Return cutpoint that maximizes the sum of sensitivity and specificiy
# # ROCR package
# rocr_sensspec <- function(x, class) {
#     pred <- ROCR::prediction(x, class)
#     perf <- ROCR::performance(pred, "sens", "spec")
#     sens <- slot(perf, "y.values")[[1]]
#     spec <- slot(perf, "x.values")[[1]]
#     cut <- slot(perf, "alpha.values")[[1]]
#     cut[which.max(sens + spec)]
# }
# 
# # pROC package
# proc_sensspec <- function(x, class) {
#     r <- pROC::roc(class, x, algorithm = 2, levels = c(0, 1), direction = "<")
#     pROC::coords(r, "best", ret="threshold", transpose = FALSE)[1]
# }

## ----eval = FALSE, echo = FALSE-----------------------------------------------
# library(OptimalCutpoints)
# library(ThresholdROC)
# library(dplyr)
# n <- 100
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# x_pos <- dat$x[dat$y == 1]
# x_neg <- dat$x[dat$y == 0]
# bench_100 <- microbenchmark::microbenchmark(
#     cutpointr(dat, x, y, pos_class = 1, neg_class = 0,
#               direction = ">=", metric = youden, break_ties = mean),
#     rocr_sensspec(dat$x, dat$y),
#     proc_sensspec(dat$x, dat$y),
#     optimal.cutpoints(X = "x", status = "y", tag.healthy = 0, methods = "Youden",
#                       data = dat),
#     thres2(k1 = x_neg, k2 = x_pos, rho = 0.5,
#            method = "empirical", ci = FALSE),
#     times = 100, unit = "ms"
# )
# 
# n <- 1000
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# x_pos <- dat$x[dat$y == 1]
# x_neg <- dat$x[dat$y == 0]
# bench_1000 <- microbenchmark::microbenchmark(
#     cutpointr(dat, x, y, pos_class = 1, neg_class = 0,
#               direction = ">=", metric = youden, break_ties = mean),
#     rocr_sensspec(dat$x, dat$y),
#     proc_sensspec(dat$x, dat$y),
#     optimal.cutpoints(X = "x", status = "y", tag.healthy = 0, methods = "Youden",
#                       data = dat),
#     thres2(k1 = x_neg, k2 = x_pos, rho = 0.5,
#            method = "empirical", ci = FALSE),
#     times = 100, unit = "ms"
# )
# 
# n <- 10000
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# x_pos <- dat$x[dat$y == 1]
# x_neg <- dat$x[dat$y == 0]
# bench_10000 <- microbenchmark::microbenchmark(
#     cutpointr(dat, x, y, pos_class = 1, neg_class = 0,
#               direction = ">=", metric = youden, break_ties = mean, silent = TRUE),
#     rocr_sensspec(dat$x, dat$y),
#     optimal.cutpoints(X = "x", status = "y", tag.healthy = 0, methods = "Youden",
#                       data = dat),
#     proc_sensspec(dat$x, dat$y),
#     thres2(k1 = x_neg, k2 = x_pos, rho = 0.5,
#            method = "empirical", ci = FALSE),
#     times = 100
# )
# 
# n <- 1e5
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_1e5 <- microbenchmark::microbenchmark(
#     cutpointr(dat, x, y, pos_class = 1, neg_class = 0,
#               direction = ">=", metric = youden, break_ties = mean),
#     rocr_sensspec(dat$x, dat$y),
#     proc_sensspec(dat$x, dat$y),
#     times = 100, unit = "ms"
# )
# 
# n <- 1e6
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_1e6 <- microbenchmark::microbenchmark(
#     cutpointr(dat, x, y, pos_class = 1, neg_class = 0,
#               direction = ">=", metric = youden, break_ties = mean),
#     rocr_sensspec(dat$x, dat$y),
#     proc_sensspec(dat$x, dat$y),
#     times = 30, unit = "ms"
# )
# 
# n <- 1e7
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_1e7 <- microbenchmark::microbenchmark(
#     cutpointr(dat, x, y, pos_class = 1, neg_class = 0,
#               direction = ">=", metric = youden, break_ties = mean),
#     rocr_sensspec(dat$x, dat$y),
#     proc_sensspec(dat$x, dat$y),
#     times = 30, unit = "ms"
# )
# 
# results <- rbind(
#     data.frame(time = summary(bench_100)$median,
#                Solution = summary(bench_100)$expr,
#                n = 100),
#     data.frame(time = summary(bench_1000)$median,
#                Solution = summary(bench_1000)$expr,
#                n = 1000),
#     data.frame(time = summary(bench_10000)$median,
#                Solution = summary(bench_10000)$expr,
#                n = 10000),
#     data.frame(time = summary(bench_1e5)$median,
#                Solution = summary(bench_1e5)$expr,
#                n = 1e5),
#     data.frame(time = summary(bench_1e6)$median,
#                Solution = summary(bench_1e6)$expr,
#                n = 1e6),
#     data.frame(time = summary(bench_1e7)$median,
#                Solution = summary(bench_1e7)$expr,
#                n = 1e7)
# )
# results$Solution <- as.character(results$Solution)
# results$Solution[grep(pattern = "cutpointr", x = results$Solution)] <- "cutpointr"
# results$Solution[grep(pattern = "rocr", x = results$Solution)] <- "ROCR"
# results$Solution[grep(pattern = "optimal", x = results$Solution)] <- "OptimalCutpoints"
# results$Solution[grep(pattern = "proc", x = results$Solution)] <- "pROC"
# results$Solution[grep(pattern = "thres", x = results$Solution)] <- "ThresholdROC"
# 
# results$task <- "Cutpoint Estimation"

## ----echo = FALSE-------------------------------------------------------------
# These are the original results on our system
# dput(results)
results <- structure(list(time = c(4.5018015, 1.812802, 0.662101, 2.2887015, 
1.194301, 4.839401, 2.1764015, 0.981001, 45.0568005, 36.2398515, 
8.5662515, 5.667101, 2538.612001, 4.031701, 2503.8012505, 45.384501, 
43.118751, 37.150151, 465.003201, 607.023851, 583.0950005, 5467.332801, 
7850.2587, 7339.356101), Solution = c("cutpointr", "ROCR", "pROC", 
"OptimalCutpoints", "ThresholdROC", "cutpointr", "ROCR", "pROC", 
"OptimalCutpoints", "ThresholdROC", "cutpointr", "ROCR", "OptimalCutpoints", 
"pROC", "ThresholdROC", "cutpointr", "ROCR", "pROC", "cutpointr", 
"ROCR", "pROC", "cutpointr", "ROCR", "pROC"), n = c(100, 100, 
100, 100, 100, 1000, 1000, 1000, 1000, 1000, 10000, 10000, 10000, 
10000, 10000, 1e+05, 1e+05, 1e+05, 1e+06, 1e+06, 1e+06, 1e+07, 
1e+07, 1e+07), task = c("Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation", "Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation", "Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation", "Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation", "Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation", "Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation", "Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation", "Cutpoint Estimation", "Cutpoint Estimation", 
"Cutpoint Estimation")), row.names = c(NA, -24L), class = "data.frame")

## ----eval = FALSE-------------------------------------------------------------
# # ROCR package
# rocr_roc <- function(x, class) {
#     pred <- ROCR::prediction(x, class)
#     perf <- ROCR::performance(pred, "sens", "spec")
#     return(NULL)
# }
# 
# # pROC package
# proc_roc <- function(x, class) {
#     r <- pROC::roc(class, x, algorithm = 2, levels = c(0, 1), direction = "<")
#     return(NULL)
# }

## ----eval = FALSE, echo = FALSE-----------------------------------------------
# n <- 100
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_100 <- microbenchmark::microbenchmark(
#     cutpointr::roc(dat, "x", "y", pos_class = 1,
#                    neg_class = 0, direction = ">="),
#     rocr_roc(dat$x, dat$y),
#     proc_roc(dat$x, dat$y),
#     times = 100, unit = "ms"
# )
# n <- 1000
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_1000 <- microbenchmark::microbenchmark(
#     cutpointr::roc(dat, "x", "y", pos_class = 1, neg_class = 0,
#                    direction = ">="),
#     rocr_roc(dat$x, dat$y),
#     proc_roc(dat$x, dat$y),
#     times = 100, unit = "ms"
# )
# n <- 10000
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_10000 <- microbenchmark::microbenchmark(
#     cutpointr::roc(dat, "x", "y", pos_class = 1, neg_class = 0,
#                    direction = ">="),
#     rocr_roc(dat$x, dat$y),
#     proc_roc(dat$x, dat$y),
#     times = 100, unit = "ms"
# )
# n <- 1e5
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_1e5 <- microbenchmark::microbenchmark(
#     cutpointr::roc(dat, "x", "y", pos_class = 1, neg_class = 0,
#                    direction = ">="),
#     rocr_roc(dat$x, dat$y),
#     proc_roc(dat$x, dat$y),
#     times = 100, unit = "ms"
# )
# n <- 1e6
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_1e6 <- microbenchmark::microbenchmark(
#     cutpointr::roc(dat, "x", "y", pos_class = 1, neg_class = 0,
#                    direction = ">="),
#     rocr_roc(dat$x, dat$y),
#     proc_roc(dat$x, dat$y),
#     times = 30, unit = "ms"
# )
# n <- 1e7
# set.seed(123)
# dat <- data.frame(x = rnorm(n), y = sample(c(0:1), size = n, replace = TRUE))
# bench_1e7 <- microbenchmark::microbenchmark(
#     cutpointr::roc(dat, "x", "y", pos_class = 1, neg_class = 0,
#                    direction = ">="),
#     rocr_roc(dat$x, dat$y),
#     proc_roc(dat$x, dat$y),
#     times = 30, unit = "ms"
# )
# 
# results_roc <- rbind(
#     data.frame(time = summary(bench_100)$median,
#                Solution = summary(bench_100)$expr,
#                n = 100),
#     data.frame(time = summary(bench_1000)$median,
#                Solution = summary(bench_1000)$expr,
#                n = 1000),
#     data.frame(time = summary(bench_10000)$median,
#                Solution = summary(bench_10000)$expr,
#                n = 10000),
#     data.frame(time = summary(bench_1e5)$median,
#                Solution = summary(bench_1e5)$expr,
#                n = 1e5),
#     data.frame(time = summary(bench_1e6)$median,
#                Solution = summary(bench_1e6)$expr,
#                n = 1e6),
#     data.frame(time = summary(bench_1e7)$median,
#                Solution = summary(bench_1e7)$expr,
#                n = 1e7)
# )
# results_roc$Solution <- as.character(results_roc$Solution)
# results_roc$Solution[grep(pattern = "cutpointr", x = results_roc$Solution)] <- "cutpointr"
# results_roc$Solution[grep(pattern = "rocr", x = results_roc$Solution)] <- "ROCR"
# results_roc$Solution[grep(pattern = "proc", x = results_roc$Solution)] <- "pROC"
# results_roc$task <- "ROC curve calculation"

## ----echo = FALSE-------------------------------------------------------------
# Our results
results_roc <- structure(list(time = c(0.7973505, 1.732651, 0.447701, 0.859301, 
2.0358515, 0.694802, 1.878151, 5.662151, 3.6580505, 11.099251, 
42.8208515, 35.3293005, 159.8100505, 612.471901, 610.4337005, 
2032.693551, 7806.3854515, 7081.897251), Solution = c("cutpointr", 
"ROCR", "pROC", "cutpointr", "ROCR", "pROC", "cutpointr", "ROCR", 
"pROC", "cutpointr", "ROCR", "pROC", "cutpointr", "ROCR", "pROC", 
"cutpointr", "ROCR", "pROC"), n = c(100, 100, 100, 1000, 1000, 
1000, 10000, 10000, 10000, 1e+05, 1e+05, 1e+05, 1e+06, 1e+06, 
1e+06, 1e+07, 1e+07, 1e+07), task = c("ROC curve calculation", 
"ROC curve calculation", "ROC curve calculation", "ROC curve calculation", 
"ROC curve calculation", "ROC curve calculation", "ROC curve calculation", 
"ROC curve calculation", "ROC curve calculation", "ROC curve calculation", 
"ROC curve calculation", "ROC curve calculation", "ROC curve calculation", 
"ROC curve calculation", "ROC curve calculation", "ROC curve calculation", 
"ROC curve calculation", "ROC curve calculation")), row.names = c(NA, 
-18L), class = "data.frame")

## ----echo = FALSE-------------------------------------------------------------
library(ggplot2)
results_all <- dplyr::bind_rows(results, results_roc)

ggplot(results_all, aes(x = n, y = time, col = Solution, shape = Solution)) +
  geom_point(size = 3) + geom_line() +
  scale_y_log10(breaks = c(0.5, 1, 2, 3, 5, 10, 25, 100, 250, 1000, 5000, 1e4, 15000)) +
  scale_x_log10(breaks = c(100, 1000, 1e4, 1e5, 1e6, 1e7)) +
  ylab("Median Time (milliseconds, log scale)") + xlab("Sample Size (log scale)") +
  theme_bw() +
  theme(legend.position = "bottom", 
        legend.key.width = unit(0.8, "cm"), 
        panel.spacing = unit(1, "lines")) +
  facet_grid(~task)

## ----echo = FALSE-------------------------------------------------------------
library(tidyr)
res_table <- tidyr::spread(results_all, Solution, time)
res_table <- dplyr::arrange(res_table, task)
knitr::kable(res_table)

Try the cutpointr package in your browser

Any scripts or data that you put into this service are public.

cutpointr documentation built on June 13, 2025, 5:09 p.m.