Nothing
# Vignette setup: ask knitr to merge each chunk's source and output into a
# single block and to prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Packages used by the examples in this vignette.
library(pipetime)
library(dplyr)
library(stringr)
library(ggplot2)
pipetime enables inline timing of R pipelines (|>), helping identify performance bottlenecks and compare different approaches without disrupting your workflow.
We illustrate this with a text processing example, comparing base R string functions against the optimized stringr package for common data cleaning tasks.
Workflow A 🐢: Uses base R string functions (e.g. sub, gsub, substr, grepl).
Workflow B 🚀: Uses stringr's optimized functions.
# Fix the RNG seed so the simulated data set is reproducible.
set.seed(123)

# Number of rows in the simulated data set.
n_rows <- 1e5

# Build one random string of `n` lowercase letters (sampled with replacement).
# Returns "" when `n` is 0.
make_str <- function(n) {
  paste(sample(letters, n, replace = TRUE), collapse = "")
}

# Simulated records with four columns:
#   id    - running integer 1..n_rows
#   email - 5-15 random letters followed by one of three mail domains
#   phone - "(ddd) ddd-dddd" with random digits
#   text  - 20-100 random letters
# The random draws are made in the same order as before (local part, domain,
# the three phone pieces, then text), so the seed above yields the same data.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# character vector with one string per row.
text_data <- data.frame(
  id = seq_len(n_rows),
  email = paste0(
    vapply(sample(5:15, n_rows, replace = TRUE), make_str, character(1)),
    sample(
      c("@gmail.com", "@yahoo.com", "@hotmail.com"),
      n_rows,
      replace = TRUE
    )
  ),
  phone = paste0(
    "(", sample(100:999, n_rows, replace = TRUE), ") ",
    sample(100:999, n_rows, replace = TRUE), "-",
    sample(1000:9999, n_rows, replace = TRUE)
  ),
  text = vapply(sample(20:100, n_rows, replace = TRUE), make_str, character(1))
)

# Preview the first rows.
head(text_data, n = 3)
We pass the log argument to time_pipe() so that each workflow stores its timings in a separate log.
# Packages needed by this chunk; stringr is listed explicitly because
# Workflow B relies on its str_* functions.
library(dplyr)
library(pipetime)
library(stringr)

# Turn off pipetime's console output; the timings are read back from the
# named logs instead.
options(pipetime.console = FALSE)

# Workflow A: Base R
# Each time_pipe() call is placed directly after the step it labels and
# writes to the "base" log. The pipeline is kept as one chain on purpose:
# moving or splitting the steps would change what is being timed.
# NOTE(review): word_count here splits on a single literal space, while
# Workflow B counts runs of non-whitespace. The two agree for the simulated
# text (letters only) but would differ on text with repeated spaces.
wf_A <- text_data |>
  mutate(
    domain = sub(".*@", "", email),
    clean_phone = gsub("[^0-9]", "", phone),
    word_count = lengths(strsplit(text, " "))
  ) |>
  time_pipe("extract & clean", log = "base") |>
  filter(grepl("^[a-m]", text)) |>
  time_pipe("filter", log = "base") |>
  mutate(
    text_upper = toupper(text),
    truncated = substr(text, 1, 50)
  ) |>
  time_pipe("transform", log = "base")

# Workflow B: stringr (optimized)
# Same three steps and labels as Workflow A, written to the "stringr" log so
# the two sets of timings can be compared step by step.
wf_B <- text_data |>
  mutate(
    domain = str_extract(email, "(?<=@).*"),
    clean_phone = str_remove_all(phone, "[^0-9]"),
    word_count = str_count(text, "\\S+")
  ) |>
  time_pipe("extract & clean", log = "stringr") |>
  filter(str_detect(text, "^[a-m]")) |>
  time_pipe("filter", log = "stringr") |>
  mutate(
    text_upper = str_to_upper(text),
    truncated = str_sub(text, 1, 50)
  ) |>
  time_pipe("transform", log = "stringr")
library(ggplot2)

# Collect both logs
# get_log() is called without arguments and its result is stacked into one
# data frame; the `workflow` column carries the name of the log each row
# came from.
logs <- get_log() |>
  bind_rows(.id = "workflow") |>
  group_by(workflow) |>
  # Add a starting point: a zero-duration "start" row at the top of each
  # workflow so every line in the plot begins at 0.
  group_modify(
    \(rows, key) add_row(rows, duration = 0, label = "start", .before = 1)
  ) |>
  # Number the rows within each workflow (start = 1, then one per timed step).
  mutate(step = factor(row_number())) |>
  # Drop the grouping so later operations on `logs` are not silently
  # performed per workflow.
  ungroup()

# One line per workflow: recorded duration against step, with each point
# annotated by its time_pipe() label.
ggplot(
  logs,
  aes(
    x = step,
    y = duration,
    colour = workflow,
    group = workflow
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_text(
    aes(label = label),
    vjust = -0.7,
    size = 3.5,
    show.legend = FALSE
  ) +
  labs(
    x = "Step",
    y = "Cumulative time (sec)",
    title = "Base R vs stringr",
    colour = "Workflow"
  ) +
  theme_classic()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.