# inst/doc/timing_R_pipelines.R

## ----setup, include=FALSE-----------------------------------------------------
# Chunk defaults for the rendered vignette: collapse source and output into
# one block and prefix printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(pipetime)
library(dplyr)
library(stringr)
library(ggplot2)

## -----------------------------------------------------------------------------
# Fix the RNG seed so the synthetic data set is reproducible.
set.seed(123)

# Number of synthetic rows; every column below is drawn at this length.
n_rows <- 1e5

# Build one random string of `n` lowercase letters (sampled with replacement).
make_str <- function(n) paste(sample(letters, n, TRUE), collapse = "")

# Synthetic data set shared by both workflows. The sample() calls are kept in
# their original order so the seeded random stream, and therefore the data,
# is unchanged.
text_data <- data.frame(
  id = seq_len(n_rows),
  # Random 5-15 letter local part followed by one of three mail domains.
  email = paste0(
    # vapply() guarantees exactly one string per element (type-stable),
    # unlike sapply(), whose return type depends on its input.
    vapply(sample(5:15, n_rows, TRUE), make_str, character(1)),
    sample(c("@gmail.com", "@yahoo.com", "@hotmail.com"), n_rows, TRUE)
  ),
  # Phone numbers formatted as "(ddd) ddd-dddd".
  phone = paste0(
    "(",
    sample(100:999, n_rows, TRUE),
    ") ",
    sample(100:999, n_rows, TRUE),
    "-",
    sample(1000:9999, n_rows, TRUE)
  ),
  # Random 20-100 letter strings; they contain no whitespace.
  text = vapply(sample(20:100, n_rows, TRUE), make_str, character(1))
)
head(text_data, n = 3)

## -----------------------------------------------------------------------------
library(dplyr)
library(pipetime)
# NOTE(review): presumably silences pipetime's per-step console output so that
# timings are only stored in the named logs -- confirm against pipetime docs.
options(pipetime.console = FALSE)
# Workflow A: Base R
# Three stages (extract & clean, filter, transform) built on base R string
# functions. Each stage is followed by a time_pipe() checkpoint written to the
# "base" log; the plot below labels these durations as cumulative time
# (NOTE(review): verify time_pipe() semantics in the pipetime documentation).
wf_A <- text_data |>
  mutate(
    # Drop everything up to and including the last "@" (greedy match).
    domain = sub(".*@", "", email),
    # Strip every non-digit character from the phone number.
    clean_phone = gsub("[^0-9]", "", phone),
    # Number of pieces obtained by splitting on a single space.
    word_count = lengths(strsplit(text, " "))
  ) |>
  time_pipe("extract & clean", log = "base") |>
  # Keep rows whose text starts with a letter from "a" to "m".
  filter(grepl("^[a-m]", text)) |>
  time_pipe("filter", log = "base") |>
  mutate(
    text_upper = toupper(text),
    # First 50 characters of the text.
    truncated = substr(text, 1, 50)
  ) |>
  time_pipe("transform", log = "base")

# Workflow B: stringr (optimized)
# Same three stages and checkpoint labels as Workflow A, implemented with
# stringr functions and recorded under the "stringr" log so the two runs can
# be compared step by step.
wf_B <- text_data |>
  mutate(
    # Everything after the first "@" (look-behind keeps the "@" out of the match).
    domain = str_extract(email, "(?<=@).*"),
    # Strip every non-digit character from the phone number.
    clean_phone = str_remove_all(phone, "[^0-9]"),
    # Number of runs of non-whitespace characters.
    word_count = str_count(text, "\\S+")
  ) |>
  time_pipe("extract & clean", log = "stringr") |>
  # Keep rows whose text starts with a letter from "a" to "m".
  filter(str_detect(text, "^[a-m]")) |>
  time_pipe("filter", log = "stringr") |>
  mutate(
    text_upper = str_to_upper(text),
    # First 50 characters of the text.
    truncated = str_sub(text, 1, 50)
  ) |>
  time_pipe("transform", log = "stringr")


## ----dpi = 500----------------------------------------------------------------
# Collect both logs into one table; `workflow` holds the name of the log each
# row came from.
# NOTE(review): assumes get_log() returns a named list of per-log data frames
# with `duration` and `label` columns -- confirm against pipetime docs.
logs <- get_log() |>
  bind_rows(.id = "workflow") |>
  group_by(workflow) |>
  # Add a starting point (duration 0) at the top of each workflow's rows.
  group_modify(
    \(rows, key) add_row(rows, duration = 0, label = "start", .before = 1)
  ) |>
  # Number the steps within each workflow; a factor gives a discrete x axis.
  mutate(step = factor(row_number())) |>
  # Drop the grouping so later operations on `logs` are not silently per-group.
  ungroup()

library(ggplot2)
# Line chart of the logged durations: one line per workflow, one point per
# step, each point labelled with its step name.
ggplot(
  data = logs,
  mapping = aes(x = step, y = duration, colour = workflow, group = workflow)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  # Step labels sit just above their points and are kept out of the legend.
  geom_text(
    mapping = aes(label = label),
    size = 3.5,
    vjust = -0.7,
    show.legend = FALSE
  ) +
  labs(
    title = "Base R vs stringr",
    x = "Step",
    y = "Cumulative time (sec)",
    colour = "Workflow"
  ) +
  theme_classic()

# Try the pipetime package in your browser
#
# Any scripts or data that you put into this service are public.
#
# pipetime documentation built on Nov. 5, 2025, 5:40 p.m.