knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Timings hydrorecipes vs recipes

Timings for the hydrorecipes package are prefaced with an "h". The first few comparisons include the R6 interface in hydrorecipes to check if there is a loss of speed compared to the standard API. Most users are likely to use the standard API so the remaining benchmarks only present that. Typical speed improvements are between 2-10x and memory consumption is typically half of the recipes package.

#| echo: false
#| warning: false
#| message: false

library(collapse)
library(gslnls)
library(data.table)
library(hydrorecipes)
library(bench)
library(tibble)
library(RcppRoll) # for recipes::step_window
library(splines2)

creating a recipe

#| echo: true
#| warning: false

relative <- TRUE
n <- c(1e2, 1e4, 5e6)
formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    bench::mark(
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat),
      hrec2 = recipe(formula = formula, data = dat),
      rec   = recipes::recipe(formula = formula, data = dat),
      check = FALSE,
      relative = relative
    )
  }
)

results

add a step

#| echo: true
#| warning: false

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    bench::mark(
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
        add_step(hydrorecipes:::StepCenter$new(x)),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_center(x),
      rec  = {recipes::recipe(formula = formula, data = dat) |>
          recipes::step_center(x)},
      check = FALSE,
      relative = relative
    )
  }
)

results

step_center prep

#| echo: true
#| warning: false

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 = recipe(formula = formula, data = dat) |>
      step_center(x)      
    rec   = recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)
    bench::mark(
      hrec1$prep(),
      hrec2 |> prep(),
      rec |> recipes::prep(),
      check = FALSE,
      min_iterations = 1L,
      relative = relative
    )
  }
)

results

step_center prep and bake

#| echo: true
#| warning: false

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 = recipe(formula = formula, data = dat) |>
      step_center(x)
    rec   = recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)

    bench::mark(
      hrec1$prep()$bake(),
      hrec2 |> prep() |> bake(),
      rec |> recipes::prep() |> recipes::bake(new_data = NULL),
      check = FALSE,
      min_iterations = 1L,
      relative = relative
    )
  }
)

results

step_center

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))

    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
                step_center(x) |>
                plate())[["x"]],
      rec  = (recipes::recipe(formula = formula, data = dat) |>
                recipes::step_center(x) |> 
                recipes::prep() |> 
                recipes::bake(new_data = NULL))[["x"]],
      check = TRUE,
      min_iterations = 1L,
      relative = relative
    )
  }
)

results

step_scale

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
                step_scale(x, fun = fsd, n_sd = 2L) |>
                plate())[["x"]],
      rec  = (recipes::recipe(formula = formula, data = dat) |>
                recipes::step_scale(x, factor = 2L) |> 
                recipes::prep() |> 
                recipes::bake(new_data = NULL))[["x"]],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_intercept

#| echo: true
#| warning: false

formula <- as.formula(y~x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows))
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
                step_intercept() |>
                plate("tbl"))[["intercept"]],
      rec = (recipes::recipe(formula = formula, data = dat) |>
               recipes::step_intercept() |> 
               recipes::prep() |> 
               recipes::bake(new_data = NULL))[["intercept"]],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )

  }
)

results

step_normalize

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows),
                      z = rnorm(rows))

    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_normalize(c(x, z, y)) |>
                 plate("tbl"))[, c("x", "z", "y")],

      hrec2 = (recipe(formula = formula, data = dat) |>
                 step_center(c(x, z, y)) |>
                 step_scale(c(x, z, y)) |>
                 plate("tbl"))[, c("x", "z", "y")],

      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_normalize(x, y, z) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),

      relative = relative,
      min_iterations = 1L,
      check = TRUE
    )
  }
)

results

step_drop_columns

#| echo: true
#| warning: false

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows),
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat)  |>
        step_drop_columns(z) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat)  |>
        recipes::step_rm(z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = TRUE,
      relative = relative
    )
  }
)

results

step_subset_na_omit

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = c(1e6, 1e7),
  {
    dat <- tibble(x = rnorm(rows), 
                  z = rnorm(rows),
                  y = rnorm(rows))
    dat[1:5, "x"] <- NA_real_
    dat[100:150, "z"] <- NA_real_
    dat[10000:15000, "y"] <- NA_real_

    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_subset_na_omit(terms = x) |>
                 prep() |>
                 bake())$get_result("tbl"),

      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_naomit(x) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),

      relative = FALSE,
      min_iterations = 1L,
      check = TRUE
    )
  }
)

results

step_subset_rows

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = c(1e6, 1e7),
  {
    dat <- tibble(x = rnorm(rows), 
                  z = rnorm(rows),
                  y = rnorm(rows))
    sub <- sample(1:rows, size = 5e5)

    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_subset_rows(row_numbers = sub) |>
                 prep() |>
                 bake())$get_result("tbl"),

      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_slice(sub) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),

      base = dat[sub, ],

      relative = FALSE,
      min_iterations = 1L,
      check = TRUE
    )
  }
)

results

step_subset_sample

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = c(1e6, 1e7),
  {
    dat <- data.frame(x = rnorm(rows), 
                      z = rnorm(rows),
                      y = rnorm(rows))

    bench::mark(
      h <- {hrec1 = recipe(formula = formula, data = dat) |>
        step_subset_sample(size = 10000L) |>
        prep() |>
        bake()
      h = nrow(hrec1$get_result("tbl"))},

      rec = nrow(recipes::recipe(formula = formula, data = dat) |>
                   recipes::step_sample(size = 10000 / rows) |>
                   recipes::prep() |>
                   recipes::bake(new_data = NULL)),

      relative = FALSE,
      min_iterations = 1L,
      check = TRUE
    )
  }
)

results

step_cross_correlation

#| echo: true
#| warning: false


formula <- as.formula(y~x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows))

    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_cross_correlation(c(x, z, y), lag_max = 1000) |>
        plate("tbl"),

      min_iterations = 1L,
    )
  }
)

results


x <- rnorm(5e5)
y <- rnorm(5e5)
lag_max <- 1000
results <- bench::mark(fft_ccf  <- hydrorecipes:::convolve_correlation(x, y, lag_max),
                       ccf_base <- as.numeric(ccf(x, y, lag.max = lag_max, plot = FALSE)$acf),
                       min_iterations = 1L,
                       check = TRUE
)

results

step_lag

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = as.numeric(1:rows),
                      z = rnorm(rows))
    bench::mark(
      hrec1 = unname(recipe(formula = formula, data = dat) |>
                       step_lead_lag(x, lag = 1:30) |>
                       plate("tbl")),
      rec   = unname(recipes::recipe(formula = formula, data = dat) |>
                       recipes::step_lag(x, lag = 1:30) |> 
                       recipes::prep() |> 
                       recipes::bake(new_data = NULL)),
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_distributed_lag

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = c(5e5, 5e6, 1e7),
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_distributed_lag(x, knots = log_lags(5, 86401)) |>
        prep() |> bake(),
      check = FALSE,
      relative = FALSE,
      min_iterations = 1L
    )
  }
)

results

step_harmonic

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_harmonic(x, 
                      frequency = c(1.0, 2.0, 3.0), 
                      cycle_size = 0.1, 
                      starting_value = 0.0) |>
        plate("tbl"),
      rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_harmonic(x, 
                               frequency = c(1.0, 2.0, 3.0), 
                               cycle_size = 0.1, 
                               starting_val = 0.0,
                               keep_original_cols = TRUE) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),

      # sin and cos terms order is different
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
results

# rows <- 1e6
# dat <- data.frame(x = rnorm(rows), 
#                   y = 1:rows,
#                   z = rnorm(rows))
# bench::mark(
#   
#   {hrec = recipe(formula = formula, data = dat) |>
#     step_harmonic(x, 
#                   frequency = c(1.0, 2.0, 3.0), 
#                   cycle_size = 0.1, 
#                   starting_value = 0.0,
#                   varying = "cycle_size") |>
#         step_harmonic(x, 
#                   frequency = c(1.0, 2.0, 3.0), 
#                   cycle_size = 0.1, 
#                   starting_value = 0.0) |>
#     step_intercept() |>
#     step_center(x) |>
#     prep() |>
#     bake()}, 
#   
#   {hrec$steps[[2]]$update_step("cycle_size", 0.2)
#     hrec$bake()
#   },
#   check = FALSE
# )

step_pca

#| echo: true
#| warning: false
set.seed(1)
formula <- as.formula(x~a + b + c + d + e + f + g + h + i + j + k + l)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      a = rnorm(rows),
                      b = rnorm(rows),
                      c = rnorm(rows),
                      d = rnorm(rows),
                      e = rnorm(rows),
                      f = rnorm(rows),
                      g = rnorm(rows),
                      h = rnorm(rows),
                      i = rnorm(rows),
                      j = rnorm(rows),
                      k = rnorm(rows),
                      l = rnorm(rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l), n_comp = 10L) |>
        plate(),
      hrec2 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l), n_comp = 5L) |>
        plate(),
      hrec3 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l),
                 n_comp = 10L,
                 center = FALSE,
                 scale = FALSE) |>
        plate(),
      hrec4 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l),
                 n_comp = 5L,
                 center = FALSE,
                 scale = FALSE) |>
        plate(),

      rec1  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 10L,
                          options = list(center = TRUE, scale. = TRUE))|> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      rec2  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 5L,
                          options = list(center = TRUE, scale. = TRUE)) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      rec3  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 10L) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      rec4  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 5L) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)


print(results, n = 100)

step_dummy

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = qF(sample(1:10, rows, replace = TRUE)),
                      z = rnorm(rows))
    bench::mark(
      hrec = unname(recipe(formula = formula, data = dat) |>
                      step_dummy(y) |>
                      plate("tbl"))[,3:11],
      rec  = unname(recipes::recipe(formula = formula, data = dat) |>
                      recipes::step_dummy(y, keep_original_cols = TRUE) |>
                      recipes::prep() |>
                      recipes::bake(new_data = NULL))[,3:11],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_find_interval

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_find_interval(x, vec = c(-0.1, 0, 0.1)) |>
        plate("tbl"),
      rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_cut(x, breaks = c(-0.1, 0, 0.1)) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_varying

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rep(1, rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_varying(c(x, y, z)) |>
        plate("tbl"),
      rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_zv(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_kernel_filter

step_kernel_filter uses an Fast Fourier Transform (FFT) based convolution instead of an explicit sliding window. This should be much faster for large datasets and particularly when the kernel size is also large.

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = c(2e4, 2e5),
  {
    dat <- data.frame(x = rep(1, rows), 
                      y = 1:rows,
                      z = cumsum(rnorm(rows)))
    bench::mark(
      hrec = unname((recipe(formula = formula, data = dat) |>
                       step_kernel_filter(z, kernel = list(rep(1, 5001L)/5001L), align = "center") |>
                       plate("tbl"))[10000, "kernel_filter_z"]),
      {rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_window(z, size = 5001L, statistic = "mean") |>
        recipes::prep() |>
        recipes::bake(new_data = NULL)
      unname(rec[10000, "z"])},

      min_iterations = 1L,
      relative = relative,
      check = TRUE
    )
  }
)

results

step_convolve_gamma

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = c(2e4, 2e6),
  {
    dat <- data.frame(x = rep(1, rows), 
                      y = 1:rows,
                      z = cumsum(rnorm(rows)))
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
                step_convolve_gamma(z, amplitude = 1, k = 1, theta = 1) |>
                plate("tbl")),
      min_iterations = 1,
      relative = FALSE,
      check = TRUE
    )
  }
)

results

step_compare_columns

multiple steps

step_harmonic dominates these results.

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows), 
                      y = 1:rows)
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_lead_lag(x, lag = 1:20) |>
        step_harmonic(x, 
                      frequency = c(1.0, 2.0, 3.0), 
                      cycle_size = 0.1, 
                      starting_value = 0.0) |>
        step_center(x) |> 
        plate("tbl"),
      rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_lag(x, lag = 1:20, keep_original_cols = TRUE) |>
        recipes::step_harmonic(x, 
                               frequency = c(1.0, 2.0, 3.0), 
                               cycle_size = 0.1, 
                               starting_val = 0.0,
                               keep_original_cols = TRUE) |>
        recipes::step_center(x) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1
    )
  }
)

results

step_spline_b

#| echo: true
#| warning: false

formula <- as.formula(y~x)
n <- c(100, 1e4, 5e6)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows),
                      y = 1:rows)
    bench::mark(
      hrec = unname(recipe(formula = formula, data = dat) |>
                      step_spline_b(x, df = 13) |>
                      plate("tbl")),
      rec  = unname(recipes::recipe(formula = formula, data = dat) |>
                      recipes::step_spline_b(x, deg_free = 13, keep_original_cols = TRUE)|> 
                      recipes::prep() |> 
                      recipes::bake(new_data = NULL)),
      check = TRUE,
      relative = relative,
      min_iterations = 2
    )
  }
)

results

step_spline_n

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows),
                      y = 1:rows)
    bench::mark(
      hrec = unname(recipe(formula = formula, data = dat) |>
                      step_spline_n(x, df = 11L) |>
                      plate("tbl")),
      rec  = unname(recipes::recipe(formula = formula, data = dat) |>
                      recipes::step_spline_natural(x, deg_free = 11L, keep_original_cols = TRUE)|> 
                      recipes::prep() |> 
                      recipes::bake(new_data = NULL)),
      check = TRUE,
      relative = relative,
      min_iterations = 2L
    )
  }
)

results

step_add_noise

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_add_noise(y) |>
        plate("dt"))
  }
)

results

step_aquifer_grf & step_aquifer_theis

The Theis solution is a subset of the grf solution.

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_aquifer_grf(time = x, flow_rate = y) |>
        plate("dt"),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_aquifer_theis(time = x, flow_rate = y) |>
        plate("dt"),
      check = TRUE)
  }
)

results

step_aquifer_theis_aniso

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_theis_aniso(time = x, 
                                                flow_rate = y,
                                                distance_x = 0, 
                                                distance_y = 100,
                                                hydraulic_conductivity_major = 1e-4,
                                                hydraulic_conductivity_minor = 1e-4) |>
                       plate("dt")),
      hrec2 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_theis(time = x, flow_rate = y,) |>
                       plate("dt")),
      check = TRUE)
  }
)

results

step_aquifer_leaky

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_leaky(time = x,
                                          flow_rate = y,
                                          leakage = 100000000) |>
                       plate("dt")),
      hrec2 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_theis(time = x,
                                          flow_rate = y) |>
                       plate("dt")),
      check = TRUE)
  }
)

results

step_aquifer_patch

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = c(1e4, 1e5, 1e6),
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_aquifer_grf(time = x, flow_rate = y) |>
                 plate("dt")),
      hrec3 = (recipe(formula = formula, data = dat) |>
                 step_aquifer_patch(time = x,
                                    flow_rate = 0.01,
                                    thickness = 1.0,
                                    radius = 100.0,
                                    radius_patch = 200.0,
                                    specific_storage_inner = 1e-6,
                                    specific_storage_outer = 1e-6,
                                    hydraulic_conductivity_inner = 1e-4,
                                    hydraulic_conductivity_outer = 1e-4,
                                    n_stehfest = 8L
                 ) |>
                 plate("dt")),
      check = FALSE,
      relative = relative)
  }
)


results

step_aquifer_wellbore_storage

#| echo: true
#| warning: false

results <- bench::press(
  rows = c(1e3, 1e4, 1e5),
  {
    dat <- data.frame(x = as.numeric(1:rows), 
                      y = as.numeric(1:rows))
    bench::mark(
      hrec1 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_wellbore_storage(time = x,
                                                     flow_rate = 0.01,
                                                     hydraulic_conductivity = 1e-4,
                                                     specific_storage = 1e-6, 
                                                     radius = 100,
                                                     radius_casing = 1e-15,
                                                     radius_well = 1e-15, n_terms = 18) |>
                       plate("dt")),
      hrec2 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_theis(time = x,
                                          flow_rate = y) |>
                       plate("dt")),
      check = FALSE
    )
  }
)

results

step_vadose_weeks

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows), 
                      y = as.numeric(1:rows))
    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_vadose_weeks(time = x, 
                                   air_diffusivity = 0.8, 
                                   thickness = 5, 
                                   precision = 1e-12) |>
                 plate("dt")),
      check = FALSE,
      min_iterations = 2
    )
  }
)

results

step_transport_ogata_banks

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(expand.grid(as.numeric(1:rows), 
                                  as.numeric(1:10)))
    names(dat) <- c('x', 'y')
    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_transport_ogata_banks(time = x,
                                            distance = y) |>
                 plate("dt")),
      check = FALSE,
      min_iterations = 2
    )
  }
)

results

step_transport_fractures_solute

#| echo: true
#| warning: false

formula <- as.formula(~time+z+x)

dat <- setDT(expand.grid(10^(3:8),
                         seq(0.0, 10, 1),
                         c(0.0)))

names(dat) <- c("time", "z", "x")

results <- 
  bench::mark(
    hrec1 = recipe(formula = formula, data = dat) |>
      step_transport_fractures_solute(time = time,
                                      distance_fracture = z,
                                      distance_matrix = x) |>
      plate("dt"),
    check = FALSE,
    min_iterations = 2
  )

results

step_transport_fractures_heat

#| echo: true
#| warning: false

formula <- as.formula(~time+z+x)

dat <- setDT(expand.grid(10^(3:8),
                         seq(0.0, 100, 1),
                         c(0.0, 0.05)))

names(dat) <- c("time", "z", "x")

results <- 
  bench::mark(
    hrec1 = recipe(formula = formula, data = dat) |>
      step_transport_fractures_heat(time = time,
                                    distance_fracture = z,
                                    distance_matrix = x) |>
      plate("dt"),
    check = FALSE,
    min_iterations = 2
  )

results

step_fft_pgram, step_fft_welch

#| echo: true
#| warning: false

formula <- as.formula(y~x + z)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows), z = rnorm(rows),
                      q = rnorm(rows), r = rnorm(rows), s = rnorm(rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 
                       3,
                       TRUE,
                       TRUE,
                       FALSE,
                       0.1, 
                       time_step = 1) |> 
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 
                       3,
                       TRUE,
                       TRUE,
                       TRUE,
                       0.1, 
                       time_step = 1) |> 
        prep() |>
        bake(),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_fft_welch(c(x, y),
                       length_subset =  nrow(dat) / 10,
                       overlap = 0.60,
                       window = window_nuttall(nrow(dat) / 10),
                       time_step = 1) |>
        prep() |>
        bake(),
      check = FALSE,
      min_iterations = 1
    )
  }
)

results

step_fft_transfer_welch and step_fft_transfer_pgram, step_fft_transfer_experimental

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = c(1e5, 1e6, 1e7),
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_pgram(c(x, y), 
                                3,
                                TRUE,
                                TRUE,
                                0.1, 
                                time_step = 1) |> 
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_welch(c(x, y),
                                length_subset =  nrow(dat) / 10,
                                overlap = 0.60,
                                window = window_nuttall(nrow(dat) / 10), 
                                time_step = 1) |> 
        prep() |>
        bake(),
      hrec3 <- recipe(formula = formula, data = dat) |>
        step_fft_transfer_experimental(c(x, y), 
                                       spans = 3, 
                                       taper = 0.1, 
                                       n_groups = 300,
                                       time_step = 1) |>
        prep() |>
        bake(),
      check = FALSE,
      min_iterations = 1
    )
  }
)

results

step_ols

#| echo: true
#| warning: false

formula <- as.formula(y~.)


results <- bench::press(
  rows = c(1e5, 1e6, 1e7),
  {
    dat <- data.frame(
      y = rnorm(rows),
      x = rnorm(rows), 
      z = rnorm(rows),
      a = rnorm(rows),
      b = rnorm(rows),
      d = rnorm(rows),
      e = rnorm(rows),
      f = rnorm(rows),
      g = rnorm(rows))
    m <- qM(dat)
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_ols(formula = as.formula(y~.), 
                 do_response = FALSE) |>
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_ols(formula = as.formula(y~.), 
                 do_response = TRUE) |>
        prep() |>
        bake(),
      lm = lm(y~. - 1, dat),
      lm.fit(x = m[, c(2:ncol(dat))], y = m[, 1]),
      check = FALSE,
      relative = FALSE
    )
  }
)


results


# formula <- as.formula(y~x+z)
# 
# 
# results <- bench::press(
#   rows = n,
#   {
#     dat <- data.frame(x = rnorm(rows), 
#                       y = rnorm(rows),
#                       z = rnorm(rows))
#     bench::mark(
#       hrec = recipe(formula = formula, data = dat) |>
#         step_intercept() |>
#         step_nls(formula = as.formula(y~.)) |>
#         prep() |>
#         bake(),
#       check = FALSE,
#       relative = FALSE
#     )
#   }
# )

step_nls

#| echo: true
#| warning: false


n0 <- 5e5
n <- 2e4
n2 <- 1e4
b <- cumsum(rnorm(n0))
b <- b - mean(b)
max_t <- 720 * ceiling(2.554)
a <- hydrorecipes:::convolve_overlap_save(x = b,
                                          y = hydrorecipes:::gamma_3(0:max_t, 0.816, 9.221, 2.554),
                                          0)

max_t <- 720 * ceiling(2.554)

dat <- data.frame(a = a, b = b)
formula <- formula(a~b)

# for gsl_nls
f <- function(z, x) {
  max_t <- 720 * ceiling(z[3])
  hydrorecipes:::convolve_overlap_save(x = x,
                                       y = hydrorecipes:::gamma_3(0:max_t, z[1], z[2], z[3]),
                                       align = 0)[-(1:7200)]
}


results <- bench::mark(
  gsl_fun <- unname(round(coef(gsl_nls(
    fn = f,                   ## model function      
    y = a[-(1:7200)],           ## response vector 
    x = b,
    start = c(A = 0.5, n = 2.0, a = 2.0),  ## starting values
    lower = c(A = 0.01, n = 1.0, a = 1.0), 
    upper = c(A = 1.0, n = 10.0, a = 10.0),
    control = gsl_nls_control(xtol = 1e-8),
    trace = FALSE,
    algorithm = "lm"               ## algorithm
  )), 3)), 
  h_1 = {h = recipe(formula = formula, data = dat) |>
    step_convolve_gamma(b, amplitude = 0.5, k = 2.0, theta = 2.0, 
                        varying = list(name = c("amplitude","k", "theta"),
                                       start = c(0.5, 2.0, 2.0),
                                       lower = c(0.01, 1.0, 1.0),
                                       upper = c(1.0, 10.0, 10.0))) |>
    step_nls(formula = formula(a~b), n_subset = 1L, 
             trace = FALSE,
             algorithm = "lm",
             control = gsl_nls_control(xtol = 1e-8))

  h$prep()$bake()
  unname(round(coef(h$steps[[3]]$fit), 3))},

  h_10 = {h = recipe(formula = formula, data = dat) |>
    step_convolve_gamma(b, amplitude = 0.5, k = 2.0, theta = 2.0, 
                        varying = list(name = c("amplitude","k", "theta"),
                                       start = c(0.5, 2.0, 2.0),
                                       lower = c(0.01, 1.0, 1.0),
                                       upper = c(1.0, 10.0, 10.0))) |>
    step_nls(formula = formula(a~b), n_subset = 10L, 
             trace = FALSE,
             algorithm = "lm",
             control = gsl_nls_control(xtol = 1e-8))

  h$prep()$bake()
  unname(round(coef(h$steps[[3]]$fit), 3))},

  h_100 = {h = recipe(formula = formula, data = dat) |>
    step_convolve_gamma(b, amplitude = 0.5, k = 2.0, theta = 2.0, 
                        varying = list(name = c("amplitude","k", "theta"),
                                       start = c(0.5, 2.0, 2.0),
                                       lower = c(0.01, 1.0, 1.0),
                                       upper = c(1.0, 10.0, 10.0))) |>
    step_nls(formula = formula(a~b), n_subset = 100L, 
             trace = FALSE,
             algorithm = "lm",
             control = gsl_nls_control(xtol = 1e-8))

  h$prep()$bake()
  unname(round(coef(h$steps[[3]]$fit), 3))},

  check = TRUE
)

results

step_ols_gap_fill

#| echo: true
#| warning: false

set.seed(123)
n <- 100000
frm <- formula(x ~ y + z)


x <- cumsum(rnorm(n))
dat <- data.table(x = x, y = x, z = as.numeric(1:n))
dat[, x := x + c(rep(20, n/2), rep(0, n/2))]
dat[, x := x + 3.0 * sin(z * 1/n)]
tmp <- copy(dat$x)

# Set value to NA.  These values will be estimated.
dat[60000:70000, x := NA_real_]

dat <- unclass(dat)

bench::mark(
  {h = recipe(formula = frm, data = dat) |>
    step_find_interval(z, vec = c(0, n/2, n)) |>
    step_intercept() |>
    step_spline_b(z, df = 4) |>
    step_drop_columns(z)

  hrec = recipe(formula = frm, data = dat) |>
    step_ols_gap_fill(c(x, y, z), recipe = h) |>
    prep() |>
    bake()},
  check = FALSE
)

check

step_check_spacing

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows),
                      y = 1:rows)
    dat[9:50, "x"] <- NA
    dat[9L, "y"] <- NA

    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_check_spacing(y) |>
        step_check_na(y) |>
        prep() |>
        bake(),
      hrec2 =recipe(formula = formula, data = dat) |>
        step_check_spacing(x) |>
        step_check_na(x) |>
        prep() |>
        bake(),
      check = FALSE,
      relative = FALSE,
      min_iterations = 2
    )
  }
)

results
sessionInfo()


jkennel/hydrorecipes documentation built on April 17, 2025, 4 p.m.