knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Timings hydrorecipes vs recipes

Timings for the hydrorecipes package are prefaced with an "h". The first few comparisons include the R6 interface in hydrorecipes to check if there is a loss of speed compared to the standard API. Most users are likely to use the standard API so the remaining benchmarks only present that. Typical speed improvements are between 2-10x and memory consumption is typically half of the recipes package.

#| echo: false
#| warning: false
#| message: false

library(collapse)
library(data.table)
library(hydrorecipes)
library(bench)
library(tibble)
library(RcppRoll) # for recipes::step_window

creating a recipe

#| echo: true
#| warning: false

relative <- TRUE
n <- c(1e2, 1e4, 5e6)
formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    bench::mark(
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat),
      hrec2 = recipe(formula = formula, data = dat),
      rec   = recipes::recipe(formula = formula, data = dat),
      check = FALSE,
      relative = relative
    )
  }
)

results

add a step

#| echo: true
#| warning: false

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    bench::mark(
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
        add_step(hydrorecipes:::StepCenter$new(x)),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_center(x),
      rec  = {recipes::recipe(formula = formula, data = dat) |>
          recipes::step_center(x)},
      check = FALSE,
      relative = relative
    )
  }
)

results

step_center prep

#| echo: true
#| warning: false

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 = recipe(formula = formula, data = dat) |>
      step_center(x)      
    rec   = recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)
    bench::mark(
      hrec1$prep(),
      hrec2 |> prep(),
      rec |> recipes::prep(),
      check = FALSE,
      min_iterations = 1L,
      relative = relative
    )
  }
)

results

step_center prep and bake

#| echo: true
#| warning: false

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows)
    hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 = recipe(formula = formula, data = dat) |>
      step_center(x)
    rec   = recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)

    bench::mark(
      hrec1$prep()$bake(),
      hrec2 |> prep() |> bake(),
      rec |> recipes::prep() |> recipes::bake(new_data = NULL),
      check = FALSE,
      min_iterations = 1L,
      relative = relative
    )
  }
)

results

step_center

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))

    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
                step_center(x) |>
                plate())[["x"]],
      rec  = (recipes::recipe(formula = formula, data = dat) |>
                recipes::step_center(x) |> 
                recipes::prep() |> 
                recipes::bake(new_data = NULL))[["x"]],
      check = TRUE,
      min_iterations = 1L,
      relative = relative
    )
  }
)

results

step_scale

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
        step_scale(x, fun = fsd, n_sd = 2L) |>
        plate())[["x"]],
      rec  = (recipes::recipe(formula = formula, data = dat) |>
        recipes::step_scale(x, factor = 2L) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL))[["x"]],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_intercept

#| echo: true
#| warning: false

formula <- as.formula(y~x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows))
    bench::mark(
    hrec = (recipe(formula = formula, data = dat) |>
      step_intercept() |>
      plate("tbl"))[["intercept"]],
    rec = (recipes::recipe(formula = formula, data = dat) |>
      recipes::step_intercept() |> 
      recipes::prep() |> 
      recipes::bake(new_data = NULL))[["intercept"]],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )

  }
)

results

step_normalize

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows),
                      z = rnorm(rows))

    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
        step_normalize(c(x, z, y)) |>
        plate("tbl"))[, c("x", "z", "y")],

      hrec2 = (recipe(formula = formula, data = dat) |>
        step_center(c(x, z, y)) |>
        step_scale(c(x, z, y)) |>
        plate("tbl"))[, c("x", "z", "y")],

      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_normalize(x, y, z) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),

      relative = relative,
      min_iterations = 1L,
      check = TRUE
    )
  }
)

results

step_lag

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = as.numeric(1:rows),
                      z = rnorm(rows))
    bench::mark(
      hrec1 = unname(recipe(formula = formula, data = dat) |>
                       step_lead_lag(x, lag = 1:30) |>
                       plate("tbl")),
      rec   = unname(recipes::recipe(formula = formula, data = dat) |>
                       recipes::step_lag(x, lag = 1:30) |> 
                       recipes::prep() |> 
                       recipes::bake(new_data = NULL)),
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_distributed_lag

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = c(5e5, 5e6, 1e7),
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_distributed_lag(x, knots = log_lags(5, 86401)) |>
        prep() |> bake(),
      check = FALSE,
      relative = FALSE,
      min_iterations = 1L
    )
  }
)

results

step_harmonic

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_harmonic(x, 
                      frequency = c(1.0, 2.0, 3.0), 
                      cycle_size = 0.1, 
                      starting_value = 0.0) |>
        plate("tbl"),
      rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_harmonic(x, 
                               frequency = c(1.0, 2.0, 3.0), 
                               cycle_size = 0.1, 
                               starting_val = 0.0,
                               keep_original_cols = TRUE) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),

      # sin and cos terms order is different
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
results

# rows <- 1e6
# dat <- data.frame(x = rnorm(rows), 
#                   y = 1:rows,
#                   z = rnorm(rows))
# bench::mark(
#   
#   {hrec = recipe(formula = formula, data = dat) |>
#     step_harmonic(x, 
#                   frequency = c(1.0, 2.0, 3.0), 
#                   cycle_size = 0.1, 
#                   starting_value = 0.0,
#                   varying = "cycle_size") |>
#         step_harmonic(x, 
#                   frequency = c(1.0, 2.0, 3.0), 
#                   cycle_size = 0.1, 
#                   starting_value = 0.0) |>
#     step_intercept() |>
#     step_center(x) |>
#     prep() |>
#     bake()}, 
#   
#   {hrec$steps[[2]]$update_step("cycle_size", 0.2)
#     hrec$bake()
#   },
#   check = FALSE
# )

step_pca

#| echo: true
#| warning: false
set.seed(1)
formula <- as.formula(x~a + b + c + d + e + f + g + h + i + j + k + l)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      a = rnorm(rows),
                      b = rnorm(rows),
                      c = rnorm(rows),
                      d = rnorm(rows),
                      e = rnorm(rows),
                      f = rnorm(rows),
                      g = rnorm(rows),
                      h = rnorm(rows),
                      i = rnorm(rows),
                      j = rnorm(rows),
                      k = rnorm(rows),
                      l = rnorm(rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l), n_comp = 10L) |>
        plate(),
      hrec2 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l), n_comp = 5L) |>
        plate(),
      hrec3 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l),
                 n_comp = 10L,
                 center = FALSE,
                 scale = FALSE) |>
        plate(),
      hrec4 = recipe(formula = formula, data = dat)|>
        step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l),
                 n_comp = 5L,
                 center = FALSE,
                 scale = FALSE) |>
        plate(),

      rec1  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 10L,
                          options = list(center = TRUE, scale. = TRUE))|> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      rec2  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 5L,
                          options = list(center = TRUE, scale. = TRUE)) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      rec3  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 10L) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      rec4  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(recipes::all_predictors(),
                          num_comp = 5L) |> 
        recipes::prep() |> 
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)


print(results, n = 100)

step_dummy

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = qF(sample(1:10, rows, replace = TRUE)),
                      z = rnorm(rows))
    bench::mark(
      hrec = unname(recipe(formula = formula, data = dat) |>
                      step_dummy(y) |>
                      plate("tbl"))[,3:11],
      rec  = unname(recipes::recipe(formula = formula, data = dat) |>
                      recipes::step_dummy(y, keep_original_cols = TRUE) |>
                      recipes::prep() |>
                      recipes::bake(new_data = NULL))[,3:11],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_find_interval

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_find_interval(x, vec = c(-0.1, 0, 0.1)) |>
        plate("tbl"),
      rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_cut(x, breaks = c(-0.1, 0, 0.1)) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_varying

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rep(1, rows), 
                      y = 1:rows,
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_varying(c(x, y, z)) |>
        plate("tbl"),
      rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_zv(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)

results

step_kernel_filter

step_kernel_filter uses an Fast Fourier Transform (FFT) based convolution instead of an explicit sliding window. This should be much faster for large datasets and particularly when the kernel size is also large.

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = c(2e4, 2e5),
  {
    dat <- data.frame(x = rep(1, rows), 
                      y = 1:rows,
                      z = cumsum(rnorm(rows)))
    bench::mark(
      hrec = unname((recipe(formula = formula, data = dat) |>
                       step_kernel_filter(z, kernel = list(rep(1, 5001L)/5001L), align = "center") |>
                       plate("tbl"))[10000, "kernel_filter_z"]),
      {rec  = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_window(z, size = 5001L, statistic = "mean") |>
        recipes::prep() |>
        recipes::bake(new_data = NULL)
      unname(rec[10000, "z"])},

      min_iterations = 1L,
      relative = relative,
      check = TRUE
    )
  }
)

results

step_convolve_gamma

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)

results <- bench::press(
  rows = c(2e4, 2e6),
  {
    dat <- data.frame(x = rep(1, rows), 
                      y = 1:rows,
                      z = cumsum(rnorm(rows)))
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
                       step_convolve_gamma(z, amplitude = 1, k = 1, theta = 1) |>
                       plate("tbl")),
      min_iterations = 1,
      relative = relative,
      check = TRUE
    )
  }
)

results

step_compare_columns

multiple steps

step_harmonic dominates these results.

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows), 
                      y = 1:rows)
    bench::mark(
    hrec = recipe(formula = formula, data = dat) |>
      step_lead_lag(x, lag = 1:10) |>
      step_harmonic(x, 
                    frequency = c(1, 2, 3), 
                    cycle_size = 0.1, 
                    starting_value = 0) |>
      step_center(x) |> 
      plate("tbl"),
    rec  = recipes::recipe(formula = formula, data = dat) |>
      recipes::step_lag(x, lag = 1:10, keep_original_cols = TRUE) |>
      recipes::step_harmonic(x, 
                             frequency = c(1, 2, 3), 
                             cycle_size = 0.1, 
                             starting_val = 0,
                             keep_original_cols = TRUE) |>
      recipes::step_center(x) |> 
      recipes::prep() |> 
      recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1
    )
  }
)

results

step_spline_b

#| echo: true
#| warning: false

formula <- as.formula(y~x)
n <- c(100, 1e4, 5e6)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows),
                      y = 1:rows)
    bench::mark(
      hrec = unname(recipe(formula = formula, data = dat) |>
                      step_spline_b(x, df = 13) |>
                      plate("tbl")),
      rec  = unname(recipes::recipe(formula = formula, data = dat) |>
                      recipes::step_spline_b(x, deg_free = 13, keep_original_cols = TRUE)|> 
                      recipes::prep() |> 
                      recipes::bake(new_data = NULL)),
      check = TRUE,
      relative = relative,
      min_iterations = 2
    )
  }
)

results

step_spline_n

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows),
                      y = 1:rows)
    bench::mark(
      hrec = unname(recipe(formula = formula, data = dat) |>
                      step_spline_n(x, df = 11L) |>
                      plate("tbl")),
      rec  = unname(recipes::recipe(formula = formula, data = dat) |>
                      recipes::step_spline_natural(x, deg_free = 11L, keep_original_cols = TRUE)|> 
                      recipes::prep() |> 
                      recipes::bake(new_data = NULL)),
      check = TRUE,
      relative = relative,
      min_iterations = 2
    )
  }
)

results

step_add_noise

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_aquifer_grf(time = x, flow_rate = y) |>
        plate("dt"),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_aquifer_theis(time = x, flow_rate = y) |>
        plate("dt"),
      check = TRUE,
      relative = relative)
  }
)

results

step_aquifer_grf & step_aquifer_theis

The Theis solution is a subset of the grf solution.

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_add_noise(y) |>
        plate("dt"),
      relative = relative)
  }
)

results

step_aquifer_leaky

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_leaky(time = x,
                                          flow_rate = y,
                                          leakage = 100000000) |>
                       plate("dt")),
      hrec2 = unname(recipe(formula = formula, data = dat) |>
                       step_aquifer_theis(time = x,
                                          flow_rate = y) |>
                       plate("dt")),
      check = TRUE,
      relative = relative)
  }
)

results

step_aquifer_patch

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = c(1e5),
  {
    dat <- data.frame(x = as.numeric(1:rows),
                      y = rep(0.01, rows))
    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_aquifer_grf(time = x, flow_rate = y) |>
                 plate("dt")),
      hrec3 = (recipe(formula = formula, data = dat) |>
                 step_aquifer_patch(time = x,
                                    flow_rate = 0.01,
                                    thickness = 1.0,
                                    radius = 100.0,
                                    radius_patch = 200.0,
                                    specific_storage_inner = 1e-6,
                                    specific_storage_outer = 1e-6,
                                    hydraulic_conductivity_inner = 1e-4,
                                    hydraulic_conductivity_outer = 1e-4,
                                    n_stehfest = 8L
                 ) |>
                 plate("dt")),
      check = FALSE,
      relative = relative)
  }
)


results

step_vadose_weeks

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = as.numeric(1:rows), 
                      y = as.numeric(1:rows))
    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_vadose_weeks(time = x, 
                                   air_diffusivity = 0.8, 
                                   thickness = 5, 
                                   precision = 1e-12) |>
                 plate("dt")),
      check = FALSE,
      min_iterations = 2
    )
  }
)

results

step_transport_ogata_banks

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(expand.grid(as.numeric(1:rows), as.numeric(1:10)))
    names(dat) <- c('x', 'y')
    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
                 step_transport_ogata_banks(time = x,
                                            distance = y) |>
                 plate("dt")),
      check = FALSE,
      min_iterations = 2
    )
  }
)

results

step_transport_fractures_solute

#| echo: true
#| warning: false

formula <- as.formula(~time+z+x)

dat <- setDT(expand.grid(10^(3:8),
                         seq(0.0, 10, 1),
                         c(0.0)))

names(dat) <- c("time", "z", "x")

results <- 
  bench::mark(
    hrec1 = recipe(formula = formula, data = dat) |>
      step_transport_fractures_solute(time = time,
                                      distance_fracture = z,
                                      distance_matrix = x) |>
      plate("dt"),
    check = FALSE,
    min_iterations = 2
  )

results

step_transport_fractures_heat

#| echo: true
#| warning: false

formula <- as.formula(~time+z+x)

dat <- setDT(expand.grid(10^(3:8),
                         seq(0.0, 100, 1),
                         c(0.0, 0.05)))

names(dat) <- c("time", "z", "x")

results <- 
  bench::mark(
    hrec1 = recipe(formula = formula, data = dat) |>
      step_transport_fractures_heat(time = time,
                                    distance_fracture = z,
                                    distance_matrix = x) |>
      plate("dt"),
    check = FALSE,
    min_iterations = 2
  )

results

step_fft_pgram, step_fft_welch

#| echo: true
#| warning: false

formula <- as.formula(y~x + z)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows), z = rnorm(rows),
                      q = rnorm(rows), r = rnorm(rows), s = rnorm(rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 
                       3,
                       TRUE,
                       TRUE,
                       FALSE,
                       0.1, 
                       time_step = 1) |> 
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 
                       3,
                       TRUE,
                       TRUE,
                       TRUE,
                       0.1, 
                       time_step = 1) |> 
        prep() |>
        bake(),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_fft_welch(c(x, y),
                       length_subset =  nrow(dat) / 10,
                       overlap = 0.60,
                       window = window_nuttall(nrow(dat) / 10),
                       time_step = 1) |>
        prep() |>
        bake(),
      check = FALSE,
      min_iterations = 1
    )
  }
)

results

step_fft_transfer_welch and step_fft_transfer_pgram, step_fft_transfer_experimental

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = c(1e5, 1e6, 1e7),
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_pgram(c(x, y), 
                                3,
                                TRUE,
                                TRUE,
                                0.1, 
                                time_step = 1) |> 
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_welch(c(x, y),
                                length_subset =  nrow(dat) / 10,
                                overlap = 0.60,
                                window = window_nuttall(nrow(dat) / 10), 
                                time_step = 1) |> 
        prep() |>
        bake(),
      hrec3 <- recipe(formula = formula, data = dat) |>
        step_fft_transfer_experimental(c(x, y), 
                                       spans = 3, 
                                       taper = 0.1, 
                                       n_groups = 300,
                                       time_step = 1) |>
        prep() |>
        bake(),
      check = FALSE,
      min_iterations = 1
    )
  }
)

results

step_ols

#| echo: true
#| warning: false

formula <- as.formula(y~x+z)


results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows),
                      z = rnorm(rows))
    m <- qM(dat)
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        # step_center(x) |>
        step_distributed_lag(z, knots = log_lags(10, 50)) |>
        # step_intercept() |>
        step_ols(formula = as.formula(y~.), do_response = FALSE) |>
        prep() |>
        bake(),
      lm = lm(formula, dat),
      lm.fit(x = m[, c(2, 3)], y = m[,1]),
      check = FALSE,
      relative = FALSE
    )
  }
)

results


formula <- as.formula(y~x+z)


results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), 
                      y = rnorm(rows),
                      z = rnorm(rows))
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_intercept() |>
        step_nls(formula = as.formula(y~.)) |>
        prep() |>
        bake(),
      check = FALSE,
      relative = FALSE
    )
  }
)

step_ols_gap_fill

#| echo: true
#| warning: false

set.seed(123)
n <- 100000
frm <- formula(x ~ y + z)


x <- cumsum(rnorm(n))
dat <- data.table(x = x, y = x, z = as.numeric(1:n))
dat[, x := x + c(rep(20, n/2), rep(0, n/2))]
dat[, x := x + 3.0 * sin(z * 1/n)]
tmp <- copy(dat$x)

# Set value to NA.  These values will be estimated.
dat[60000:70000, x := NA_real_]

dat <- unclass(dat)

# bench::mark(
#   {h = recipe(formula = frm, data = dat) |>
#     step_find_interval(z, vec = c(0, n/2, n)) |>
#     step_intercept() |>
#     step_spline_b(z, df = 4) |>
#     step_drop_columns(z)
#   
#   hrec = recipe(formula = frm, data = dat) |>
#     step_ols_gap_fill(c(x, y, z), recipe = h) |>
#     prep() |> 
#     bake()},
# )

update

formula <- as.formula(y~x)

# results <- bench::press(
#   rows = n,
#   {
#     dat <- data.frame(x = rnorm(rows), 
#                       y = 1:rows,
#                       z = rnorm(rows))
#     bench::mark(
#       hrec1 = unname(recipe(formula = formula, data = dat) |>
#                        step_lead_lag(x, lag = 1:30) |>
#                        plate("tbl")),
#       hrec1 = unname(recipe(formula = formula, data = dat) |>
#                        step_lead_lag(x, lag = 1:30) |>
#                        plate("tbl")),
#       check = TRUE,
#       relative = relative,
#       min_iterations = 1
#     )
#   }
# )
# 
# results

check

step_check_spacing

#| echo: true
#| warning: false

formula <- as.formula(y~x)

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows),
                      y = 1:rows)
    dat[9, "x"] <- NA
    dat[9, "y"] <- NA

    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_check_spacing(y) |>
        step_check_na(y) |>
        prep() |>
        bake(),
      hrec2 =recipe(formula = formula, data = dat) |>
        step_check_spacing(x) |>
        step_check_na(x) |>
        prep() |>
        bake(),
      check = FALSE,
      relative = relative,
      min_iterations = 2
    )
  }
)

results
sessionInfo()


jkennel/hydrorecipes documentation built on Dec. 24, 2024, 5:38 p.m.