The goal of hydrorecipes is to supplement the recipes package with a few steps that can help deal with moderately sized water level datasets. These were developed primarily with regression deconvolution in mind and used lm or glmnet, but other model engines could also be used. The following steps are currently available:
You can install the development version of hydrorecipes from GitHub with:
# install.packages("remotes")
remotes::install_github("jkennel/hydrorecipes")
This is the method of Kennel 2020 which uses a distributed lag model and the earthtide package to generate synthetic wave groups. A \~1.5 month dataset of water and barometric pressure having a monitoring frequency of 2 minutes is presented below. The barometric response is modeled over two days using a distributed lag model with 15 regressor terms. The knots are logarithmically separated over two days to accurately capture early and late time responses which can be caused by different physical mechanisms.
library(hydrorecipes)
library(earthtide)
library(tidyr)
library(ggplot2)
data(transducer)
# convert to numeric because step_ns doesn't handle POSIXct
transducer$datetime_num <- as.numeric(transducer$datetime)
unique(diff(transducer$datetime_num)) # times are regularly spaced
#> [1] 120
# Earth tide inputs
wave_groups <- earthtide::eterna_wavegroups
wave_groups <- na.omit(wave_groups[wave_groups$time == '1 month', ])
wave_groups <- wave_groups[wave_groups$start > 0.5, ]
latitude <- 34.0
longitude <- -118.5
# create recipe
rec <- recipe(wl~baro+datetime_num, transducer) |>
step_distributed_lag(baro, knots = log_lags(15, 86400 * 2 / 120)) |>
step_earthtide(datetime_num,
latitude = latitude,
longitude = longitude,
astro_update = 1,
wave_groups = wave_groups) |>
step_ns(datetime_num, deg_free = 10) |>
prep()
input <- rec |> bake(new_data = NULL)
summary(fit <- lm(wl~., input))
#>
#> Call:
#> lm(formula = wl ~ ., data = input)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.483e-03 -2.351e-04 -1.957e-05 2.122e-04 1.661e-03
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 5.321e+00 2.253e-03 2361.583 < 2e-16 ***
#> distributed_lag_baro_1 -1.940e-01 1.153e-02 -16.823 < 2e-16 ***
#> distributed_lag_baro_2 2.382e-02 8.787e-03 2.710 0.00672 **
#> distributed_lag_baro_3 7.708e-03 5.264e-03 1.464 0.14308
#> distributed_lag_baro_4 1.379e-02 2.477e-03 5.566 2.63e-08 ***
#> distributed_lag_baro_5 9.756e-03 1.226e-03 7.955 1.84e-15 ***
#> distributed_lag_baro_6 7.818e-03 5.498e-04 14.218 < 2e-16 ***
#> distributed_lag_baro_7 6.230e-03 2.341e-04 26.609 < 2e-16 ***
#> distributed_lag_baro_8 3.435e-03 9.674e-05 35.510 < 2e-16 ***
#> distributed_lag_baro_9 1.633e-03 4.091e-05 39.918 < 2e-16 ***
#> distributed_lag_baro_10 1.946e-04 1.802e-05 10.800 < 2e-16 ***
#> distributed_lag_baro_11 6.958e-05 8.130e-06 8.558 < 2e-16 ***
#> distributed_lag_baro_12 -6.846e-05 4.434e-06 -15.441 < 2e-16 ***
#> distributed_lag_baro_13 -7.734e-02 1.515e-03 -51.058 < 2e-16 ***
#> distributed_lag_baro_14 2.007e-01 3.929e-03 51.080 < 2e-16 ***
#> distributed_lag_baro_15 -1.235e-01 2.415e-03 -51.131 < 2e-16 ***
#> earthtide_cos_1 -9.055e-06 3.359e-06 -2.696 0.00702 **
#> earthtide_sin_1 4.111e-06 3.390e-06 1.213 0.22524
#> earthtide_cos_2 8.808e-05 3.475e-06 25.349 < 2e-16 ***
#> earthtide_sin_2 -3.562e-05 3.502e-06 -10.173 < 2e-16 ***
#> earthtide_cos_3 2.843e-05 2.970e-06 9.573 < 2e-16 ***
#> earthtide_sin_3 -3.611e-05 2.896e-06 -12.467 < 2e-16 ***
#> earthtide_cos_4 1.940e-04 7.839e-06 24.750 < 2e-16 ***
#> earthtide_sin_4 4.292e-05 6.851e-06 6.265 3.78e-10 ***
#> earthtide_cos_5 -4.412e-06 3.250e-06 -1.357 0.17464
#> earthtide_sin_5 -1.397e-05 3.251e-06 -4.298 1.73e-05 ***
#> earthtide_cos_6 1.090e-05 3.714e-06 2.934 0.00335 **
#> earthtide_sin_6 -8.238e-06 3.728e-06 -2.210 0.02712 *
#> earthtide_cos_7 4.002e-06 2.065e-06 1.938 0.05263 .
#> earthtide_sin_7 -1.588e-05 2.067e-06 -7.684 1.59e-14 ***
#> earthtide_cos_8 5.720e-05 2.562e-06 22.331 < 2e-16 ***
#> earthtide_sin_8 -4.668e-05 2.562e-06 -18.219 < 2e-16 ***
#> earthtide_cos_9 3.009e-04 2.695e-06 111.630 < 2e-16 ***
#> earthtide_sin_9 -2.924e-04 2.683e-06 -109.003 < 2e-16 ***
#> earthtide_cos_10 -6.168e-06 3.315e-06 -1.860 0.06285 .
#> earthtide_sin_10 5.686e-05 3.306e-06 17.198 < 2e-16 ***
#> earthtide_cos_11 2.369e-04 6.471e-06 36.605 < 2e-16 ***
#> earthtide_sin_11 -5.476e-05 5.819e-06 -9.410 < 2e-16 ***
#> earthtide_cos_12 2.122e-06 2.531e-06 0.838 0.40194
#> earthtide_sin_12 -2.205e-06 2.536e-06 -0.870 0.38448
#> datetime_num_ns_01 -2.965e-02 2.892e-05 -1025.095 < 2e-16 ***
#> datetime_num_ns_02 -4.064e-02 3.792e-05 -1071.627 < 2e-16 ***
#> datetime_num_ns_03 -6.178e-02 3.313e-05 -1864.753 < 2e-16 ***
#> datetime_num_ns_04 -7.808e-02 3.445e-05 -2266.602 < 2e-16 ***
#> datetime_num_ns_05 -9.233e-02 3.316e-05 -2784.028 < 2e-16 ***
#> datetime_num_ns_06 -1.100e-01 3.553e-05 -3095.275 < 2e-16 ***
#> datetime_num_ns_07 -1.264e-01 3.354e-05 -3768.390 < 2e-16 ***
#> datetime_num_ns_08 -1.355e-01 2.239e-05 -6053.491 < 2e-16 ***
#> datetime_num_ns_09 -1.639e-01 7.195e-05 -2278.361 < 2e-16 ***
#> datetime_num_ns_10 -1.499e-01 1.563e-05 -9587.262 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.0003564 on 35231 degrees of freedom
#> (1440 observations deleted due to missingness)
#> Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
#> F-statistic: 1.067e+07 on 49 and 35231 DF, p-value: < 2.2e-16
The decomposition consists of:
pred <- predict_terms(fit = fit,
rec = rec,
data = input)
pred <- bind_cols(transducer[, c('datetime', 'wl')], pred)
pred$residuals <- pred$wl - pred$predicted
pred_long <- pivot_longer(pred, cols = !datetime)
levels <-c('intercept', 'ns_datetime_num', 'distributed_lag_baro',
'earthtide_datetime_num', 'predicted', 'wl', 'residuals')
labels <- c('Intercept', 'Background trend', 'Barometric Component',
'Earth Tide Component', 'Predicted', 'Water Pressure',
'Residuals (obs-mod)')
pred_long$name <- factor(pred_long$name,
levels = levels,
labels = labels)
ggplot(pred_long, aes(x = datetime, y = value)) +
geom_line() +
scale_y_continuous(labels = scales::comma) +
scale_x_datetime(expand = c(0,0)) +
ggtitle('Water Level Decomposition Results') +
xlab("") +
facet_grid(name~., scales = 'free_y') +
theme_bw()
There are two responses for this model:
resp <- response(fit, rec)
resp_ba <- resp[resp$name == 'cumulative', ]
resp_ba <- resp_ba[resp_ba$term == 'baro', ]
ggplot(resp_ba, aes(x = x * 120 / 3600, y = value)) +
ggtitle('A: Barometric Loading Response') +
xlab('lag (hours)') +
ylab('Cumulative Response') +
scale_y_continuous(limits = c(0, 1)) +
geom_line() +
theme_bw()
resp_et <- resp[resp$name %in% c('amplitude', 'phase'), ]
ggplot(resp_et, aes(x = x, xend = x, y = 0, yend = value)) +
geom_segment() +
ggtitle('B: Earthtide Response') +
xlab('Frequency (cycles per day)') +
ylab('Phase (radians) | Amplitude (dbar)') +
facet_grid(name~., scales = 'free_y') +
theme_bw()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.