knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
### Optionally: specify GitHub Personal Access Token (GITHUB_PAT) ### See here why this might be important for you: ### https://kwb-r.github.io/kwb.pkgbuild/articles/install.html#set-your-github_pat # Sys.setenv(GITHUB_PAT = "mysecret_access_token") # Install package "remotes" from CRAN if (! require("remotes")) { install.packages("remotes", repos = "https://cloud.r-project.org") } # Install KWB package 'kwb.heatsine' from GitHub remotes::install_github("KWB-R/kwb.heatsine")
The sinus fit optimisation works for daily time series of a surface water and a groundwater monitoring point, which follow the following requirements:
The time series must be provided in a .csv
(comma separated value) file, which
follows the naming convention:
temperature_<type>
_<monitoring_id>
.csv
where <type>
needs to be exactly groundwater
or surface-water
. In case any
other value for <type>
is chosen the program will not work (e.g SURFACE
WATER
). The value of the <monitoring_id>
will be used for labeling the
monitoring points in the plots and can be almost any value but must not contain
_
(underscore) or " "
(space).
The data structure of each .csv
file is very simple as it contains only the
two columns date
(formatted: YYYY-MM-DD
, e.g. 2020-06-23
) and value
(temperature data in degree Celsius) as shown exemplary below:
date,value 2020-06-23,12.3 2020-06-24,12.3
Please make sure that both columns date
and value
are all lowercase and
correctly spelled as the program will not work if this is not the case.
The R package includes the following temporal times series data for testing:
list.files(kwb.heatsine::extdata_file())
In this the example the files temperature_surface-water_Txxsxx-mxxxxsxxx.csv
and temperature_groundwater_Txxxx3.csv
are used for testing the functionality
and imported by using the code below:
## Load the R package library(kwb.heatsine) load_temp <- function(base_name) { kwb.heatsine::load_temperature_from_csv( kwb.heatsine::extdata_file(base_name) ) } data_sw <- load_temp("temperature_surface-water_Txxsxx-mxxxxsxxx.csv") data_gw <- load_temp("temperature_groundwater_Txxxx3.csv")
Plot time series for whole time period (moveover on data points to select)
kwb.heatsine::plot_temperature_interactive(data_sw)
kwb.heatsine::plot_temperature_interactive(data_gw)
Reduce time period to a full sinus period which is definde by the user input
(date_end
is optional, if not given it is set to 'date_start' + 365.25 days).
data_sw_selected <- kwb.heatsine::select_timeperiod( data_sw, date_start = "2015-10-10", date_end = "2016-10-14" ) data_gw_selected <- kwb.heatsine::select_timeperiod( data_gw, date_start = "2015-12-28", date_end = "2016-12-26" )
Plot selected datasets:
kwb.heatsine::plot_temperature_interactive(df = data_sw_selected)
kwb.heatsine::plot_temperature_interactive(df = data_gw_selected)
According to Cross Validated (2013)
the sinus fit can not only be written as y = y0 + amplitude * sin(x + phase_shift)
, but also
in a linear form y = y0 + alpha * sin(x) + beta * cos(x)
. The latter approach is used
within this R package as it enables the calculation of both amplitude
(a
) and
phase_shift
(x0
) by using R's build-in stats::lm()
function as exemplary shown below:
### Generate synthetic temperature data following sine curve period_length <- 365 # assumed period length day_number <- seq_len(period_length) # daily sequence from 1 to period_length temperature <- 26.9188 + 2.123641 * sin(2 * pi * day_number / period_length + 0.6049163) x <- 2 * pi * day_number / period_length ### Print first data points: tibble::tibble(day_number = day_number, temperature = temperature, x = x) ### Now check the if both formulas yield (almost) equal results fit.lm2 <- lm(temperature ~ sin(x) + cos(x)) coeffs <- coef(fit.lm2) y0 <- coeffs[1L] # y0 alpha <- coeffs[2L] # alpha = sin(phi) beta <- coeffs[3L] # beta = cos(phi) a <- sqrt(alpha ^ 2 + beta ^ 2) # amplitude x0 <- atan2(beta, alpha) # phase t ### Print parameters of sinus fit tibble::tibble( y0 = y0, alpha = alpha, beta = beta, a = a, x0 = x0 ) x <- 2 * pi * day_number / period_length par(mfrow = c(1, 2)) ### Formula option 1: y = y0 + amplitude * sin(x + phase) plot( x = day_number, y = y0 + a * sin(x + x0), type = "l", lty = 1, lwd = 3, col = "gray", main = "Overplotted Graphs", xlab = "day_number", ylab = "temperature" ) ### Formula option 2: y = y0 + alpha * sin(x) + beta * cos(x) lines( x = day_number, y = y0 + alpha * sin(x) + beta * cos(x), lwd = 2, lty = 2, col = "red", ) ### Check numeric differences between both approaches plot( x = day_number, y = y0 + a * sin(x + x0) - (y0 + alpha * sin(x) + beta * cos(x)), lwd = 3, col = "gray", main = "Difference", xlab = "day_number", ylab = "temperature" )
As shown above the differences between both sinus calculation approaches are mostly
zero but sometimes show a minimal offset (which is close to R's floating point
accuracy of 10^r .Machine$double.ulp.digits * log10(2)
, i.e. r 10^(.Machine$double.ulp.digits * log10(2))
for the machine the calculation was carried out). As a consequence the second
approach can and will be used for sinus fitting within this R package.
Sinus fit operation is performed by minimizing the RMSE error between observed and simulated temperature curves.
limits <- c(100, 500) # minimum/maximum period length tolerance <- 0.001 # the desired accuracy () debug <- TRUE # Helper function to do the sinus optimisation do_sinus_optimisation <- function(temp_df) { kwb.heatsine::optimise_sinus_variablePeriod( temp_df = temp_df, opt_limits = limits, opt_tolerance = tolerance, opt_debug = debug ) } sinusfit_sw <- do_sinus_optimisation(temp_df = data_sw_selected) sinusfit_gw <- do_sinus_optimisation(temp_df = data_gw_selected) # Generate data frame with simulated and observed values predictions <- kwb.heatsine::get_predictions( sinusfit_sw = sinusfit_sw, sinusfit_gw = sinusfit_gw, retardation_factor = 1.8 )
kwb.heatsine::plot_prediction_interactive(predictions)
knitr::kable(predictions$traveltimes)
While only RMSE is used for optimising the sinus fit, a number of goodness-of-fit
parameters is also calculated and shown in the table below. For details on each
of these parameters the use is referred to the function hydroGOF::gof()
which is
used for calculating all these values.
knitr::kable(predictions$gof)
knitr::kable(predictions$paras)
# only first entries knitr::kable(head(predictions$data))
Export results to csv
:
write_to_csv <- function(df, file_base) { readr::write_csv(df, path = file.path( kwb.utils::createDirectory("csv"), paste0(file_base, ".csv") )) } write_to_csv(predictions$data, "sinus-fit_predictions") write_to_csv(predictions$paras, "sinus-fit_parameters") write_to_csv(predictions$gof, "sinus-fit_goodness-of-fit") write_to_csv(predictions$traveltimes, "sinus-fit_traveltimes") write_to_csv(predictions$residuals, "sinus-fit_residuals") ## Export plots: save_to_html <- function(p, file_base) { htmlwidgets::saveWidget(p, paste0(file_base, ".html")) } withr::with_dir(new = kwb.utils::createDirectory("plots"), code = { save_to_html( plot_temperature_interactive(data_sw), "temperature_surface-water_time-series_full" ) save_to_html( plot_temperature_interactive(data_sw_selected), "temperature_surface-water_time-series_selected" ) save_to_html( plot_temperature_interactive(data_gw), "temperature_groundwater_time-series_full" ) save_to_html( plot_temperature_interactive(data_gw_selected), "temperature_groundwater_time-series_selected" ) save_to_html( plot_prediction_interactive(predictions), "temperature_prediction" ) # save_to_html( # plot_residuals_interactive(prediction_df, binwidth = 0.5), # "temperature_prediction_residuals" # ) })
environment <- sessioninfo::session_info() knitr::kable(tibble::enframe(unlist(environment$platform)))
environment$packages
```r if(rmarkdown::pandoc_available()) { data.frame(pandoc_directory = rmarkdown::pandoc_exec(), pandoc_version = as.character(rmarkdown::pandoc_version()), stringsAsFactors = FALSE) } else { print("No PANDOC installed!") }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.