# predict_quality --------------------------------------------------------------
#' Predict Water Quality for Bathing Spot Using Stored Model
#'
#' If no time period is specified, the function will first check the
#' measurements for missing E.coli values and do the prediction for the days
#' for which an E.coli value of -1 has been uploaded. If all E.coli measurements
#' are valid, the prediction will be done for the current day
#' (\code{Sys.Date()}).
#'
#' @param user_id user ID
#' @param spot_id bathing spot ID
#' @param from Date object or date string in format yyyy-mm-dd giving the first
#' day of the time period to be predicted. Default: "today"
#' @param to Date object or date string in format yyyy-mm-dd giving the last day
#' of the time period to be predicted. Default: "today"
#' @param import logical telling whether to import new rain data or not.
#' Default: \code{TRUE}.
#' @param return_debug_info logical with default \code{FALSE}. If \code{TRUE}
#' the prediction is not written to the database. Instead, what would be send
#' to the database is returned with all relevant variables that were used to
#' prepare the prediction being set as attributes.
#' @param radolan_time time string ("hhmm") against which to match the RADOLAN
#' file names to be loaded. By default, the latest available time for the day
#' given in \code{from} is used (for all days within \code{from} and
#' \code{to}).
#' @return list with elements \code{data}, \code{success}, \code{message} or (if
#' \code{return_debug_info = TRUE}) data frame representing the predictions
#' with attributes \code{spot_data}, \code{riverdata_raw}, \code{riverdata},
#' \code{newdata_raw}, \code{newdata}, \code{prediction} representing
#' intermediate variables that were used to prepare the prediction (see
#' source code of \code{fhpredict::predict_quality} to understand their
#' meaning)
#' @export
predict_quality <- function(
user_id, spot_id, from = Sys.Date(), to = from, import = TRUE,
return_debug_info = FALSE, radolan_time = NULL
)
{
#kwb.utils::assignPackageObjects("fhpredict")
#kwb.utils:::assignArgumentDefaults(predict_quality)
#user_id=11;spot_id=58;to=from;import=FALSE
(before_from <- as.Date(from) - 1L)
(before_to <- as.Date(to) - 1L)
# Default RADOLAN time string: latest available for the day before the first
# day to predict
(radolan_time <- kwb.utils::defaultIfNULL(
radolan_time, utils::tail(available_radolan_times_of_day(before_from), 1L)
))
# Try to get the model that was added last (if any)
model <- try(get_last_added_model(user_id, spot_id))
if (is_error(model)) {
return(create_failure(model))
}
# Provide new data for prediction
newdata <- try({
# Determine the days of which to load data and the days to be predicted
(data_days <- determine_date_range(before_from, before_to))
(prediction_days <- determine_date_range(from, to))
# Load new (rain) data required for the prediction
if (import) {
import_new_data(
user_id, spot_id, data_days, radolan_time = radolan_time
)
}
# Collect all data that are available for the given bathing spot
spot_data <- provide_input_data(user_id, spot_id, require_hygiene = FALSE)
# Prepare the data (filter for bathing season, log-transform rain)
riverdata_raw <- prepare_river_data(spot_data)
# If no data are available for the "to" date, add a fake entry for the "to"
# date (with all values 0) and reorder the data frame by the date
riverdata <- lapply(riverdata_raw, add_fake_entry_if_required, to)
# Calculate daily means just in case there is more than one record per day
riverdata <- lapply(riverdata, calculate_daily_means)
#identify_date_duplicates(riverdata_raw)
stopifnot(all(lengths(identify_date_duplicates(riverdata)) == 0L))
# Provide data frame with all additional variables, as required by the model
newdata_raw <- provide_data_for_lm(riverdata, for_model_building = FALSE)
stopifnot(length(identify_date_duplicates(newdata_raw)) == 0L)
# Filter for the dates to be predicted
all_dates <- substr(newdata_raw$datum, 1, 10)
use_me <- all_dates %in% as.character(prediction_days)
if (! any(use_me)) clean_stop(
"No data available for these required days for prediction: ",
kwb.utils::stringList(as.character(data_days))
)
#kwb.utils::removeColumns(newdata[use_me, ], "log_e.coli")
# Names of variables that are acutally used by the model
model_vars <- get_independent_variables(model$formula)
# Keep only the rows related to days to be predicted and keep only the
# variables that are required by the model
kwb.utils::selectColumns(newdata_raw[use_me, ], c("datum", model_vars))
})
if (is_error(newdata)) {
return(create_failure(newdata))
}
result <- try({
# Get a prediction using the model and the new data
prediction <- rstanarm::posterior_predict(model, newdata = newdata)
percentiles <- finish_prediction(prediction, newdata)
if (return_debug_info) {
return(structure(
percentiles,
spot_data = spot_data,
riverdata_raw = riverdata_raw,
riverdata = riverdata,
newdata_raw = newdata_raw,
newdata = newdata,
prediction = prediction
))
}
api_replace_predictions(user_id, spot_id, percentiles)
})
if (is_error(result)) {
return(create_failure(result))
}
return(create_result(
data = result,
success = TRUE,
message = get_text(
"predictions_posted",
n = length(result),
n_updated = length(kwb.utils::getAttribute(result, "updated")),
n_added = length(kwb.utils::getAttribute(result, "added"))
)
))
}
# determine_date_range ---------------------------------------------------------
determine_date_range <- function(from = NULL, to = NULL)
{
#kwb.utils::assignPackageObjects("fhpredict");from=NULL;to=NULL
from_missing <- is.null(from)
to_missing <- is.null(to)
# If only one of "from" and "to" are given, return the one that is given
if ((from_missing && ! to_missing) || (to_missing && ! from_missing)) {
return(as.Date(if (from_missing) to else from))
}
# If "from" and "to" are given, return a sequence of days between the two
if (! from_missing && ! to_missing) {
return(seq(as.Date(from), as.Date(to), by = 1L))
}
clean_stop(
"Either 'from' or 'to' must be given to 'determine_date_range'"
)
}
# get_days_with_missing_values -------------------------------------------------
get_days_with_missing_values <- function(hygiene)
{
is_missing <- missing_ecoli(hygiene)
if (! any(is_missing)) {
return(NULL)
}
times <- kwb.utils::selectColumns(hygiene, "datum")[is_missing]
as.Date(substr(times, 1, 10))
}
# import_new_data --------------------------------------------------------------
import_new_data <- function(user_id, spot_id, days, radolan_time = "1050")
{
stopifnot(inherits(days, "Date"))
# Extend days to 5-day periods before each day
dates <- add_days_before(days, n_days_before = 5)
urls <- get_radolan_urls_for_days(dates = dates, time = radolan_time)
control <- provide_rain_data(user_id, spot_id, urls = urls, info = FALSE)
while (control$remaining > 0) {
control <- provide_rain_data(control = control)
}
# Import new data from other sources...
}
# finish_prediction ------------------------------------------------------------
finish_prediction <- function(prediction, newdata)
{
stopifnot(ncol(prediction) == nrow(newdata))
percentiles <- get_percentiles_from_prediction(prediction)
percentiles$prediction <- get_quality_from_percentiles(percentiles)
names(percentiles) <- kwb.utils::multiSubstitute(
strings = names(percentiles),
replacements = list("^P" = "percentile", "\\." = "_")
)
dates <- kwb.utils::selectColumns(newdata, "datum")
percentiles$date <- dates
percentiles$dateTime <- dates
percentiles
}
# add_fake_entry_if_required ---------------------------------------------------
add_fake_entry_if_required <- function(df, to)
{
#df <- riverdata_tmp$hygiene_spot58
to_date <- as.POSIXct(paste0(to, "00:00:00"))
if (! to_date %in% df$datum) {
to_record <- do.call(
what = data.frame,
args = c(as.list(to_date), as.list(rep(0, ncol(df) - 1L)))
)
df <- rbind(df, stats::setNames(to_record, names(df)))
df <- df[order(df$datum), ]
}
df
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.