# hsFillUp ---------------------------------------------------------------------
#' Fill-up with Missing Timestamps
#'
#' Based on the time series given in "tseries" a new time series is generated in
#' which timestamps that are lacking in tseries are included. Optionally missing
#' values are generated by interpolation between existing values.
#'
#' @param tseries data frame representing a time-series of values
#' @param tsField Name of timestamp column in tseries. Default: name of first
#' available POSIXt-column
#' @param step_s Time step in seconds that shall lie between consecutive
#' timestamps. Non-existing timestamps are generated.
#' @param forceStep If TRUE, only timestamps that are multiples of the time step
#' given in step_s are put into the result data frame
#' @param limits data frame or matrix with exactly two columns containing first
#' and last timestamp of intervals for which timestamps are to be generated.
#' @param interpol if TRUE, the returned data frame will for each value column
#' contain a corresponding column containing interpolated values
#' @param includeOrig if TRUE and interpol = TRUE the original columns will
#' remain in the output in addition to the columns containing all (original
#' plus interpolated) values.
#' @param default value to be used if there are not enough (at least to) non-NA
#' values to be used for interpolation. Default: NA
#' @param dbg If TRUE, debug messages are shown
#' @export
#' @return Returns a data.frame
#' @importFrom kwb.utils catIf posixColumnAtPosition
hsFillUp <- function(
tseries, tsField = names(tseries)[kwb.utils::posixColumnAtPosition(tseries)[1]],
step_s = 60, forceStep = TRUE, limits = NULL, interpol = TRUE,
includeOrig = TRUE, default = NA, dbg = FALSE
)
{
# Stop if there are unexpected argument values
.stopOnBadArguments(tseries, step_s, limits)
## Check if there are timestamps that are not at exact minutes (seconds = 0)
.warnOnNoStepMultiples(timestamps = tseries[[tsField]], step_s = step_s)
# If no limits are given use the full time range of the timestamps in the
# given data frame as "artificial" limits
if (is.null(limits)) {
timeRange <- range(tseries[[tsField]])
limits <- data.frame(from = timeRange[1], to = timeRange[2])
}
## Initialise result (will become a data.frame)
result <- NULL
## Call fillup for each data block defined by the intervals given in limits
# (may be only one interval)
for (i in seq_len(nrow(limits))) {
tbeg <- limits[i, 1]
tend <- limits[i, 2]
kwb.utils::catIf(dbg, sprintf("tbeg: %s, tend: %s\n", tbeg, tend))
## Cut block of rows representing timestamps between tbeg and tend from
## tseries
selected <- tseries[[tsField]] >= tbeg & tseries[[tsField]] <= tend
# Skip empty areas
if (any(selected)) {
## Call fillup for the data block, not giving limits
blockResult <- fillup(
tseries = tseries[selected, ],
tsField = tsField,
step_s = step_s,
forceStep = forceStep,
interpol = interpol,
includeOrig = includeOrig,
default = default,
dbg = dbg
)
## add filled-up data block to result data.frame
result <- rbind(result, blockResult)
}
else {
warning("No data available between the given limits: ",
tbeg, " and ", tend, "!")
}
}
result
}
# .stopOnBadArguments ----------------------------------------------------------
.stopOnBadArguments <- function(tseries, step_s, limits)
{
## The timeseries must be given as a data.frame
if (class(tseries)[1] != "data.frame") {
stop(paste("In tseries, a data.frame must be given, containing timestamps",
"in the first column.\n"))
}
## The time-step must be given as a number
if (! is.numeric(step_s)) {
stop(sprintf("step_s must be numeric (is %s).", class(step_s)))
}
## If limits are given they must be of type matrix or data.frame
if (! is.null(limits) && class(limits) != "data.frame"
&& class(limits) != "matrix") {
stop(sprintf("limits must be data.frame or matrix (is %s).", class(limits)))
}
}
# .warnOnNoStepMultiples -------------------------------------------------------
.warnOnNoStepMultiples <- function(timestamps, step_s)
{
isNoStepMultiple <- as.integer(timestamps) %% step_s != 0
numberOfNoStepMultiples <- sum(isNoStepMultiple)
if (numberOfNoStepMultiples > 0) {
cat("There are", numberOfNoStepMultiples, "timestamps",
"(out of a total of", length(timestamps), ") that are not multiples",
"of the timestep (", step_s, "seconds ):\n")
print(utils::head(timestamps[isNoStepMultiple]))
}
}
# fillup -----------------------------------------------------------------------
#' Fill-up with Missing Timestamps
#'
#' Based on the time series given in "tseries" a new time series is generated in
#' which timestamps that are lacking in tseries are included. Optionally missing
#' values are generated by interpolation between existing values.
#'
#' @param tseries data frame representing a time-series of values
#' @param tsField Name of timestamp column in tseries. Default: name of first
#' available POSIXt-column
#' @param step_s Time step in seconds that shall lie between consecutive
#' timestamps. Non-existing timestamps are generated.
#' @param forceStep If TRUE, only timestamps that are multiples of the time step
#' given in step_s are put into the result data frame
#' @param interpol if TRUE, the returned data frame will for each value column
#' contain a corresponding column containing interpolated values
#' @param includeOrig if TRUE and interpol = TRUE the original columns will
#' remain in the output in addition to the columns containing all (original
#' plus interpolated) values.
#' @param default value to be used if there are not enough (at least to) non-NA
#' values to be used for interpolation. Default: NA
#' @param dbg If TRUE, debug messages are shown
#'
#' @return Returns a data.frame
#' @export
#' @importFrom kwb.datetime roundTime
fillup <- function(
tseries, tsField, step_s, forceStep, interpol, includeOrig, default = NA,
dbg = FALSE
)
{
## Initialise result (will become a data.frame)
result <- NULL
## Fill-up between the first and the last timestamp of the given time series
timeRange <- range(tseries[[tsField]])
## If needed, get lower minimum or greater maximum representing multiples
## of the time step.
tbeg <- kwb.datetime::roundTime(timeRange[1], step_s, 1)
tend <- kwb.datetime::roundTime(timeRange[2], step_s, 0)
## Generate the complete series of "regular" timestamps (multiples of
## time-step) between tbeg and tend
## Handle the special case of only one value
if (tbeg == tend) {
timestamps <- tbeg
} else {
timestamps <- seq(tbeg, tend, by = step_s)
}
tsBlock <- tseries
if (dbg) {
.showStartAndEndOfBlock(tsBlock, tbeg, tend, timestamps)
}
## Merge all timestamps with time series block by joining the regular
## timestamps (multiples of time-step) with the timestamps of the given
## timeseries block:
## - If forcStep is TRUE we do a "left join", where the result only contains
## the "regular" timestamps (multiples of time-step).
## - If forcStep is FALSE we do a "left or right join", where the result
## contains both all "regular" timestamps (multiples of time-step) and all
## timestamps contained in the original timeseries.
tsBlock <- merge(
x = data.frame(timestamps = timestamps),
y = tsBlock,
by.x = "timestamps",
by.y = tsField,
all.x = TRUE,
all.y = ! forceStep
)
## Interpolate values, if desired
if (interpol) {
tsBlock <- .interpolateAllColumns(
tsBlock = tsBlock,
tseries = tseries,
tsField = tsField,
default = default,
dbg = dbg
)
}
## Set result data frame
result <- tsBlock
## Get column names without timestamp column
dataColumnNames <- setdiff(colnames(tseries), tsField)
## Set column names in result data.frame
names(result) <- .newColumnNames(
columnNames = dataColumnNames,
tsField = tsField,
interpol = interpol,
dbg = dbg
)
columns <- .reorderedAndSelectedColumns(
numberOfColumns = ncol(result),
numberOfDataColumns = length(dataColumnNames),
interpol = interpol,
includeOrig = includeOrig
)
result[, columns]
}
# .showStartAndEndOfBlock ------------------------------------------------------
.showStartAndEndOfBlock <- function(tsBlock, tbeg, tend, timestamps)
{
cat(sprintf("Block of timeseries within %s and %s (full multiples of timestep)\n", tbeg, tend))
print(utils::head(tsBlock))
cat("...\n")
print(utils::tail(tsBlock))
cat(sprintf("Generated timestamps between %s and %s:\n", tbeg, tend))
print(utils::head(timestamps))
cat("...\n")
print(utils::tail(timestamps))
cat(sprintf("Available timestamps: %10d\n", nrow(tsBlock)))
cat(sprintf("All timestamps: %10d\n", length(timestamps)))
}
# .interpolateAllColumns -------------------------------------------------------
.interpolateAllColumns <- function(
tsBlock, tseries, tsField, default = NA, dbg = FALSE
)
{
# Skip the timestamp field itself
fields <- setdiff(colnames(tseries), tsField)
## For each value field
for (field in fields) {
kwb.utils::catIf(dbg, sprintf("Interpolating field: %s\n", field))
interpolated <- .getInterpolatedValues(
timestamps = tseries[[tsField]],
values = tseries[[field]],
requiredTimestamps = tsBlock$timestamps,
default = default,
dbg = dbg
)
## Add column with interpolated values to the result data.frame
tsBlock <- cbind(tsBlock, interpolated)
}
tsBlock
}
# .getInterpolatedValues -------------------------------------------------------
#' @keywords internal
#' @noRd
#' @noMd
#' @importFrom stats approx
.getInterpolatedValues <- function
(
timestamps, values, requiredTimestamps, default = NA, dbg = FALSE
)
{
## We need at least two non-NA values to interpolate
if (sum(!is.na(values)) > 1 ) {
# Calculate interpolation for this field. approx returns a list with
# components x and y of which we use the y component.
interpolated <- stats::approx(
timestamps, values, xout = requiredTimestamps
)$y
}
else {
# Determine a subsitute value. Either the value itself if there is only
# one value or the given default value!
substituteValue <- ifelse(length(values) == 1, values, default)
kwb.utils::catIf(dbg, "Not at least two non-NA values available for interpolation!\n",
"Using one value for all timestamps:", substituteValue, "\n")
interpolated <- rep(substituteValue, length(requiredTimestamps))
}
interpolated
### list with components \emph{x} (requiredTimestamps) and \emph{y}
### (interpolated values)
}
# .newColumnNames --------------------------------------------------------------
.newColumnNames <- function(columnNames, tsField, interpol, dbg = FALSE)
{
if (interpol) {
columnNames <- c(paste(columnNames, "orig", sep = "_"), columnNames)
}
columnNames <- c(tsField, columnNames)
kwb.utils::catIf(dbg, sprintf("Column names: %s\n", paste(columnNames, collapse = ", ")))
columnNames
}
# .reorderedAndSelectedColumns -------------------------------------------------
.reorderedAndSelectedColumns <- function(
numberOfColumns, numberOfDataColumns, interpol, includeOrig
)
{
## Reorder columns
columnNumbers <- 1
if (interpol) {
columnNumbers <- c(columnNumbers, (numberOfDataColumns + 2):numberOfColumns)
}
## If desired, include original columns
if (!interpol || includeOrig) {
columnNumbers <- c(columnNumbers, 2:(numberOfDataColumns + 1))
}
columnNumbers
}
# demo_hsFillUp ----------------------------------------------------------------
#' Create Plot Demonstrating hsFillUp()
#'
#' @return demo plot hsFillUp
#' @export
#' @importFrom stats rnorm
#' @importFrom graphics lines plot
demo_hsFillUp <- function()
{
message(
"See the code creating the plot by typing 'demo_hsFillUp' and pressing ",
"Enter."
)
tstamps <- hsMkTimestamps("2010-03-27", to = "2010-03-30", step.s = 3600)
df_1 <- data.frame(
DateTimeUTC = tstamps,
DataValue = stats::rnorm(n = length(tstamps))
)
limits <- data.frame(
from = kwb.datetime::hsToPosix("2010-03-28"), to = kwb.datetime::hsToPosix("2010-03-29")
)
df_2 <- hsFillUp(df_1, includeOrig = FALSE, limits = limits)
graphics::plot(df_1, main = "hsFillUp() missing values within a range")
graphics::lines(df_2, xlim = range(df_1$DateTimeUTC), type = "b", cex = 0.3)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.