#==============================================================================#
# merge
# print
# is.tse
# sum, diff
# D
# tse
# update_available_ (private)
#------------------------------------------------------------------------------#
# tse creation
tse <- function(data, period = 'hour') {
# input check
stopifnot(period %in% c('hour', 'period', 'minute'))
# create time columns
data$time.period <- hour(data$time.local)*2 + minute(data$time.local)/30
data$time.hour <- hour(data$time.local)
data$time.day <- as.POSIXct(trunc(data$time.local, 'day'))
data$time.wday <- wday(data$time.local)
data$time.month <- month(data$time.local)
data$time.year <- year(data$time.local)
if(period == 'minute') {
data$time.minute <- minute(data$time.local)
data$time.min_in_day <- hour(data$time.local)*60 + minute(data$time.local)
}
# order
data <- data[order(data$time.local), ] # order by time
class(data) <- c('tse', class(data)) # the data is now a piece of time-series
attributes(data)$period <- paste0('time.', period)
# merge public holiday info
data <- data %>%
merge(PublicHolidayData, by.x = "time.day", by.y = "date") %>%
dplyr::rename(time.holiday = public_holiday_name)
# Change the holiday level to include Normal days (no holiday)
levels(data$time.holiday) <- c(levels(data$time.holiday), 'Normal')
data$time.holiday[is.na(data$time.holiday)] <- 'Normal'
# set availability
data %>% order_columns %>% update_available
}
order_columns <- function(tse) {
atb <- attributes(tse)
value_col_i <- colnames(tse) %>%
grep('^time\\.', ., invert = TRUE) # find all the variables starting by "time."
value_col <- colnames(tse)[value_col_i] %>% # get the column names
setdiff('available') # remove the availability column
tse[, c(time_cols(tse), 'available', value_col)] %>% restore_attrib_(atb)
}
# read from a time indexed data frame, timezone is assumed 2 b SG
# time is still as string
parse_time <- function(data, time_i, time_format = '%d/%m/%Y %H:%M') {
# put the local time variable first
col_i <- seq_along(colnames(data))
col_i[1] <- time_i
col_i[time_i] <- 1
data <- data[, col_i]
colnames(data)[1] <- 'time.local'
# parse the time information
data$time.local <- as.POSIXct(strptime(data[, 1], format = time_format, tz = 'Asia/Singapore'))
data
}
#' a time series cannot have gaps in the time indexation the following
#' method fills the missing times
#'
#' @param data A time indexed data.frame
#' @param period One of the following: time.hour, time.period, time.minute
fill_time <- function(data, time_col, period) {
switch(period,
time.hour = {
# compute the number of indices needed
n_ind <- interval(start = data[1, time_col], end = tail(data[, time_col])) %/% hours(1)
# create all the indices
data2 <- data.frame(new_time_i = data[1, time_col] + dhours(0:n_ind))
},
time.period = {
# compute the number of indices needed
n_ind <- interval(start = data[1, time_col], end = tail(data[, time_col])) %/% minutes(30)
# create all the indices
data2 <- data.frame(new_time_i = data[1, time_col] + 30*dminutes(0:n_ind))
},
time.minute = {
# compute the number of indices needed
n_ind <- interval(start = data[1, time_col], end = tail(data[, time_col])) %/% minutes(1)
# create all the indices
data2 <- data.frame(new_time_i = data[1, time_col] + dminutes(0:n_ind))
}
)
# merge the full time_index into the one with gaps
merge.data.frame(data2, data, by.x = 'new_time_i', by.y = time_col, all.x = TRUE) %>%
rename_(.dots = structure(list('new_time_i'), names = time_col))
}
#' Internal method to update the availability column.
update_available <- function(data) {
data$available <- TRUE
for (col in data) {
data$available <- data$available & !is.na(col)
}
data
}
#------------------------------------------------------------------------------#
# tse indexation and subsetting
#' Subset columns.
subcol <- function(tse, j) {
atb <- attributes(tse)
# add the time info columns
cn <- j
if(!is.character(j)) cn <- colnames(tse)[j]
cn <- union(c(time_cols(tse), 'available'), cn)
# subset, order columns and update availability
tse <- `[.data.frame`(tse, T, cn) %>%
restore_attrib_(atb) %>%
order_columns %>%
update_available
# restore the attributes
# lol
}
#' Subset assignment operator.
`[<-.tse` <- function(tse, i, j, value) {
# currently we don't allow row subsetting as it
# would probably kill the constant time difference
# between rows
stopifnot(missing(i))
# save attributes
atb <- attributes(tse)
# subset
tse <- `[<-.data.frame`(tse, i, j, value)
tse %>% restore_attrib_(atb) %>% update_available # update availability and restore
}
#' Subset operator.
#' @description Currently not implemented
# `[.tse` <- function(tse, i, j) {
# stop('[`[.tse`] Sorry I\'m not implemented')
# }
#' Filter rows.
#'
#' @description Filter returns a data.frame
#'
#' @return A data.frame
#' @family filter_
filter.tse <- function(.data, ...) {
filter_(.data, .dots = lazyeval::lazy_dots(...))
}
filter_.tse <- function(.data, ..., .dots) {
atb <- attributes(.data) # save attributes
.data <- filter_(as.data.frame(.data), ..., .dots = .dots) # filter the data
# make sure the result is still a time serie -> but we don't want a ts
.data %>%
arrange(time.local) %>%
restore_attrib_(atb) %>%
order_columns
}
mutate.tse <- function(.data, ..., .dots) {
stop('[mutate] Sorry I\'m not implemented')
}
mutate_.tse <- function(.data, ..., .dots) {
stop('[mutate_] Sorry I\'m not implemented')
}
#' Rename column of a tse object.
#'
#' @description Does not allow modification of the time index column.
#' @return Modified tse object.
rename.tse <- function(.data, ..., .dots) {
rename_(.data, .dots = lazyeval::lazy_dots(...))
}
rename_.tse <- function(.data, ..., .dots) {
# if we are reasssigning the index column it should throw an error
if('time.local' %in% names(list(...)))
stop('Cannot modify the time index of a tse')
if('time.local' %in% names(.dots))
stop('Cannot modify the time index of a tse')
atb <- attributes(.data) # save attributes
rename_(as.data.frame(.data), ..., .dots = .dots) %>% # rename the columns
restore_attrib_(atb) %>%
order_columns
}
#
# select.tse <- function(.data, ..., .dots) {
# filter_(.data, .dots = lazyeval::lazy_dots(...))
# }
#
# select_.tse <- function(.data, ..., .dots) {
# stop('[select_] Sorry I\'m not implemented')
# }
#' Remove useless rows from a time-serie
#'
#' @description Some time-series start or end with pure NAs rows in the value
#' columns. This rows bring no information and can be safely removed. This is
#' the purpose of this function
#'
#' @return The same tse without the useless pure NAs rows at the stard and end
#' of the time-serie
clamp_tse <- function(tse) {
# initialize vector who locates empty rows
any_available <- rep(TRUE, dim(tse)[1])
# compute empty row vectors
for(col in get_value_cols(tse))
any_available <- any_available & !is.na(tse[, col])
# run-length encoding of empty rows
r <- rle(any_available)
# if starts with empty rows
if(!r$values[1])
tse <- tail(tse, -r$lengths[1])
# if ends with empty rows
if(!tail(r$values, 1))
tse <- head(tse, -tail(r$lengths, 1))
tse
}
#------------------------------------------------------------------------------#
# Operations on multiple tse's: merging, cbind, compatibility
#' Merging of time series.
#'
#' @description This function merges a time serie with another time-serie or
#' data.frame.
#'
#' @param tse A time-serie object.
#' @param y A time-serie object or a data.frame object.
#' @return a tse object
#' @family cbind.tse, rbind.tse, are_compatible
merge.tse <- function(tse, y, by = intersect(names(tse), names(y)),
by.x = by, by.y = by){
atb <- attributes(tse) # serve attributes
tse2 <- merge.data.frame(tse, y, by, by.x, by.y, all.x = TRUE, sort = FALSE)
tse2 <- tse2[order(tse2$time.local), ] # order by time the new columns
tse[, setdiff(names(tse2), names(tse))] <- tse2[, setdiff(names(tse2), names(tse))]
tse %>% restore_attrib_(atb) %>% update_available
}
#' Checks whether several time-series share a common time-indexation.
#'
#' @description check whether 2 time-series are compatible (same frequency,
#' same start and end) = same time indexation
#'
#' @return Boolean value
are_compatible <- function(tse1, tse2) {
stopifnot(is.tse(tse1))
stopifnot(is.tse(tse2))
t1 <- identical(period_tse(tse1), period_tse(tse2)) # equality of periods
t2 <- identical(tse1$time.local[1], tse2$time.local[1]) # equality of starting date
t3 <- identical(tail(tse1$time.local, 1), tail(tse2$time.local, 1)) # equality of end date
t1 & t2 & t3
}
#' Binds columns of compatible time-series.
#' @return tse object
#' @family merge.tse, rbind.tse, are_compatible
cbind.tse <- function(tse1, tse2, deparse.level = 1) {
# input check
stopifnot(are_compatible(tse1, tse2))
atb <- attributes(tse1)
cbind.data.frame(tse1, tse2[, get_value_cols(tse2)]) %>%
restore_attrib_(atb)
}
#' Concatenate time-series.
#' @return tse object
#' @family cbind.tse, merge.tse, are_compatible
rbind.tse <- function(tse1, tse2, deparse.level = 1) {
stop('[rbind.tse] Sorry not implemented :(')
}
#------------------------------------------------------------------------------#
# tse utilities
# get the period
period_tse <- function(tse) { attr(tse, 'period') }
get_period_in_day <- function(tse) {
switch(period_tse(tse),
'time.hour' = 'time.hour',
'time.period' = 'time.period',
'time.minute' = 'time.min_in_day')
}
is.tse <- function(data) {
atb <- attributes(data)
# check class attribute and columns
t <- inherits(data, 'tse') &
all(c(time_cols(data), 'available') %in% colnames(data))
# chack that there are no gaps, and that it is ordered
if (identical(atb$period, 'hour')) t <- t & all(diff(data$time.local) == dhours(1))
if (identical(atb$period, 'period')) t <- t & all(diff(data$time.local) == dminutes(30))
# check that the availability flag is correctly set
#lol
# return
t
}
# print method, by default doesn't show the time variables
print.tse <- function(tse, n = 6L, show_all = F) {
# find the value columns
value_col_i <- colnames(tse) %>%
grep('^time\\.', ., invert = TRUE) # find all the variables starting by "time."
value_col <- colnames(tse)[value_col_i] %>% # get the column names
setdiff('available') # remove the availability column
# print some meta-info
period_help <- c(time.hour = 'hourly', time.period = 'half-houly', time.minute = 'every minute')
c('Date range: ', tse$time.local[1] %>% format, ' -> ',
tail(tse$time.local, 1) %>% format, '\n') %>% cat # time range
c('Local time in:', attr(tse$time.local[1], 'tzone'),
' time inveral:', period_help[attributes(tse)$period], '\n') %>% cat # print the timezone and period
c('#rows:', dim(tse)[1],
' #columns:', length(time_cols(tse)) + 1, '+', length(value_col),
' size:', object_size(tse) %>% format, '\n\n') %>% cat
#paste(c(length(value_col), 'value columns:', value_col, '\n\n'), collapse = ' ') %>% cat # print value column names
# select the columns to show
display_cols <- c('time.local', 'available', value_col)
if(show_all) display_cols <- colnames(tse)
#
tse[, display_cols] %>% head(n) %>% print.data.frame # print the data
}
as.data.frame.tse <- function(tse) {
# alter attributes
class(tse) <- 'data.frame'
attributes(tse)$period <- NULL
tse
}
restore_attrib_ <- function(tse, atb) {
structure(tse, class = c('tse', 'data.frame'),
period = atb$period)
}
# get all the time indexation columns
time_cols <- function(tse) {
per <- period_tse(tse)
# period is hour
if(per == 'time.hour')
return(c('time.local', 'time.year', 'time.month', 'time.day', 'time.wday',
'time.holiday', 'time.hour', 'time.period'))
# period is half-hour
if(per == 'time.period')
return(c('time.local', 'time.year', 'time.month', 'time.day', 'time.wday',
'time.holiday', 'time.hour', 'time.period'))
# period is minute
if(per == 'time.minute')
# time.min_in_day is the the index of the minute in the day
# time.min_in_day is between 0 and 60*24-1 = 1439
return(c('time.local', 'time.year', 'time.month', 'time.day', 'time.wday',
'time.holiday', 'time.hour', 'time.minute', 'time.min_in_day'))
}
# get all the base columns: time index + availability
base_cols <- function(tse) c(time_cols(tse), 'available')
# get all the columns that are not for internal use
get_value_cols <- function(tse) {
setdiff(colnames(tse), base_cols(tse))
}
# get the columns name who have the specified type
get_cols_type <- function(tse, sel_type) {
tse %>% sapply(typeof) %>%
grep(pattern = sel_type, value = T) %>% names
}
# get the column names that are factors ()
get_cols_class <- function(tse, sel_class = 'factor') {
r <- tse %>% sapply(class) %>% grepl(pattern = sel_class)
colnames(tse)[r]
}
#-------------------------------------------------------------------------------
# time-series operators
split_tse <- function(data, ratio, var_out) {
l <- dim(data)[1]
data[, var_out] <- seq_len(l) <= l*ratio
data
}
split_tse2 <- function(data, var_out, ...) {
l <- dim(data)[1]
ratios <- list(...) %>% c(recursive = TRUE)
stopifnot(all(ratios>=0))
stopifnot(all(ratios<=1))
stopifnot(identical(sum(ratios), 1))
r_old <- 0
index <- seq_len(l)
col <- rep('', l) # result column
for(n in names(ratios)) {
this_span <- index >= l*r_old & index <= l*(r_old + ratios[[n]])
col[this_span] <- n
r_old <- r_old + ratios[[n]]
}
data[, var_out] <- factor(col)
data
}
D <- function (data, col, n = 1L) {
stopifnot(col %in% colnames(data))
colnamestart <- paste(col, 'd', sep = '.')
for (k in n) {
data[, paste(colnamestart, k, sep = '')] <- lag(data[, col], k)
}
data
}
# Lags several columns at once, columns to be lagged and their
# orders can be given as parameters:
# - D_multi(tse, load = c(1,3), drybulb = 1:4)
# Or as a list:
# - D_multi(tse, list = list(load=c(1,3), drybulb = 1:4))
D_multi <- function(tse, ..., orders_list) {
# list parameter is missing
if(missing(orders_list) || is.null(orders_list))
orders <- list(...) # use the dots
else orders <- orders_list
# input checks
stopifnot(names(orders) %in% colnames(tse)) # all the orders must match some column in the tse
stopifnot(all(c(list(...), recursive = TRUE) >= 0)) # all orders must be non-negative
for(col in names(orders))
tse <- D(tse, col, orders[[col]])
tse
}
diff.tse <- function(data, col1, col2 = paste(col1, 'd1', sep = '.'), out_col) {
stopifnot(c(col1, col2) %in% colnames(data))
data[, out_col] <- data[, col1] - data[, col2]
data
}
sum.tse <- function(tse, col1, col2, out = paste(col1, col2, sep = '+')) {
tse[, out] <- tse[, col1] + tse[, col2]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.