R/tse_func.R

#------------------------------------------------------
# tse_func class
# functions operating on TSEs
# currently quite experimental

tse_func <- function(f, ...) {
  stop('not implemented')
  # f should be a function of only one argument
  # inputs should be a vector of names of columns the function is using
  structure(f, class = 'tse_func', inputs = inputs)
}

tse_func_ <- function(f, inputs) {
  # f should be a function of only one argument
  # inputs should be a vector of names of columns the function is using
  structure(f, class = 'tse_func', inputs = inputs)
}

# expects the function inputs to be the names
# calling f(x,y) by call_tse_(f, x = 'load', y = 'temperature')
call_tse_ <- function(tsef, tse, ...) {
  map <- construct_mapping(...)
  do.call(tsef, list(map_in(tse, map)))
}

# new = old
map_in <- function(tse, ...) {
  
  mapping <- construct_mapping(...)
  
  stopifnot(mapping %in% colnames(tse))  
  mapping_rev <- structure(names(mapping), names = mapping)
  # do not sample the basic columns
  mapping_rev <- mapping_rev[setdiff(mapping, c(time_cols(tse), 'available'))] 
  
  # construct a mapping from old column names to new ones
  revmap <- structure(c(time_cols(tse), 'available', mapping_rev), 
                      names = c(time_cols(tse), 'available',
                                names(mapping_rev)))
  
  # select and rename the columns we need
  tse <- subcol(tse, names(mapping_rev))
  colnames(tse) <- revmap[colnames(tse)] # rename columns

  # for the basic columns
  mapping_rev <- structure(names(mapping), names = mapping)
  # do not sample the basic columns
  mapping_rev <- mapping_rev[intersect(mapping, c(time_cols(tse), 'available'))]
  tse[, mapping_rev] <- as.data.frame(tse)[, names(mapping_rev)] 
  
  tse
}

is.tse_func <- function(tsef) {
  inputs <- attributes(tsef)$inputs
  inherits(tsef, 'tse_func') & # class atributes is well set
    is.character(inputs) & # function inputs columns are characters
    setequal(inputs, unique(inputs)) # inputs columns names are distinct
}

inputs <- function(tsef) {
  attributes(tsef)$inputs
}

construct_mapping <- function(...) {
  # construct the mapping
  args <- list(...)
  if(is.null(args$mapping)) mapping <- args
  else mapping <- args$mapping
  
  c(mapping, recursive = TRUE)
}

# pullback input composition function
pbi_ <- function(f, M, col_y, col_x, in_f = NULL, in_M = NULL) {
  # input check
  stopifnot(col_y %in% inputs(M))
  if(missing(in_f)) in_f <- inputs(f)
  if(missing(in_M)) in_M <- inputs(M) %>% setdiff(col_y)
  
  # the training function
  tse_func_(function(tset) {
    # if output is only one
    if(length(col_y) == 1) tset[, col_y] <- call_tse_(f, tset, in_f)
    # multivariate output
    else tset <- cbind(tset, map_in(call_tse_(f, tset, in_f), col_y))
    trained <- call_tse_(M, tset, structure(c(names(col_y), in_M), 
                                            names = c(c(names(col_y), in_M))))
    
    # prediction function
    tse_func_(function(tse) {
      # if output is only one
      if(length(col_y) == 1) tse[, col_y] <- f(tse)
      # multivariate output
      else tse <- cbind(tse, map_in(call_tse_(f, tse, in_f), col_y))
      tse %>% print
      trained %>% inputs %>% print
      structure(c(names(col_y), in_M), 
                names = c(c(names(col_y), in_M))) %>% print
      call_tse_(trained, tse, structure(c(names(col_y), setdiff(in_M, col_x)), 
                                        names = c(c(names(col_y), 
                                                    setdiff(in_M, col_x)))))
    }, inputs = union(in_f, in_M %>% setdiff(col_x)))
  }, inputs = union(in_f, in_M))
}

# pullback output composition function
pbo_ <- function(f, f_inv, M, fcol_x, Mcol_x) {
  # input check
  stopifnot(Mcol_x %in% inputs(M))
  stopifnot(fcol_x %in% inputs(f))
  tse_func_(function(tset) {
    tset[, Mcol_x] <- f(tset)
    trained <- M(tset)
    tse_func_(function(tse) {
      tse[, fcol_x] <- trained(tse)
      f_inv(tse)
    }, inputs = union(inputs(f), inputs(trained)) %>% setdiff(fcol_x))
  }, inputs = union(inputs(f), inputs(M)) %>% setdiff(Mcol_x))
}

# index_dx = c(d1 = 1, d2 = 2, d5 = 5)
tsefD <- function(M, index_dx) {
  # input checks
  stopifnot(names(index_dx) %in% inputs(M))
  # the training function
  tse_func_(function(tset) {
    for(dx_name in names(index_dx)) {
      # lag the column by the corresponding value
      tset[, dx_name] <- lag(tset[, 'colx'], index_dx[dx_name]) 
    }     
    Mtrained <- M(tset)
    # the trained model function
    tse_func_(function(tse) {
      for(dx_name in names(index_dx)) {
        # lag the column by the corresponding value
        tse[, dx_name] <- lag(tse[, 'colx'], index_dx[dx_name]) 
      }
      tse[, 'colx'] <- NULL # drop the colx column
      Mtrained(tse) # call the trained  model
    }, inputs = union(
      setdiff(inputs(Mtrained), names(index_dx)),
      'colx'))
  }, inputs = union(
    setdiff(inputs(M), names(index_dx)),
    'colx'))
}

dcm <- tse_func_(function(tse) {
  daily_profiles <- tse %>%
    melt(id.vars = c('group_by', period_tse(tse)),
         measure.vars = 'x') %>%
    dcast(paste('group_by', period_tse(tse), sep = '~'),
          mean, na.rm = TRUE) %>% # group_by ~ period
    melt(id.vars = 'group_by', variable.name = attributes(tse)$period, 
         value.name = 'x.f')
  
  tse_func_(function(tse) {
    # match each row of the test with a day-profile
    tse %>%
      merge(daily_profiles) %>%
      `$`('x.f')
  }, inputs = c('group_by'))
  
}, inputs = c('x', 'group_by'))

# add flag to group days according to several factors
add_worked <- tse_func_(function(data) {
  # build the worked / non worked factors
  data$worked <- "worked"
  data$worked[data$wday %in% c(0, 6)] <- "non_worked"
  data$worked[data$holiday != 'Normal'] <- "non_worked"
  factor(data$worked)
}, inputs = NULL)

add_w_we_ph <- tse_func_(function(data) {
  # worked, weekend, ph groups
  data$w_we_ph <- "worked"
  data$w_we_ph[data$wday %in% c(0, 6)] <- "weekend"
  data$w_we_ph[data$holiday != 'Normal'] <- "ph"
  factor(data$w_we_ph)
}, inputs = NULL)

lm_trend <- tse_func_(function(tset){
  trend <- as.data.frame(tset)[, c('time.local', 'x')] %>%
    lm('x ~ time.local', .)
  tse_func_(function(tse){
    predict(trend, tse)
  }, inputs = c('x'))
}, inputs = c('y', 'x'))

delta <- tse_func_(function(tse) {
  tse[, 'x1'] - tse[, 'x2']
},inputs = c('x1', 'x2'))

delta_inv <- tse_func_(function(tse) {
  tse[, 'x1'] + tse[, 'x2']
},inputs = c('x1', 'x2'))

lag4tse <- function(index_dx) {
  tse_func_(function(tse) {
    tse2 <- tse
    for(dx_name in names(index_dx)) {
      tse2[, dx_name] <- lag(tse[, 'x'], index_dx[dx_name]) # lag the column by the corresponding value
    }   
    tse2[, 'x'] <- NULL
    tse2
  }, inputs = 'x')
}

# Examples
# 
# # diff daily cluster model
# ddcm <- D(pbo_(delta, delta_inv, dcm, 'x1', 'x'), c(x1 = 0, x2 = 1))
# 
# # linear regression using drybulb and drybulb.d1
# MdTemp1 <- tse_func_(function(tset) {
#   trained <- lm('x ~ T + T.d1', as.data.frame(tset))
#   tse_func_(function(tse) {
#     predict(trained, as.data.frame(tse))
#   }, inputs = c('T', 'T.d1'))
# }, inputs = c('T', 'T.d1', 'x'))
# 
# 
# MdTemp <- pbi_(lag4tse(c('T' = 0, 'T.d1' = 1)), MdTemp1, 
#                col_y = c('T' = 'T', 'T.d1' = 'T.d1'), 
#                col_x = 'x',
#                in_f = c(x = 'T'), in_M = c(x = 'x'))
EBlonkowski/timeseries documentation built on May 6, 2019, 2:57 p.m.