R/R-views.R

# R-views
# plots and informations about core objects

#------------------------------------------------------
# time-series plot

# plots and time-series, start and end should be between 0 and 1
tse_plot <- function(tse, start = 0, end = 1) {
  vcols <- get_value_cols(tse)
  if(length(vcols) == 0) { return () } # nothing to plot
  stopifnot(0 <= start & start <= 1)
  stopifnot(start <= end & end <= 1)
  
  # select the time period
  l <- dim(tse)[1]
  tse[, 'temp.select_flag'] <- seq_along(tse$time.local) >= start*l &
    seq_along(tse$time.local) <= end*l
  data <- as.data.frame(tse) %>%
    filter_('temp.select_flag') # keep only the period we want
  data <- data[, c('time.local', vcols)] %>%
    melt(id.vars = 'time.local')
  
  # plot
  ggplot(data = data, mapping = aes(x = time.local, y = value, 
                                    color = variable, group = variable)) +
    geom_line()
}

plot.tse <- function(tse, ...) {
  tse_plot(tse, ...)
}

#' Autocorrelation plot
#' @description Right now plots the autocorrelation function only for the first
#' value column of the tse. Can work if the given tse has missing values.
#' @param lag Lag in % of the length of the total timeserie: between 0 and 1
#' @todo Have the function plot the autocorrelation plot for each value column
#' and do a facet plot to show them all in the same window.
autocorr_plot <- function(tse, lag = 25) {
  # select the first value column
  x <- tse[, get_value_cols(tse)[1]]
  acf(x = x, lag.max = sum(dim(tse)[1])*lag, main = "Autocorrelation plot",
      na.action = na.pass)
}

#------------------------------------------------------
# Profiles (daily or other)

plot_profiles <- function(tse, alpha = 0.5) {
  profiles <- tse %>%
    as.data.frame %>%
    melt(id.vars = c('time.day', get_period_in_day(tse)),
         measure.vars = get_value_cols(tse))

  profiles$profile_id <- interaction(profiles$variable, profiles$time.day)
  ggplot(profiles, 
         mapping = aes_string(x = get_period_in_day(tse), y = 'value', 
                              color = 'variable', group = 'profile_id')) +
    geom_line()
}

plot_versus <- function(tse) {
  # if less than 2 tse then cannot plot much
  if(length(get_value_cols(tse)) < 2) {
    warning('Need to select more than one time-serie for a versus plot')
    return(NULL)
  }
  # if 2 tses then makes sense to plot a 'simple' versus plot
  if(length(get_value_cols(tse)) == 2) {
    return(ggplot(data = tse, 
                  mapping = aes_string(x = get_value_cols(tse)[1], 
                                       y = get_value_cols(tse)[2])) +
             geom_point())
  }
  # if there is 3 or more tses we plot each combination in
  # a plot matrix
  pairs(as.data.frame(tse)[, get_value_cols(tse)])
}

#------------------------------------------------------
# Categorical stats

available_analysis <- function(tse) {
  summary(tse$available)
}

# simple bar plot of the number of occurence of each factor
cat_barplot <- function(X) {
  stopifnot(is.factor(X))
  barplot(table(X))
}

# get some information about the spans information of
# one level
cat_level_span <- function(X, this_level) {
  stopifnot(is.factor(X))
  stopifnot(this_level %in% levels(X))
  
  # compute the total number of occurences of this level
  level_total <- sum(X == this_level)
  
  # compute the level spans of this level
  r <- rle(as.character(X))
  level_spans <- r$lengths[r$values == this_level]
  
  # summarize the information per span width
  r <- level_spans %>% sort %>% rle
  level_spans <- data.frame(width = r$values, occurence = r$lengths, 
                            total = r$values * r$lengths) %>%
    arrange(desc(total)) %>%
    mutate(ratio = total / level_total)
  
  level_spans$cumulative <- cumsum(level_spans$ratio)
  level_spans
}
#------------------------------------------------------
# General stats

column_infos <- function(tse) {
  l <- dim(tse)[1]
  data.frame(
    cols = colnames(tse),
    types = sapply(tse, typeof),
    availability = vapply(tse, function(x) 1-sum(is.na(x))/l, FUN.VALUE = c(1))
  )
}
EBlonkowski/timeseries documentation built on May 6, 2019, 2:57 p.m.