R/ecomplex.R

Defines functions get_epsilons.all get_epsilons.lift get_epsilons.cspline get_epsilons.bspline get_epsilons ecomplex

Documented in ecomplex get_epsilons

#' Compute the epsilon-complexity of a time series.
#'
#' @param x A vector of points. 
#' @param ds Number of times to downsample the input sequence.
#' @param method The interpolation or approximation method. One of
#'                c("bspline", "cspline")
#' @param max_degree The maximum order spline used in the approximation
#'              step
#' @param err_norm The norm type used in computing the approximation error.
#' @param sample_type The downsampling type. Either randomly sampled  
#'                      or downsampled in integer steps.
#'
#' @return A \code{list} with :
#' \tabular{ll}{
#' \code{A}        \tab The epsilon-complexity intercept coefficient \cr
#' \code{B}        \tab The epsilon-complexity slope coefficient \cr
#' \code{fit}      \tab The full linear model generated by fitting log(epsilons) ~ log(S) using \code{lm()}. \cr
#' \code{epsilons} \tab The mean sum of absolute errors at each downsample level. \cr
#' \code{S}        \tab The fraction of samples maintained at each downsample level. \cr
#' \code{method}  \tab The method used or a list of methods if method 
#'                      "all" is used. 
#'}
#'@export
#'@importFrom stats lm coefficients
ecomplex <- function(x, ds = 6, 
                        max_degree = 5,
                        method = c("cspline", "bspline", "lift", "all"), 
                        err_norm = c("mae", "mse", "max"), 
                        sample_type = c("step", "random")) {

  if (!is.null(dim(x))) stop("Data must be a vector of numeric values")
  x <- as.numeric(x)  
  if (anyNA(x))         stop("Data contains NA values")
  if (length(x) < 100)  warning("Complexity estimate may not be stable ", 
                                "for short series")  
  x <- normalize(x)
  method   <- match.arg(method)
  err_norm <- match.arg(err_norm) 
  sample_type <- match.arg(sample_type)

  func <- structure(list(x     = x, 
                         ds    = ds, 
                         deg   = max_degree, 
                         err_norm  = err_norm, 
                         sample_type = sample_type),
                         class = method)

  # Compute error for each downsample level up to 'ds'
  res <- get_epsilons(func)  
  S   <- 1 / (2:(length(res$epsilons) + 1))
  epsilons <- res$epsilons
  method   <- res$methods

  # Catch lm() errors silently and return NA values
  A <- B <- fit <- NA
  try({      
     fit <- lm(log(epsilons) ~ log(S))
     A   <- unname(stats::coef(fit)[1])
     B   <- unname(stats::coef(fit)[2])
  }, silent = TRUE )

  if(is.na(A) || is.na(B)) warning("Coefficients could not be computed.", 
                          " Check data for invalid values.")

  structure(list(A        = A,
                 B        = B,
                 fit      = fit, 
                 epsilons = epsilons, 
                 S        = S,
                 method   = method, 
                 err_norm = err_norm, 
                 sample_type = sample_type), 
                 class    = "ecomplex")
}


#' Compute epsilon errors for a time series.
#'
#' Computes the mean absolute error (MAE) of a time
#'  series for each downsample level using an 
#'  interpolation (or approximation) method of type
#'  basis-spline, cubic spline or lifting sche 
#'
#' @param func A structure with the time series, interpolation method, 
#'              and parameters for the method.
#' @return A \code{list} with :
#' \tabular{ll}{
#' \code{epsilons} \tab The mean sum of absolute errors at each level \cr
#' \code{methods}  \tab The method used or a list of methods if method 
#'                      "all" is used. 
#'}
get_epsilons <- function(func) UseMethod("get_epsilons")

get_epsilons.bspline <- function(func){
  epsilons <- double(func$ds - 1)
  ds <- 2:func$ds
    for (k in ds) {
    epsilons[k - 1] <- bspline_err(func$x, sample_num = k, max_degree = func$deg)
  }
  list(epsilons = epsilons, methods = class(func))
}

get_epsilons.cspline <- function(func){
  epsilons <- double(func$ds - 1)
  ds <- 2:func$ds
    for (k in ds) {
    epsilons[k - 1] <- cspline_err(func$x, sample_num = k, 
                                           max_degree = func$deg, 
                                           err_norm = func$err_norm, 
                                           sample_type = func$sample_type)
  }
  list(epsilons = epsilons, methods = class(func))
}

get_epsilons.lift <- function(func) {
  ds <- min(func$ds, 6)
  epsilons <- unlist(lapply((2:ds), function(y) interp_err(func$x, iwt_mod(y))))
  list(epsilons = epsilons, methods = class(func))
}

# Find best fit among all methods. If the series length
# is longer than 500, this defaults to using just the 
# cubic spline and lift methods.
get_epsilons.all <- function(func){
  methods <- c("cspline", "bspline", "lift")
  if (length(func$x) > 500 ) {
    methods <- c("cspline", "lift")
  }
  eps <- lapply(methods, 
                function(method) get_epsilons({class(func) <- method; func}))
  eps <- lapply(eps, function(eps) eps$epsilons)
  df <- data.frame(do.call(cbind, eps))
  names(df) <- methods
  # get minimum epsilons
  epsilons <- apply(df, 1, min)
  methods_used  <- methods[apply(df, 1, which.min)]
  list(epsilons = epsilons, methods = methods_used)
}
nateaff/ecomplex documentation built on Aug. 19, 2017, 12:16 a.m.