R/synthetic_data_generation.R

Defines functions SynthesizePattern ScalePattern WarpPattern AddNoise

Documented in AddNoise ScalePattern SynthesizePattern WarpPattern

#'Randomize a Time Series Pattern
#'
#'Takes an xts object and performs time scaling to change the length of the whole pattern,
#'time warping to randomly change the time positions of individual points to a limited extent, and
#'noise adding to alter the amplitude of individual points randomly.
#'
#'@references
#'Zhe Zhang, Jian Jiang, Xiaoyan Liu, Ricky Lau, Huaiqing Wang, and Rui Zhang. A real time hybrid pattern matching scheme for stock time series. In Proceedings of the Twenty-First Australasian Conference on Database Technologies - Volume 104, ADC ’10, pages 161–170, Darlinghurst, Australia, Australia, 2010. Australian Computer Society, Inc.
#'
#'Xueyuan Gong, Yain-Whar Si, Simon Fong, and Robert P. Biuk-Aghai. Financial time series pattern matching with extended ucr suite and support vector machine. Expert Syst. Appl., 55(C):284–296, August 2016.
#'@param pattern An xts time series
#'@export
SynthesizePattern <- function(pattern, scaling.factor = 4, warping.factor = 'l', noise.factor = 'l'){
  library(xts)

  ###########################################################################################
  #Time Scaling: For a pattern P, add points between each pair of pattern points Pi and Pi-1#
  ###########################################################################################
  scaled.pattern <- ScalePattern(pattern, scaling.factor)

  ###########################################################################################
  #Time Warping
  ###########################################################################################
  warped.scaled.pattern <- WarpPattern(scaled.pattern, warping.factor)



  ###########################################################################################
  #Noise Adding
  ###########################################################################################
  noisy.warped.scaled.pattern <- AddNoise(warped.scaled.pattern, noise.factor)

  return(noisy.warped.scaled.pattern)
}

#'Scale a Time Series Pattern To Add More Points
#'
#'Points are added in between the points of the pattern, with a density cotrolled by
#'the scaling.factor parameter. The shape of the pattern is maintained.
#'
#'@param pattern A time series xts pattern
#'@param scaling.factor The new length of the pattern will be the scaling.factor multipled by the old length
#'@export
ScalePattern <- function(pattern, scaling.factor){
  new.length <- length(pattern)*scaling.factor - length(pattern)
  x.values <- sort(
    runif(
      new.length,
      time( pattern[1] ),
      time( pattern[length(pattern)] )
    )
  )
  #Find the subset of times from x.values between each two pattern points, and calculate the associated y values
  #using the formula y.c = m(x.c - x.1) + y.1, where m = (y.2 - y.1) / (x.2 - x.1). This is just calculating the
  #slope of the line connecting the pattern points, and then using one of the pattern points (Pi-1) and the x value of
  #each point being added between Pi and Pi-1 to find the y value of said point so that it lies on said line, thus
  #maintaining the shape of the pattern.
  #Notation:
  #x.1 is the time of the first pattern point pi-1
  #x.2 is the time of the second pattern point pi
  #y.1 is the value of the first pattern point pi-1
  #y.2 is the value of the second pattern point pi
  #x.c are the times of the points between pi and pi-1, this is a vector
  #y.c are the values of the points between pi and pi-1, this is a vector
  y.c <- vector("numeric", new.length)
  counter <- 1
  for(i in 2:length(pattern)){
    x.1 <- as.numeric(time(pattern[i-1]))
    x.2 <- as.numeric(time(pattern[i]))
    y.1 <- pattern[[i-1]]
    y.2 <- pattern[[i]]
    x.c <- x.values[ x.values > x.1 & x.values < x.2 ]
    slope <- ( pattern[[i]] - pattern[[i-1]] )/(x.2[1] - x.1[1])
    if(length(x.c)!=0){
      for(j in 1:length(x.c)){
        y.c[counter] <- slope*(x.c[j]-x.1) + y.1[[1]]
        counter <- counter+1
      }
    }
  }
  scaled.points <- c( pattern, xts(y.c, order.by = as.POSIXlt(x.values, origin = "1970-01-01 00:00:00" )))
  return(scaled.points)
}

#'Warp the Pattern Locally in the Time Dimension
#'
#'Add noise to the pattern in the time (x) dimension
#'@param pattern A time series xts pattern
#'@param warping.factor May be set to 'l', 'm', or 'h', designating "low", "medium", and "high", respectively.
#'Controls the magnitude of warping
#'@export
WarpPattern <- function(pattern, warping.factor){
  if(warping.factor == 'l'){
    modifier <- 1/3
  }
  if(warping.factor == 'm'){
    modifier <- 2/3
  }
  if(warping.factor == 'h'){
    modifier <- 1
  }

  for(i in 2:(length(pattern)-1)){
    l.interval <- time(pattern[i-1]) - time(pattern[i])
    r.interval <- time(pattern[i+1]) - time(pattern[i])
    attr(pattern, 'index')[i] <- as.POSIXct(time(pattern[i]) + modifier*runif(1, l.interval, r.interval))
  }

  return(pattern)
}

#'Warp the Pattern Locally in the Time Dimension
#'
#'Add noise to the pattern in the y dimension
#'@param pattern A time series xts pattern
#'@param noise.factor May be set to 'l', 'm', or 'h', designating "low", "medium", and "high", respectively.
#'Controls the magnitude of thee noise added
#'@export
AddNoise <- function(pattern, noise.factor){
  if(noise.factor == 'l'){
    modifier <- runif(1, 1, 4)
  }
  if(noise.factor == 'm'){
    modifier <- runif(1, 4, 7)
  }
  if(noise.factor == 'h'){
    modifier <- runif(1, 7, 10)
  }
    for(i in 2:(length(pattern)-1)){
      if(runif(1, 0, 1) > 0.5){
      pattern[[i]] <- pattern[[i]] + modifier*(pattern[[i+1]] - pattern[[i]])
      }
    }
  return(pattern)
}
joshmarsh/TSTestDataUtil documentation built on May 19, 2019, 8:54 p.m.