R/surrogate.data.R
In nonlinearTseries: Nonlinear Time Series Analysis

Documented in FFTsurrogate surrogateTest

#' Generate surrogate data using the Fourier transform
#' @description
#' Generates surrogate samples from the original time series.
#' @details
#' This function uses the phase randomization procedure for 
#' generating the surrogated data. This algorithm generates surrogate data with
#'  the same mean and autocorrelation function (and thus, the same power 
#'  spectrum because of the Wiener-Khinchin theorem) as the original time 
#'  series.
#' 
#' The phase randomization algorithm is often used when the null hypothesis 
#' being tested consist on the assumption that the time.series data comes from 
#' a stationary linear stochastic process with Gaussian inputs. The phase 
#' randomization preserves the Gaussian distribution.
#' @param time.series The original time.series from which the surrogate data 
#' is generated.
#' @param n.samples The number of surrogate data sets to generate,
#' @return A matrix containing the generated surrogate data (one time series 
#' per row).
#' @references H. Kantz  and T. Schreiber: Nonlinear Time series Analysis 
#' (Cambridge university press)
#' @author Constantino A. Garcia
#' @examples
#' \dontrun{
#' # generate 20 surrogate sets using as original time series
#' # an arma(1,1) simulation
#' time.series = arima.sim(list(order = c(1,0,1), ar = 0.6, ma = 0.5), n = 200)
#' surrogate = FFTsurrogate(time.series, 20)
#' }
#' @export FFTsurrogate
FFTsurrogate = function(time.series,n.samples=1){
  fourier.transform = fft(time.series)
  length.data = length(time.series)
  surrogate.data = matrix(0, nrow = n.samples, ncol = length.data)
  for (counter in 1:n.samples) {
    surrogate.data[counter,] = generateSingleFFTsurr(fourier.transform, 
                                                     length.data)
  }
  surrogate.data
}

#' Surrogate data testing 
#' @details
#' This function tests the null hypothesis (H0) stating that the series 
#' is a gaussian linear process. The test is performed by generating several 
#' surrogate data according to H0 and comparing the values of a discriminating
#' statistic between both original data and the surrogate data. If the value of 
#' the statistic is significantly different for the original series than for the
#' surrogate set, the null hypothesis is rejected and nonlinearity assumed.  
#' 
#' To test with a significance level of \eqn{\alpha}{alpha} if the statistic
#' from the original data is smaller than the expected  value under the null
#' hypothesis (a one-side test),  \eqn{K/\alpha - 1}{K/alpha - 1} surrogates
#' are generated. The null hypothesis is then rejected if the statistic from
#'  the data has one of the K smallest values. For a two-sided test, 
#' \eqn{2K/\alpha - 1}{2K/alpha - 1} surrogates are generated. The null 
#' hypothesis is rejected if the statistic from the data gives one of the K 
#' smallest or largest values.
#' 
#' The  surrogate data is generated by using a phase randomization procedure.
#' @param time.series The original time.series from which the surrogate data is
#' generated.
#' @param significance Significance of the test
#' @param one.sided Logical value. If \emph{TRUE}, the routine runs a one-side
#' test. If \emph{FALSE}, a two-side test is applied (default).
#' @param alternative Specifies the concrete type of one-side test that should be 
#' performed: If the the user wants to test if the statistic from the original
#' data is smaller (\emph{alternative="smaller"}) or larger 
#' (\emph{alternative="larger"}) than the expected value under the
#'  null hypothesis.
#' @param K Integer controlling the number of surrogates to be generated (see
#' details). 
#' @param FUN The function that computes the discriminating statistic that shall
#'  be used for testing.
#' @param verbose Logical value. If TRUE, a brief summary of the test is shown.
#' @param do.plot Logical value. If TRUE, a graphical representation of the 
#' statistic value for  both surrogates and original data is shown.
#' @param main an overall title for the plot.
#' @param xlab a title for the x axis.
#' @param ylab a title for the y axis. 
#' @param ... Additional arguments for the FUN function.
#' @return  A list containing the values of the statistic for the surrogates 
#' (\emph{surrogates.statistics} field) and the value for the original time 
#' series (\emph{data.statistic})
#' @references SCHREIBER, Thomas; SCHMITZ, Andreas. Surrogate time series. 
#' Physica D: Nonlinear Phenomena, 2000, vol. 142, no 3, p. 346-382.
#' @author Constantino A. Garcia
#' @export surrogateTest
#' @examples
#' \dontrun{
#'  lx = lorenz(do.plot=F)$x 
#'  sdt = surrogateTest(time.series = lx,significance = 0.05,
#'                      K=5, one.sided = FALSE, FUN=timeAsymmetry) 
#' }
surrogateTest = function(time.series, significance = 0.05, one.sided=FALSE,
                         alternative = c("smaller", "larger"), K = 1, FUN, 
                         verbose = TRUE, do.plot = TRUE,  
                         xlab = "Values of the statistic", ylab = "", 
                         main = "Surrogate data testing", 
                         ...) {
  ktol = 1e-6
  
  if (K < 1) {
    stop("Invalid K (K < 1 not allowed")
  }
  
  # make sure that K is an integer
  if (abs(K %% 1) > ktol) {
    warning("K is not an integer... rounding it!")
    K = round(K)
  }
  
  if (one.sided) {
    alternative = match.arg(alternative)
    n.surrogates = ceiling(K / significance - 1 )
  } else {
    n.surrogates = ceiling((2*K) / significance - 1 )
  }
  surrogates = FFTsurrogate(time.series, n.samples = n.surrogates)
  nltest = list()
  if (verbose) {
    message("Computing statistics\n")  
  }
  surrogates.statistics = apply(surrogates, MARGIN = 1, FUN = FUN, ...)  
  FUN = match.fun(FUN)
  data.statistic = FUN(time.series,...)
  
  if (verbose) {
    if (one.sided) {
      sdtMessage1s(data.statistic, surrogates.statistics, alternative, K)
    }else{
      sdtMessage2s(data.statistic, surrogates.statistics, K)
    }
  }
  nltest$surrogates.statistics = surrogates.statistics
  nltest$data.statistic = data.statistic
  class(nltest) = "surrogateTest"
  
  if (do.plot) {
    plot(nltest, main = main, xlab = xlab, ylab = ylab)
  }
  nltest
}

#' @export
plot.surrogateTest = function(x, xlab = "Values of the statistic", ylab = "", 
                              main="Surrogate data testing",
                              type = "h", lwd = 2, 
                              xlim = NULL, ylim = NULL, 
                              add.legend = T, ...) {
  surrogates.statistics = x$surrogates.statistics 
  data.statistic = x$data.statistic 
  
  if (is.null(xlim)) {
    xlim = range(data.statistic, surrogates.statistics)
  }
  if (is.null(ylim)) {
    ylim = c(0, 3)
  } 
    
  plot(surrogates.statistics, rep(1, length(surrogates.statistics)), 
       xlim = xlim, ylim = ylim, 
       type = type, lwd = lwd, main = main, xlab = xlab, ylab = ylab, 
       col = 1, lty = 1)
  lines(data.statistic,2, type = type, lwd = lwd, col = 2, lty = 2)
  
  if (add.legend) {
    legend("top",bty = "n", legend = c("Surrogate data", "Original data"), 
           col = 1:2,  lty = c(1, 2), lwd = lwd)
  }
}

# Surrogate Data Testing two-sided Message 
sdtMessage2s = function(data.statistic, surrogates.statistics,K){
  sorted.values = sort( c(data.statistic,surrogates.statistics) ) 
  # threshold for being significant smaller (being among the K smallest)
  min.st = sorted.values[K]
  # threshold for being significant larger (being among the K largest)
  len = length(sorted.values)
  max.st = sorted.values[[len - K + 1]]
  message("Null Hypothesis: Data comes from a linear stochastic process")
  if (data.statistic <= min.st) {
    message(paste("Reject Null hypothesis:\n",
                  "\tOriginal data's stat is significant smaller",
                  "than surrogates' stats"))
  } else {
    if (data.statistic >= max.st) {
      message(paste("Reject Null hypothesis:\n",
                    "\tOriginal data's stat is significant larger",
                    "than surrogates' stats"))
    } else {
      message("Accept Null hypothesis")
    }  
  }
}

# Surrogate one sided Data Testing 
sdtMessage1s = function(data.statistic, surrogates.statistics, alternative, K) {
  sorted.values = sort(c(data.statistic, surrogates.statistics)) 
  # threshold for being significant smaller (being among the K smallest)
  min.st = sorted.values[K]
  # threshold for being significant larger (being among the K largest)
  len = length(sorted.values)
  max.st = sorted.values[[len - K + 1]]
  
  message("Null Hypothesis: Original data comes from a linear stochastic process")
  if ((alternative == "smaller") && (data.statistic <= min.st)) {
    message(paste("Reject Null hypothesis:\n",
                  "\tOriginal data's stat is significant smaller",
                  "than surrogates' stats"))
  } else {
    if ((alternative == "larger") && (data.statistic >= max.st)) {
      message(paste("Reject Null hypothesis:\n",
                    "\tOriginal data's stat is significant larger",
                    "than surrogates' stats"))
    }else{
      message("Accept Null hypothesis")
    }  
  }
}


# private function
# generates one surrogate data set using the fourier.transform Fourier transform 
# of length length.data
generateSingleFFTsurr = function(fourier.transform, length.data) {
  # we must respect the hermitian simmetry to generate real time series
  # phase[2] = - phase[N]; phase[3] = -phase[N-1]... phase[k] = -phase[N-k+2]
  
  # Differentiate the odd and the even case
  if (length.data %% 2 == 0) {
    # even case
    for (k in 1:(length.data / 2 - 1)) {
      phase = runif(1, min = 0, 2 * pi)
      fourier.transform[[k + 1]] = (
        abs(fourier.transform[[k + 1]]) * exp(1i * phase)
      )
      fourier.transform[[length.data - k + 1]] = (
        abs(fourier.transform[[length.data - k + 1]]) * exp(-1i * phase)
      )
    }
  }else{
    # odd case
    for (k in 1:((length.data - 1) / 2)) {
      phase = runif(1, min = 0, 2 * pi)
      fourier.transform[[k + 1]] = (
        abs(fourier.transform[[k + 1]]) * exp(1i * phase)
      )
      fourier.transform[[length.data - k + 1]] = (
        abs(fourier.transform[[length.data - k + 1]]) * exp(-1i * phase)
      )
    }
  }
  # give warning if some imaginary part is too big 
  Re(fft(fourier.transform, inverse = "TRUE")) / length.data
}