runminmax: Minimum and Maximum of Moving Windows
In caTools: Tools: Moving Window Statistics, GIF, Base64, ROC AUC, etc

runmin & runmax

R Documentation

Minimum and Maximum of Moving Windows

Description

Moving (aka running, rolling) Window Minimum and Maximum calculated over a vector

Usage

  runmin(x, k, alg=c("C", "R"),
         endrule=c("min", "NA", "trim", "keep", "constant", "func"),
         align = c("center", "left", "right"))
  runmax(x, k, alg=c("C", "R"),
         endrule=c("max", "NA", "trim", "keep", "constant", "func"),
         align = c("center", "left", "right"))

Arguments

`x`	numeric vector of length n or matrix with n rows. If `x` is a matrix than each column will be processed separately.
`k`	width of moving window; must be an integer between one and n
`endrule`	character string indicating how the values at the beginning and the end, of the array, should be treated. Only first and last `k2` values at both ends are affected, where `k2` is the half-bandwidth `k2 = k %/% 2`. `"min"` & `"max"` - applies the underlying function to smaller and smaller sections of the array. In case of min equivalent to: `for(i in 1:k2) out[i]=min(x[1:(i+k2)])`. Default. `"trim"` - trim the ends; output array length is equal to `length(x)-2*k2 (out = out[(k2+1):(n-k2)])`. This option mimics output of `apply` `(embed(x,k),1,FUN)` and other related functions. `"keep"` - fill the ends with numbers from `x` vector `(out[1:k2] = x[1:k2])` `"constant"` - fill the ends with first and last calculated value in output array `(out[1:k2] = out[k2+1])` `"NA"` - fill the ends with NA's `(out[1:k2] = NA)` `"func"` - same as `"min"` & `"max"` but implimented in R. This option could be very slow, and is included mostly for testing Similar to `endrule` in `runmed` function which has the following options: “`c("median", "keep", "constant")`” .
`alg`	an option allowing to choose different algorithms or implementations. Default is to use of code written in C (option `alg="C"`). Option `alg="R"` will use slower code written in R. Useful for debugging and studying the algorithm.
`align`	specifies whether result should be centered (default), left-aligned or right-aligned. If `endrule`="min" or "max" then setting `align` to "left" or "right" will fall back on slower implementation equivalent to `endrule`="func".

Details

Apart from the end values, the result of y = runFUN(x, k) is the same as “for(j=(1+k2):(n-k2)) y[j]=FUN(x[(j-k2):(j+k2)], na.rm = TRUE)”, where FUN stands for min or max functions. Both functions can handle non-finite numbers like NaN's and Inf's the same way as min(x, na.rm = TRUE)).

The main incentive to write this set of functions was relative slowness of majority of moving window functions available in R and its packages. With the exception of runmed, a running window median function, all functions listed in "see also" section are slower than very inefficient “apply(embed(x,k),1,FUN)” approach. Relative speeds runmin and runmax functions is O(n) in best and average case and O(n*k) in worst case.

Both functions work with infinite numbers (NA,NaN,Inf, -Inf). Also default endrule is hardwired in C for speed.

Value

Returns a numeric vector or matrix of the same size as x. Only in case of endrule="trim" the output vectors will be shorter and output matrices will have fewer rows.

Author(s)

Jarek Tuszynski (SAIC) jaroslaw.w.tuszynski@saic.com

Examples

  # show plot using runmin, runmax and runmed
  k=25; n=200;
  x = rnorm(n,sd=30) + abs(seq(n)-n/4)
  col = c("black", "red", "green", "blue", "magenta", "cyan")
  plot(x, col=col[1], main = "Moving Window Analysis Functions")
  lines(runmin(x,k), col=col[2])
  lines(runmean(x,k), col=col[3])
  lines(runmax(x,k), col=col[4])
  legend(0,.9*n, c("data", "runmin", "runmean", "runmax"), col=col, lty=1 )

  # basic tests against standard R approach
  a = runmin(x,k, endrule="trim") # test only the inner part 
  b = apply(embed(x,k), 1, min)   # Standard R running min
  stopifnot(all(a==b));
  a = runmax(x,k, endrule="trim") # test only the inner part
  b = apply(embed(x,k), 1, max)   # Standard R running min
  stopifnot(all(a==b));
  
  # test against loop approach
  k=25; 
  data(iris)
  x = iris[,1]
  n = length(x)
  x[seq(1,n,11)] = NaN;                # add NANs
  k2 = k
  k1 = k-k2-1
  a1 = runmin(x, k)
  a2 = runmax(x, k)
  b1 = array(0,n)
  b2 = array(0,n)
  for(j in 1:n) {
    lo = max(1, j-k1)
    hi = min(n, j+k2)
    b1[j] = min(x[lo:hi], na.rm = TRUE)
    b2[j] = max(x[lo:hi], na.rm = TRUE)
  }
  # this test works fine at the R prompt but fails during package check - need to investigate
  ## Not run:  
  stopifnot(all(a1==b1, na.rm=TRUE));
  stopifnot(all(a2==b2, na.rm=TRUE));
  
## End(Not run)
  
  # Test if moving windows forward and backward gives the same results
  # Two data sets also corespond to best and worst-case scenatio data-sets
  k=51; n=200;
  a = runmin(n:1, k) 
  b = runmin(1:n, k)
  stopifnot(all(a[n:1]==b, na.rm=TRUE));
  a = runmax(n:1, k)
  b = runmax(1:n, k)
  stopifnot(all(a[n:1]==b, na.rm=TRUE));

  # test vector vs. matrix inputs, especially for the edge handling
  nRow=200; k=25; nCol=10
  x = rnorm(nRow,sd=30) + abs(seq(nRow)-n/4)
  x[seq(1,nRow,10)] = NaN;              # add NANs
  X = matrix(rep(x, nCol ), nRow, nCol) # replicate x in columns of X
  a = runmax(x, k)
  b = runmax(X, k)
  stopifnot(all(a==b[,1], na.rm=TRUE));        # vector vs. 2D array
  stopifnot(all(b[,1]==b[,nCol], na.rm=TRUE)); # compare rows within 2D array
  a = runmin(x, k)
  b = runmin(X, k)
  stopifnot(all(a==b[,1], na.rm=TRUE));        # vector vs. 2D array
  stopifnot(all(b[,1]==b[,nCol], na.rm=TRUE)); # compare rows within 2D array

  # Compare C and R algorithms to each other for extreme window sizes
  numeric.test = function (x, k) {
    a = runmin( x, k, alg="C")
    b = runmin( x, k, alg="R")
    c =-runmax(-x, k, alg="C")
    d =-runmax(-x, k, alg="R")
    stopifnot(all(a==b, na.rm=TRUE));
    #stopifnot(all(c==d, na.rm=TRUE)); 
    #stopifnot(all(a==c, na.rm=TRUE));
    stopifnot(all(b==d, na.rm=TRUE));
  }
  n=200;                               # n is an even number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size
  n=201;                               # n is an odd number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size
  n=200;                               # n is an even number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  x[seq(1,200,10)] = NaN;              # with some NaNs
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size
  n=201;                               # n is an odd number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  x[seq(1,200,2)] = NaN;               # with some NaNs
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size

  # speed comparison
  ## Not run: 
  n = 1e7;  k=991; 
  x1 = runif(n);                       # random data - average case scenario
  x2 = 1:n;                            #  best-case scenario data for runmax
  x3 = n:1;                            # worst-case scenario data for runmax
  system.time( runmax( x1,k,alg="C"))  # C alg on average data O(n)
  system.time( runmax( x2,k,alg="C"))  # C alg on  best-case data O(n)
  system.time( runmax( x3,k,alg="C"))  # C alg on worst-case data O(n*k)
  system.time(-runmin(-x1,k,alg="C"))  # use runmin to do runmax work
  system.time( runmax( x1,k,alg="R"))  # R version of the function
  x=runif(1e5); k=1e2;                 # reduce vector and window sizes
  system.time(runmax(x,k,alg="R"))     # R version of the function
  system.time(apply(embed(x,k), 1, max)) # standard R approach 
  
## End(Not run)

caTools documentation built on Sept. 11, 2024, 6:06 p.m.