#' Extract features from a collection of time series
#'
#' @description This function extract time series features from a collection of time series.
#' This is a modification oftsmeasures function of anomalous package
#' package .
#' @param normalise If TRUE, each time series is scaled to be normally distributed with mean 0 and sd 1
#' @param width A window size for variance change, level shift and lumpiness
#' @param window A window size for KLscore
#' @param y A multivariate time serie
#' @return An object of class features with the following components:
#' \item{mean}{Mean}
#' \item{variance}{Variance}
#' \item{lumpiness}{Variance of annual variances of remainder}
#' \item{lshift}{Level shift using rolling window}
#' \item{vchange}{Variance change}
#' \item{linearity}{Strength of linearity}
#' \item{curvature}{Strength of curvature}
#' \item{spikiness}{Strength of spikiness}
#' \item{season}{Strength of seasonality}
#' \item{peak}{Strength of peaks}
#' \item{trough}{Strength of trough}
#' \item{BurstinessFF}{Burstiness of time series using Fano Factor}
#' \item{minimum}{Minimum value}
#' \item{maximum}{Maximum value}
#' \item{rmeaniqmean}{Ratio between interquartile mean and the arithmetic mean }
#' \item{moment3}{Third moment}
#' \item{highlowmu}{Ratio between the means of data that is below and upper the global mean}
#' @seealso \code{\link{find_odd_streams}}, \code{\link{get_pc_space}},
#' \code{\link{set_outlier_threshold}}, \code{\link{gg_featurespace}}
#'
#' @export
#' @importFrom moments moment
#' @importFrom RcppRoll roll_mean
#' @importFrom RcppRoll roll_var
#' @importFrom mgcv gam
#' @import stats
#' @references {Hyndman, R. J., Wang, E., & Laptev, N. (2015). Large-scale unusual time series detection.
#' In 2015 IEEE International Conference on Data Mining Workshop (ICDMW), (pp. 1616-1619). IEEE.}\cr
#'
#' {Fulcher, B. D. (2012). Highly comparative time-series analysis. PhD thesis, University of Oxford.}
#' @examples
#' \donttest{
#' mvtsplot::mvtsplot(anomalous_stream, levels=8, gcol=2, norm="global")
#' features <- extract_tsfeatures(anomalous_stream[500:550, ])
#' plot.ts(features[, 1:10])
#' }
#'
extract_tsfeatures <- function(y, normalise = TRUE, width = ifelse(frequency(y) > 1, frequency(y), 10), window = width) {
# y: a multivariate time series normalise: TRUE: scale data to be normally distributed width: a window size for variance
# change and level shift, lumpiness window: a window size for KLscore
y <- as.ts(y)
tspy <- tsp(y)
freq <- frequency(y)
# Remove columns containing all NAs
nay <- is.na(y)
allna <- apply(nay, 2, all)
x <- y[, !allna]
# Normalise data to mean = 0, sd = 1
if (normalise) {
x <- as.ts(scale(x, center = TRUE, scale = TRUE))
}
# create a location to store the measures
measures <- list()
# measure1 - Calculate mean of ts
measures$mean <- colMeans(y, na.rm = TRUE)
# measure2 - Calculate variance of ts
measures$variance <- apply(y, 2, var, na.rm = TRUE)
# measure5 - Lumpiness
measures$lumpiness <- apply(y, 2, Lump, width = width)
# measure6 - Level shift using rolling window
measures$lshift <- apply(y, 2, RLshift, width = width)
# measure7 - variance change using rolling window
measures$vchange <- apply(y, 2, RVarChange, width = width)
# measure11,12,13,14,15,16,17 - Strength of trend and seasonality and spike
varts <- apply(y, 2, VarTS, tspx = tspy)
# measures$trend <- sapply(varts, function(y) y$trend)
measures$linearity <- sapply(varts, function(y) y$linearity)
measures$curvature <- sapply(varts, function(y) y$curvature)
measures$spikiness <- sapply(varts, function(y) y$spike)
if (freq > 1) {
measures$season <- sapply(varts, function(y) y$season)
measures$peak <- sapply(varts, function(y) y$peak)
measures$trough <- sapply(varts, function(y) y$trough)
}
# measure19 - burstiness of time series
measures$BurstinessFF <- apply(y, 2, BurstFF)
# measure21,22 - Calculate the time series length)
minmax <- apply(y, 2, function(x) tsminmax(x)) # original data set
measures$minimum <- sapply(minmax, function(x) x$mn)
measures$maximum <- sapply(minmax, function(x) x$mx)
# measure28 - the ratio between interquartile mean and the arithmetic mean
measures$rmeaniqmean <- apply(y, 2, rmeaniqmean) # apply to original data
# measure29 - Calculate the moments moments <- apply(y, 2, dmoments) #apply to original data set measures$moment3 <-
# sapply(moments, function(y) y$m3)
measures$moment3 <- apply(y, 2, dmoments)
# measure32 - compare the means of data that is below and upper the global mean
measures$highlowmu <- apply(y, 2, highlowmu) # apply to original data
# get all the measures to one data frame
tmp <- do.call(cbind, measures)
nr <- ncol(y)
nc <- length(measures)
mat <- matrix(, nrow = nr, ncol = nc)
colnames(mat) <- colnames(tmp)
mat[!allna, ] <- tmp
out <- structure(mat, class = c("features", "matrix"))
return(out)
}
### A function to calculate Lumpiness: cannot be used for yearly data first divide a series into blocks. Then the variances
### of each block are computed. The variance of the variances across these blocks measures the 'lumpiness' of the series.
### measure5 - Lumpiness
Lump <- function(x, width) {
start <- seq(1, nr <- length(x), by = width)
end <- seq(width, nr + width, by = width)
nsegs <- nr / width
varx <- sapply(1:nsegs, function(idx) var(x[start[idx]:end[idx]], na.rm = TRUE))
lumpiness <- var(varx, na.rm = TRUE)
return(lumpiness)
}
### A function to calculate Level shift using rolling window The 'level shift' is defined as the maximum difference in mean
### between consecutive blocks of 10 observations measure6 - Level shift using rolling window
RLshift <- function(x, width) {
rollmean <- RcppRoll::roll_mean(x, width, na.rm = TRUE)
lshifts <- tryCatch(max(abs(diff(rollmean, width)), na.rm = TRUE), warning = function(w) w)
if (any(class(lshifts) == "warning")) {
lshifts <- NA
}
return(lshifts)
}
### A function to calculate the maximum difference in variance using rolling window The 'variance change' is defined as the
### maximum difference in variance between consecutive blocks of 10 observations measure7 - variance change using rolling
### window
RVarChange <- function(x, width) {
rollvar <- RcppRoll::roll_var(x, width, na.rm = TRUE)
vchange <- tryCatch(max(abs(diff(rollvar, width)), na.rm = TRUE), warning = function(w) w)
if (any(class(vchange) == "warning")) {
vchange <- NA
}
return(vchange)
}
## A function to find Strength of trend and seasonality and spike Some of our features rely on a robust STL decomposition
## [3]. For example, the size and location of the peaks and troughs in the seasonal component are used, and the spikiness
## feature is the variance of the leave-one-out variances of the remainder component. Other features measure structural
## changes over time. measure11-17 - Strength of trend and seasonality and spike
VarTS <- function(x, tspx) {
x <- as.ts(x)
tsp(x) <- tspx
freq <- tspx[3]
contx <- try(na.contiguous(x), silent = TRUE)
len.contx <- length(contx)
if (length(contx) < 2 * freq || class(contx) == "try-error") {
trend <- linearity <- curvature <- season <- spike <- peak <- trough <- NA
} else {
if (freq > 1L) {
all.stl <- stl(contx, s.window = "periodic", robust = TRUE)
starty <- start(contx)[2L]
pk <- (starty + which.max(all.stl$time.series[, "seasonal"]) - 1L) %% freq
th <- (starty + which.min(all.stl$time.series[, "seasonal"]) - 1L) %% freq
pk <- ifelse(pk == 0, freq, pk)
th <- ifelse(th == 0, freq, th)
trend0 <- all.stl$time.series[, "trend"]
fits <- trend0 + all.stl$time.series[, "seasonal"]
adj.x <- contx - fits
v.adj <- var(adj.x, na.rm = TRUE)
detrend <- contx - trend0
deseason <- contx - all.stl$time.series[, "seasonal"]
peak <- pk * max(all.stl$time.series[, "seasonal"], na.rm = TRUE)
trough <- th * min(all.stl$time.series[, "seasonal"], na.rm = TRUE)
remainder <- all.stl$time.series[, "remainder"]
season <- ifelse(var(detrend, na.rm = TRUE) < 1e-10, 0, max(0, min(1, 1 - v.adj / var(detrend, na.rm = TRUE))))
} else {
# No seasonal component
tt <- 1:len.contx
trend0 <- fitted(mgcv::gam(contx ~ s(tt)))
remainder <- contx - trend0
deseason <- contx - trend0
v.adj <- var(trend0, na.rm = TRUE)
}
trend <- ifelse(var(deseason, na.rm = TRUE) < 1e-10, 0, max(0, min(1, 1 - v.adj / var(deseason, na.rm = TRUE))))
n <- length(remainder)
v <- var(remainder, na.rm = TRUE)
d <- (remainder - mean(remainder, na.rm = TRUE))^2
varloo <- (v * (n - 1) - d) / (n - 2)
spike <- var(varloo, na.rm = TRUE)
pl <- poly(1:len.contx, degree = 2)
tren.coef <- coef(lm(trend0 ~ pl))[2:3]
linearity <- tren.coef[1]
curvature <- tren.coef[2]
}
if (freq > 1) {
return(list(trend = trend, season = season, spike = spike, peak = peak, trough = trough, linearity = linearity, curvature = curvature))
} else {
# No seasonal component
return(list(trend = trend, spike = spike, linearity = linearity, curvature = curvature))
}
}
## A function to calculate the burstiness of time series using Fano Factor Returns the 'burstiness' statistic: $(\sigma^2)
## / \mu$. Another measures of burstiness is the Fano Factor: a ratio between the variance and the mean . In statistis Fano
## Factor , like the coefficient of variation, is a measure of dispersion of proability distributions of a Fano noise. Fano
## factor is defined as $(\sigma^2) / \mu$. measure19 - Calculate the burstiness statistic
BurstFF <- function(x) {
B <- (sd(x, na.rm = TRUE)^2) / mean(x, na.rm = TRUE)
return(B)
}
### Time series Minimum and Maximum Returns the minimum and maximum of the time series measure21,22 - Calculate the time
### series minimum and maximum
tsminmax <- function(x) {
return(list(mn = min(x, na.rm = TRUE), mx = max(x, na.rm = TRUE)))
}
### a function to calculate the ratio between trimmed mean to mean the ratio between interquartile mean and the arithmetic
### mean of the scaled data. Low values (values closer to zero) indicate the presence of outliers. measure28 - the ratio
### between interquartile mean and the arithmetic mean
rmeaniqmean <- function(x) {
out <- mean(x, trim = 0.5, na.rm = TRUE) / mean(x, na.rm = TRUE)
return(out)
}
## A function to calculate the moments Returns the moment of the distribution (the measure moments, m of the distribution,
## for m= 3,4,5,...,11.) of the input time series, normalizes by the standard deviation. Output This operation Uses the
## moments package in R measure29 - moments m of the distribution
dmoments <- function(x) {
momentssd <- 0
momentssd <- moments::moment(x, order = 3, na.rm = TRUE)
out <- momentssd / sd(x, na.rm = TRUE)
return(out)
}
## A function to compare the means of data that is below and upper the global mean Calculates a statistic related to the
## mean of the time series data that is above the (global) time-series mean compared to the mean of the data that is below
## the global time-series mean. measure32 - compare the means of data that is below and upper the global mean
highlowmu <- function(x) {
mu <- mean(x, na.rm = TRUE)
mhi <- mean(x[x > mu], na.rm = TRUE)
mlo <- mean(x[x < mu], na.rm = TRUE)
out <- (mhi - mu) / (mu - mlo)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.