knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center",
  fig.path = "man/figures/README-"
)

SaMUraiS: StAtistical Models for the UnsupeRvised segmentAtIon of time-Series

Travis build status CRAN versions CRAN logs

samurais is an open source toolbox (available in R and in Matlab) including many original and flexible user-friendly statistical latent variable models and unsupervised algorithms to segment and represent, time-series data (univariate or multivariate), and more generally, longitudinal data which include regime changes.

Our samurais use mainly the following efficient "sword" packages to segment data: Regression with Hidden Logistic Process (RHLP), Hidden Markov Model Regression (HMMR), Piece-Wise regression (PWR), Multivariate 'RHLP' (MRHLP), and Multivariate 'HMMR' (MHMMR).

The models and algorithms are developed and written in Matlab by Faicel Chamroukhi, and translated and designed into R packages by Florian Lecocq, Marius Bartcus and Faicel Chamroukhi.

Installation

You can install the samurais package from GitHub with:

# install.packages("devtools")
devtools::install_github("fchamroukhi/SaMUraiS")

To build vignettes for examples of usage, type the command below instead:

# install.packages("devtools")
devtools::install_github("fchamroukhi/SaMUraiS", 
                         build_opts = c("--no-resave-data", "--no-manual"), 
                         build_vignettes = TRUE)

Use the following command to display vignettes:

browseVignettes("samurais")

Usage

library(samurais)

RHLP

# Application to a toy data set
data("univtoydataset")
x <- univtoydataset$x
y <- univtoydataset$y

K <- 5 # Number of regimes (mixture components)
p <- 3 # Dimension of beta (order of the polynomial regressors)
q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter = 1500
threshold <- 1e-6
verbose <- TRUE
verbose_IRLS <- FALSE

rhlp <- emRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, 
               max_iter, threshold, verbose, verbose_IRLS)

rhlp$summary()

rhlp$plot()
# Application to a real data set
data("univrealdataset")
x <- univrealdataset$x
y <- univrealdataset$y2

K <- 5 # Number of regimes (mixture components)
p <- 3 # Dimension of beta (order of the polynomial regressors)
q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter = 1500
threshold <- 1e-6
verbose <- TRUE
verbose_IRLS <- FALSE

rhlp <- emRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, 
               max_iter, threshold, verbose, verbose_IRLS)

rhlp$summary()

rhlp$plot()

HMMR

# Application to a toy data set
data("univtoydataset")
x <- univtoydataset$x
y <- univtoydataset$y

K <- 5 # Number of regimes (states)
p <- 3 # Dimension of beta (order of the polynomial regressors)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter <- 1500
threshold <- 1e-6
verbose <- TRUE

hmmr <- emHMMR(X = x, Y = y, K, p, variance_type, 
               n_tries, max_iter, threshold, verbose)

hmmr$summary()

hmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))
# Application to a real data set
data("univrealdataset")
x <- univrealdataset$x
y <- univrealdataset$y2

K <- 5 # Number of regimes (states)
p <- 3 # Dimension of beta (order of the polynomial regressors)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter <- 1500
threshold <- 1e-6
verbose <- TRUE

hmmr <- emHMMR(X = x, Y = y, K, p, variance_type, 
               n_tries, max_iter, threshold, verbose)

hmmr$summary()

hmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))

PWR

# Application to a toy data set
data("univtoydataset")
x <- univtoydataset$x
y <- univtoydataset$y

K <- 5 # Number of segments
p <- 3 # Polynomial degree

pwr <- fitPWRFisher(X = x, Y = y, K, p)

pwr$summary()

pwr$plot()
# Application to a real data set
data("univrealdataset")
x <- univrealdataset$x
y <- univrealdataset$y2

K <- 5 # Number of segments
p <- 3 # Polynomial degree

pwr <- fitPWRFisher(X = x, Y = y, K, p)

pwr$summary()

pwr$plot()

MRHLP

# Application to a toy data set
data("multivtoydataset")
x <- multivtoydataset$x
y <- multivtoydataset[,c("y1", "y2", "y3")]

K <- 5 # Number of regimes (mixture components)
p <- 1 # Dimension of beta (order of the polynomial regressors)
q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter <- 1500
threshold <- 1e-6
verbose <- TRUE
verbose_IRLS <- FALSE

mrhlp <- emMRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, 
                 max_iter, threshold, verbose, verbose_IRLS)

mrhlp$summary()

mrhlp$plot()
# Application to a real data set (human activity recogntion data)
data("multivrealdataset")
x <- multivrealdataset$x
y <- multivrealdataset[,c("y1", "y2", "y3")]

K <- 5 # Number of regimes (mixture components)
p <- 3 # Dimension of beta (order of the polynomial regressors)
q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter <- 1500
threshold <- 1e-6
verbose <- TRUE
verbose_IRLS <- FALSE

mrhlp <- emMRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, 
                 max_iter, threshold, verbose, verbose_IRLS)

mrhlp$summary()

mrhlp$plot()

MHMMR

# Application to a simulated data set
data("multivtoydataset")
x <- multivtoydataset$x
y <- multivtoydataset[,c("y1", "y2", "y3")]

K <- 5 # Number of regimes (states)
p <- 1 # Dimension of beta (order of the polynomial regressors)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter <- 1500
threshold <- 1e-6
verbose <- TRUE

mhmmr <- emMHMMR(X = x, Y = y, K, p, variance_type, n_tries, 
                 max_iter, threshold, verbose)

mhmmr$summary()

mhmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))
# Application to a real data set (human activity recognition data)
data("multivrealdataset")
x <- multivrealdataset$x
y <- multivrealdataset[,c("y1", "y2", "y3")]

K <- 5 # Number of regimes (states)
p <- 3 # Dimension of beta (order of the polynomial regressors)
variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model

n_tries <- 1
max_iter <- 1500
threshold <- 1e-6
verbose <- TRUE

mhmmr <- emMHMMR(X = x, Y = y, K, p, variance_type, n_tries, 
                 max_iter, threshold, verbose)

mhmmr$summary()

mhmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))

Model selection

samurais also implements model selection procedures to select an optimal model based on information criteria including BIC, AIC and ICL.

The selection can be done for the two following parameters:

Instructions below can be used to illustrate the model on provided simulated and real data sets.

RHLP

Let's select a RHLP model for the following time series:

data("univtoydataset")
x = univtoydataset$x
y = univtoydataset$y

plot(x, y, type = "l", xlab = "x", ylab = "Y")
selectedrhlp <- selectRHLP(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3)

selectedrhlp$plot(what = "estimatedsignal")

HMMR

Let's select a HMMR model for the following time series:

data("univtoydataset")
x = univtoydataset$x
y = univtoydataset$y

plot(x, y, type = "l", xlab = "x", ylab = "Y")
selectedhmmr <- selectHMMR(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3)

selectedhmmr$plot(what = "smoothed")

MRHLP

Let's select a MRHLP model for the following multivariate time series:


data("multivtoydataset")
x <- multivtoydataset$x
y <- multivtoydataset[, c("y1", "y2", "y3")]
matplot(x, y, type = "l", xlab = "x", ylab = "Y", lty = 1)
selectedmrhlp <- selectMRHLP(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3)

selectedmrhlp$plot(what = "estimatedsignal")

MHMMR

Let's select a MHMMR model for the following multivariate time series:

data("multivtoydataset")
x <- multivtoydataset$x
y <- multivtoydataset[, c("y1", "y2", "y3")]
matplot(x, y, type = "l", xlab = "x", ylab = "Y", lty = 1)
selectedmhmmr <- selectMHMMR(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3)

selectedmhmmr$plot(what = "smoothed")

References



fchamroukhi/SaMUraiS documentation built on Jan. 23, 2020, 9:21 a.m.