knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center", fig.path = "man/figures/README-" )
samurais is an open source toolbox (available in R and in Matlab) including many original and flexible user-friendly statistical latent variable models and unsupervised algorithms to segment and represent, time-series data (univariate or multivariate), and more generally, longitudinal data which include regime changes.
Our samurais use mainly the following efficient "sword" packages to segment data: Regression with Hidden Logistic Process (RHLP), Hidden Markov Model Regression (HMMR), Piece-Wise regression (PWR), Multivariate 'RHLP' (MRHLP), and Multivariate 'HMMR' (MHMMR).
The models and algorithms are developed and written in Matlab by Faicel Chamroukhi, and translated and designed into R packages by Florian Lecocq, Marius Bartcus and Faicel Chamroukhi.
You can install the samurais package from GitHub with:
# install.packages("devtools") devtools::install_github("fchamroukhi/SaMUraiS")
To build vignettes for examples of usage, type the command below instead:
# install.packages("devtools") devtools::install_github("fchamroukhi/SaMUraiS", build_opts = c("--no-resave-data", "--no-manual"), build_vignettes = TRUE)
Use the following command to display vignettes:
browseVignettes("samurais")
library(samurais)
RHLP
# Application to a toy data set data("univtoydataset") x <- univtoydataset$x y <- univtoydataset$y K <- 5 # Number of regimes (mixture components) p <- 3 # Dimension of beta (order of the polynomial regressors) q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter = 1500 threshold <- 1e-6 verbose <- TRUE verbose_IRLS <- FALSE rhlp <- emRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, max_iter, threshold, verbose, verbose_IRLS) rhlp$summary() rhlp$plot()
# Application to a real data set data("univrealdataset") x <- univrealdataset$x y <- univrealdataset$y2 K <- 5 # Number of regimes (mixture components) p <- 3 # Dimension of beta (order of the polynomial regressors) q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter = 1500 threshold <- 1e-6 verbose <- TRUE verbose_IRLS <- FALSE rhlp <- emRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, max_iter, threshold, verbose, verbose_IRLS) rhlp$summary() rhlp$plot()
HMMR
# Application to a toy data set data("univtoydataset") x <- univtoydataset$x y <- univtoydataset$y K <- 5 # Number of regimes (states) p <- 3 # Dimension of beta (order of the polynomial regressors) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter <- 1500 threshold <- 1e-6 verbose <- TRUE hmmr <- emHMMR(X = x, Y = y, K, p, variance_type, n_tries, max_iter, threshold, verbose) hmmr$summary() hmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))
# Application to a real data set data("univrealdataset") x <- univrealdataset$x y <- univrealdataset$y2 K <- 5 # Number of regimes (states) p <- 3 # Dimension of beta (order of the polynomial regressors) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter <- 1500 threshold <- 1e-6 verbose <- TRUE hmmr <- emHMMR(X = x, Y = y, K, p, variance_type, n_tries, max_iter, threshold, verbose) hmmr$summary() hmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))
PWR
# Application to a toy data set data("univtoydataset") x <- univtoydataset$x y <- univtoydataset$y K <- 5 # Number of segments p <- 3 # Polynomial degree pwr <- fitPWRFisher(X = x, Y = y, K, p) pwr$summary() pwr$plot()
# Application to a real data set data("univrealdataset") x <- univrealdataset$x y <- univrealdataset$y2 K <- 5 # Number of segments p <- 3 # Polynomial degree pwr <- fitPWRFisher(X = x, Y = y, K, p) pwr$summary() pwr$plot()
MRHLP
# Application to a toy data set data("multivtoydataset") x <- multivtoydataset$x y <- multivtoydataset[,c("y1", "y2", "y3")] K <- 5 # Number of regimes (mixture components) p <- 1 # Dimension of beta (order of the polynomial regressors) q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter <- 1500 threshold <- 1e-6 verbose <- TRUE verbose_IRLS <- FALSE mrhlp <- emMRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, max_iter, threshold, verbose, verbose_IRLS) mrhlp$summary() mrhlp$plot()
# Application to a real data set (human activity recogntion data) data("multivrealdataset") x <- multivrealdataset$x y <- multivrealdataset[,c("y1", "y2", "y3")] K <- 5 # Number of regimes (mixture components) p <- 3 # Dimension of beta (order of the polynomial regressors) q <- 1 # Dimension of w (order of the logistic regression: to be set to 1 for segmentation) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter <- 1500 threshold <- 1e-6 verbose <- TRUE verbose_IRLS <- FALSE mrhlp <- emMRHLP(X = x, Y = y, K, p, q, variance_type, n_tries, max_iter, threshold, verbose, verbose_IRLS) mrhlp$summary() mrhlp$plot()
MHMMR
# Application to a simulated data set data("multivtoydataset") x <- multivtoydataset$x y <- multivtoydataset[,c("y1", "y2", "y3")] K <- 5 # Number of regimes (states) p <- 1 # Dimension of beta (order of the polynomial regressors) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter <- 1500 threshold <- 1e-6 verbose <- TRUE mhmmr <- emMHMMR(X = x, Y = y, K, p, variance_type, n_tries, max_iter, threshold, verbose) mhmmr$summary() mhmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))
# Application to a real data set (human activity recognition data) data("multivrealdataset") x <- multivrealdataset$x y <- multivrealdataset[,c("y1", "y2", "y3")] K <- 5 # Number of regimes (states) p <- 3 # Dimension of beta (order of the polynomial regressors) variance_type <- "heteroskedastic" # "heteroskedastic" or "homoskedastic" model n_tries <- 1 max_iter <- 1500 threshold <- 1e-6 verbose <- TRUE mhmmr <- emMHMMR(X = x, Y = y, K, p, variance_type, n_tries, max_iter, threshold, verbose) mhmmr$summary() mhmmr$plot(what = c("smoothed", "regressors", "loglikelihood"))
samurais also implements model selection procedures to select an optimal model based on information criteria including BIC, AIC and ICL.
The selection can be done for the two following parameters:
Instructions below can be used to illustrate the model on provided simulated and real data sets.
RHLP
Let's select a RHLP model for the following time series:
data("univtoydataset") x = univtoydataset$x y = univtoydataset$y plot(x, y, type = "l", xlab = "x", ylab = "Y")
selectedrhlp <- selectRHLP(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3) selectedrhlp$plot(what = "estimatedsignal")
HMMR
Let's select a HMMR model for the following time series:
data("univtoydataset") x = univtoydataset$x y = univtoydataset$y plot(x, y, type = "l", xlab = "x", ylab = "Y")
selectedhmmr <- selectHMMR(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3) selectedhmmr$plot(what = "smoothed")
MRHLP
Let's select a MRHLP model for the following multivariate time series:
data("multivtoydataset") x <- multivtoydataset$x y <- multivtoydataset[, c("y1", "y2", "y3")] matplot(x, y, type = "l", xlab = "x", ylab = "Y", lty = 1)
selectedmrhlp <- selectMRHLP(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3) selectedmrhlp$plot(what = "estimatedsignal")
MHMMR
Let's select a MHMMR model for the following multivariate time series:
data("multivtoydataset") x <- multivtoydataset$x y <- multivtoydataset[, c("y1", "y2", "y3")] matplot(x, y, type = "l", xlab = "x", ylab = "Y", lty = 1)
selectedmhmmr <- selectMHMMR(X = x, Y = y, Kmin = 2, Kmax = 6, pmin = 0, pmax = 3) selectedmhmmr$plot(what = "smoothed")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.