seqHMM.R
In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

## ----setup, include=FALSE, cache=FALSE------------------------------------
library(knitr)
opts_chunk$set(concordance = TRUE, tidy = FALSE, dev = 'png')
options(prompt = "R> ", continue = "+  ", width = 76, useFancyQuotes = FALSE)

## ----settingdata, message=FALSE, cache=FALSE, echo = FALSE, eval = TRUE----
library("seqHMM")

data("biofam", package = "TraMineR")
biofam_seq <- seqdef(
  biofam[, 10:25], start = 15, labels = c(
    "parent", "left", "married", "left+marr", "child", "left+child", 
    "left+marr+ch","divorced")
)

data("biofam3c")
marr_seq <- seqdef(
  biofam3c$married, start = 15, 
  alphabet = c( "single", "married", "divorced"), 
  cpal = c("violetred2", "darkgoldenrod2", "darkmagenta")
)
child_seq <- seqdef(
  biofam3c$children, start = 15,
  alphabet = c("childless", "children"),
  cpal =  c("darkseagreen1", "coral3")
)
left_seq <- seqdef(
  biofam3c$left, start = 15, alphabet = c("with parents", "left home"),
  cpal = c("lightblue", "red3")
)

## ----graphicalillustrations2, dev.args=list(pointsize=10), fig.keep='last', cache=FALSE, message=FALSE, echo=FALSE, fig.cap='Stacked sequence plot of the first ten individuals in the \\code{biofam} data plotted with the \\code{stacked\\_sequence\\_plot} function. The top plot shows the original sequences, and the three bottom plots show the sequences in the separate channels for the same individuals. The sequences are in the same order in each plot, i.e., the same row always matches the same individual.', fig.align='center'----
p <- stacked_sequence_plot(
  list(
    Original = biofam_seq[1:10, ], 
    Marriage = marr_seq[1:10, ], 
    Parenthood = child_seq[1:10, ],
    Residence = left_seq[1:10, ]
  ),
  sort_by = "start", sort_channel = "Original", type = "index")  + 
  patchwork::plot_annotation(title = "Ten first sequences") &
  ggplot2::xlab("Age")
p

## ----plottingsequences, fig.cap="Stacked sequence plot of annual state distributions in the three-channel \\code{biofam} data. This is the default output of the \\code{stacked\\_sequence\\_plot} function. The labels for the channels are taken from the named list of state sequence objects, and the labels for the x axis ticks are taken from the column names of the first object.", fig.keep='last', fig.align='center', cache=FALSE, echo = FALSE, eval = TRUE----
p <- stacked_sequence_plot(
  list(
    "Marriage" = marr_seq, "Parenthood" = child_seq, "Residence" = left_seq
  )
)
p

## ----gridplot1, fig.width=5.5, fig.height=8.5, dev.args=list(pointsize=10), echo=FALSE, fig.cap="Showing state distribution plots for women and men in the \\code{biofam} data.", fig.align='center', fig.keep='last', cache = FALSE, eval = TRUE, echo = FALSE----
p <- stacked_sequence_plot(
  list(
    "Marriage" = marr_seq, "Parenthood" = child_seq, "Residence" = left_seq
  ), group = biofam3c$covariates$sex, type = "index", sort_by = "mds")
p

## ----code_mcHMM, cache=FALSE, echo = FALSE, message=FALSE, warning=TRUE, eval = TRUE----
mc_init <- c(0.9, 0.05, 0.02, 0.02, 0.01)

mc_trans <- matrix(c(
  0.80, 0.10, 0.05, 0.03, 0.02, 0, 0.90, 0.05,
  0.03, 0.02, 0, 0, 0.90, 0.07, 0.03, 0, 0, 0, 0.90, 0.10, 0, 0, 0,
  0, 1
), nrow = 5, ncol = 5, byrow = TRUE)

mc_emiss_marr <- matrix(
  c(
    0.90, 0.05, 0.05, 0.90, 0.05, 0.05, 0.05,
    0.90, 0.05, 0.05, 0.90, 0.05, 0.30, 0.30, 0.40
  ),
  nrow = 5, ncol = 3,
  byrow = TRUE
)

mc_emiss_child <- matrix(c(
  0.9, 0.1, 0.9, 0.1, 0.1, 0.9, 0.1, 0.9,
  0.5, 0.5
), nrow = 5, ncol = 2, byrow = TRUE)

mc_emiss_left <- matrix(c(
  0.9, 0.1, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9,
  0.5, 0.5
), nrow = 5, ncol = 2, byrow = TRUE)

mc_obs <- list(marr_seq, child_seq, left_seq)

mc_emiss <- list(mc_emiss_marr, mc_emiss_child, mc_emiss_left)

mc_initmod <- build_hmm(
  observations = mc_obs, initial_probs = mc_init,
  transition_probs = mc_trans, emission_probs = mc_emiss,
  channel_names = c("Marriage", "Parenthood", "Residence")
)

# For CRAN vignette: load the estimated model object for speed-up
data("hmm_biofam")
# mc_fit <- fit_model(mc_initmod, em_step = FALSE, local_step = TRUE,
#   threads = 4)

## ----plottingHMM, out.width='\\linewidth', fig.height=3.5, dev.args=list(pointsize=10), echo=FALSE, fig.cap="Illustrating a hidden Markov model as a directed graph. Pies represent five hidden states, with slices showing emission probabilities of combinations of observed states. States with emission probability less than 0.05 are combined into one slice. Edges show the transtion probabilities. Initial probabilities of hidden states are given below the pies.", fig.align='center', fig.keep='last', cache = FALSE----
plot(
  hmm_biofam,
  vertex.size = 50, vertex.label.dist = 1.5,
  edge.curved = c(0, 0.6, -0.8, 0.6, 0, 0.6, 0), legend.prop = 0.3,
  combined.slice.label = "States with prob. < 0.05"
)

## ----graphicalillustrations5, out.width='\\linewidth', fig.height=3.5, dev.args=list(pointsize=10), echo=FALSE, fig.cap="Another version of the hidden Markov model of Figure 4 with a different layout and modified labels, legends, and colors. All observed states are shown.", fig.align='center', fig.keep='last', cache = FALSE----
vertex_layout <- matrix(c(1, 2, 2, 3, 1, 0, 0.5, -0.5, 0, -1), ncol = 2)

plot(
  hmm_biofam,
  layout = vertex_layout, xlim = c(0.5, 3.5),
  ylim = c(-1.5, 1), rescale = FALSE, vertex.size = 50,
  vertex.label.pos = c("left", "top", "bottom", "right", "left"),
  edge.curved = FALSE, edge.width = 1, edge.arrow.size = 1,
  with.legend = "left", legend.prop = 0.4, label.signif = 1,
  combine.slices = 0, cpal = colorpalette[[30]][c(14:5)]
)

## ----ssplotHMM, fig.width=5.5, fig.height=8.5, dev.args=list(pointsize=10), fig.cap="Using the \\code{stacked\\_sequence\\_plot} function for an \\code{hmm} object makes it easy to plot the observed sequences together with the most probable paths of hidden states given the model.", fig.align='center', fig.keep='last', cache = FALSE, echo = FALSE----
suppressMessages(
  p <- stacked_sequence_plot(
    hmm_biofam,
    plots = "both", type = "index", sort_by = "mds", 
    sort_channel = "Hidden states"
  ) +
    patchwork::plot_annotation("Observed and hidden state sequences") &
    ggplot2::scale_x_discrete("Age", labels = 15:30)
)
p

## ----code_settingdata, ref.label = 'settingdata', message=FALSE, warning = TRUE, echo = TRUE----
library("seqHMM")

data("biofam", package = "TraMineR")
biofam_seq <- seqdef(
  biofam[, 10:25], start = 15, labels = c(
    "parent", "left", "married", "left+marr", "child", "left+child", 
    "left+marr+ch","divorced")
)

data("biofam3c")
marr_seq <- seqdef(
  biofam3c$married, start = 15, 
  alphabet = c( "single", "married", "divorced"), 
  cpal = c("violetred2", "darkgoldenrod2", "darkmagenta")
)
child_seq <- seqdef(
  biofam3c$children, start = 15,
  alphabet = c("childless", "children"),
  cpal =  c("darkseagreen1", "coral3")
)
left_seq <- seqdef(
  biofam3c$left, start = 15, alphabet = c("with parents", "left home"),
  cpal = c("lightblue", "red3")
)

## ----code_plottingsequences, ref.label = 'plottingsequences', echo = TRUE, eval = FALSE----
# p <- stacked_sequence_plot(
#   list(
#     "Marriage" = marr_seq, "Parenthood" = child_seq, "Residence" = left_seq
#   )
# )
# p

## ----code_sc_buildHMM_random, cache=FALSE---------------------------------
sc_initmod_random <- build_hmm(observations = biofam_seq, n_states = 5)

## ----code_sc_initialvalues, cache=FALSE-----------------------------------
sc_init <- c(0.9, 0.06, 0.02, 0.01, 0.01)

sc_trans <- matrix(c(
  0.80, 0.10, 0.05, 0.03, 0.02, 0.02, 0.80, 0.10,
  0.05, 0.03, 0.02, 0.03, 0.80, 0.10, 0.05, 0.02, 0.03, 0.05, 0.80, 0.10,
  0.02, 0.03, 0.05, 0.05, 0.85
), nrow = 5, ncol = 5, byrow = TRUE)

sc_emiss <- matrix(NA, nrow = 5, ncol = 8)
sc_emiss[1, ] <- seqstatf(biofam_seq[, 1:4])[, 2] + 0.1
sc_emiss[2, ] <- seqstatf(biofam_seq[, 5:7])[, 2] + 0.1
sc_emiss[3, ] <- seqstatf(biofam_seq[, 8:10])[, 2] + 0.1
sc_emiss[4, ] <- seqstatf(biofam_seq[, 11:13])[, 2] + 0.1
sc_emiss[5, ] <- seqstatf(biofam_seq[, 14:16])[, 2] + 0.1
sc_emiss <- sc_emiss / rowSums(sc_emiss)

rownames(sc_trans) <- colnames(sc_trans) <- rownames(sc_emiss) <-
  paste("State", 1:5)

colnames(sc_emiss) <- attr(biofam_seq, "labels")

sc_trans
round(sc_emiss, 3)

## ----code_sc_buildHMM, cache=FALSE----------------------------------------
sc_initmod <- build_hmm(
  observations = biofam_seq, initial_probs = sc_init,
  transition_probs = sc_trans, emission_probs = sc_emiss
)

## ----code_sc_fitHMM, cache=FALSE, eval=FALSE------------------------------
# sc_fit <- fit_model(sc_initmod)

## ----load_sc_fit, cache=FALSE, echo=FALSE---------------------------------
sc_fit <- readRDS("sc_fit.rds")

## ----code_sc_results1, cache=FALSE----------------------------------------
sc_fit$logLik

## ----code_sc_results2, cache=FALSE----------------------------------------
sc_fit$model

## ----code_mcHMM2, echo = TRUE, message=TRUE, warnings=TRUE, eval = TRUE, cache = FALSE----
mc_init <- c(0.9, 0.05, 0.02, 0.02, 0.01)

mc_trans <- matrix(
  c(
    0.80, 0.10, 0.05, 0.03, 0.02, 0, 0.90, 0.05, 0.03,
    0.02, 0, 0, 0.90, 0.07, 0.03, 0, 0, 0, 0.90, 0.10, 0, 0, 0, 0, 1
  ),
  nrow = 5, ncol = 5, byrow = TRUE
)

mc_emiss_marr <- matrix(
  c(
    0.90, 0.05, 0.05, 0.90, 0.05, 0.05, 0.05, 0.90,
    0.05, 0.05, 0.90, 0.05, 0.30, 0.30, 0.40
  ),
  nrow = 5, ncol = 3,
  byrow = TRUE
)

mc_emiss_child <- matrix(c(
  0.9, 0.1, 0.9, 0.1, 0.1, 0.9, 0.1, 0.9, 0.5,
  0.5
), nrow = 5, ncol = 2, byrow = TRUE)

mc_emiss_left <- matrix(c(
  0.9, 0.1, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.5,
  0.5
), nrow = 5, ncol = 2, byrow = TRUE)

mc_obs <- list(marr_seq, child_seq, left_seq)

mc_emiss <- list(mc_emiss_marr, mc_emiss_child, mc_emiss_left)

mc_initmod <- build_hmm(
  observations = mc_obs, initial_probs = mc_init,
  transition_probs = mc_trans, emission_probs = mc_emiss,
  channel_names = c("Marriage", "Parenthood", "Residence")
)

# For CRAN vignette: load the estimated model object for speed-up
data("hmm_biofam")
# mc_fit <- fit_model(mc_initmod, em_step = FALSE, local_step = TRUE,
# threads = 4)

## ----code_mcHMM_BIC, cache=FALSE, echo = TRUE, message=FALSE, eval = TRUE----
# Vignette: already loaded hmm_biofam
# hmm_biofam <- mc_fit$model
BIC(hmm_biofam)

## ----code_MHMM, cache=FALSE, echo = TRUE, eval = FALSE, warning=TRUE------
# mc_init2 <- c(0.9, 0.05, 0.03, 0.02)
# 
# mc_trans2 <- matrix(c(
#   0.85, 0.05, 0.05, 0.05, 0, 0.90, 0.05, 0.05, 0, 0,
#   0.95, 0.05, 0, 0, 0, 1
# ), nrow = 4, ncol = 4, byrow = TRUE)
# 
# mc_emiss_marr2 <- matrix(c(
#   0.90, 0.05, 0.05, 0.90, 0.05, 0.05, 0.05,
#   0.85, 0.10, 0.05, 0.80, 0.15
# ), nrow = 4, ncol = 3, byrow = TRUE)
# 
# mc_emiss_child2 <- matrix(c(0.9, 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
#   nrow = 4, ncol = 2, byrow = TRUE
# )
# 
# mc_emiss_left2 <- matrix(c(0.9, 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
#   nrow = 4, ncol = 2, byrow = TRUE
# )
# 
# mhmm_init <- list(mc_init, mc_init2)
# 
# mhmm_trans <- list(mc_trans, mc_trans2)
# 
# mhmm_emiss <- list(
#   list(mc_emiss_marr, mc_emiss_child, mc_emiss_left),
#   list(mc_emiss_marr2, mc_emiss_child2, mc_emiss_left2)
# )
# 
# biofam3c$covariates$cohort <- cut(
#   biofam3c$covariates$birthyr,
#   c(1908, 1935, 1945, 1957)
# )
# biofam3c$covariates$cohort <- factor(biofam3c$covariates$cohort,
#   labels = c("1909-1935", "1936-1945", "1946-1957")
# )
# 
# init_mhmm <- build_mhmm(
#   observations = mc_obs, initial_probs = mhmm_init,
#   transition_probs = mhmm_trans, emission_probs = mhmm_emiss,
#   formula = ~ sex + cohort, data = biofam3c$covariates,
#   channel_names = c("Marriage", "Parenthood", "Residence"),
#   cluster_names = c("Cluster 1", "Cluster 2")
# )
# 
# # vignette: less restarts and no parallelization
# set.seed(1011)
# mhmm_fit <- fit_model(init_mhmm,
#   local_step = TRUE, threads = 2,
#   control_em = list(restart = list(times = 10))
# )
# mhmm <- mhmm_fit$model

## ----load_mhmm, cache=FALSE, echo=FALSE-----------------------------------
mhmm <- readRDS("mhmm.rds")

## ----code_summaryMHMM, cache=FALSE----------------------------------------
summary(mhmm, conditional_se = FALSE)

## ----code_plottingHMMbasic, out.width='\\linewidth', fig.height=4, dev.args=list(pointsize=10), echo=TRUE, fig.align='center', fig.keep='last', cache = FALSE, eval = TRUE, fig.cap="A default plot of a hidden Markov model."----
plot(hmm_biofam)

## ----code_plottingHMM, ref.label='plottingHMM', echo=TRUE, eval = FALSE----
# plot(
#   hmm_biofam,
#   vertex.size = 50, vertex.label.dist = 1.5,
#   edge.curved = c(0, 0.6, -0.8, 0.6, 0, 0.6, 0), legend.prop = 0.3,
#   combined.slice.label = "States with prob. < 0.05"
# )

## ----code_graphicalillustrations5, ref.label = 'graphicalillustrations5', echo=TRUE, eval = FALSE----
# vertex_layout <- matrix(c(1, 2, 2, 3, 1, 0, 0.5, -0.5, 0, -1), ncol = 2)
# 
# plot(
#   hmm_biofam,
#   layout = vertex_layout, xlim = c(0.5, 3.5),
#   ylim = c(-1.5, 1), rescale = FALSE, vertex.size = 50,
#   vertex.label.pos = c("left", "top", "bottom", "right", "left"),
#   edge.curved = FALSE, edge.width = 1, edge.arrow.size = 1,
#   with.legend = "left", legend.prop = 0.4, label.signif = 1,
#   combine.slices = 0, cpal = colorpalette[[30]][c(14:5)]
# )

## ----code_ssplotHMM, ref.label = 'ssplotHMM', eval = FALSE, echo = TRUE----
# suppressMessages(
#   p <- stacked_sequence_plot(
#     hmm_biofam,
#     plots = "both", type = "index", sort_by = "mds",
#     sort_channel = "Hidden states"
#   ) +
#     patchwork::plot_annotation("Observed and hidden state sequences") &
#     ggplot2::scale_x_discrete("Age", labels = 15:30)
# )
# p

## ----code_plottingMHMMbasic, fig.width=6.5, fig.height=8, dev.args=list(pointsize=10), echo=TRUE, fig.align='center', fig.keep='last', cache = FALSE, eval = TRUE, fig.cap="Plotting submodels of an MHMM with the \\code{plot} method."----
plot(mhmm,
  interactive = FALSE, nrow = 2, legend.prop = 0.45,
  vertex.size = 50, vertex.label.cex = 1.3, cex.legend = 1.3,
  edge.curved = 0.65, edge.label.cex = 1.3, edge.arrow.size = 0.8
)

## ----code_ssplotMHMMold, eval = FALSE, echo = TRUE------------------------
# mssplot(mhmm, ask = TRUE)

## ----code_ssplotMHMM, eval = FALSE, echo = TRUE---------------------------
# stacked_sequence_plot(mhmm)
Any scripts or data that you put into this service are public.
seqHMM documentation built on June 8, 2025, 10:16 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
seqHMM
Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

inst/doc/seqHMM.R
In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

Try the seqHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

seqHMM Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

inst/doc/seqHMM.R In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

Try the seqHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

seqHMM
Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

inst/doc/seqHMM.R
In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series