R/simulate_nhmm.R
In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

Documented in simulate_nhmm

#' Simulate Non-homogeneous Hidden Markov Models
#'
#' Simulate sequences of observed and hidden states given the parameters of a 
#' non-homogeneous hidden Markov model.
#' 
#' @param data A data frame containing the variables used in the model 
#' formulas. Note that this should also include also the response variable(s), 
#' which are used to define the number of observed symbols (using [levels()]) 
#' and the length of sequences. The actual values of the response variables 
#' does not matter though, as they are replaced by the simulated values. The 
#' exception is the first time point in FAN-HMM case: If the `emission_formula` 
#' contains lagged responses, the response variable values at the first time 
#' point are used to define the emissions at the second time point, and the 
#' simulations are done from the second time point onwards. This matches the 
#' case `prior_obs = "fixed"` in [estimate_nhmm()].
#' @param coefs Same as argument `inits` in [estimate_nhmm()]. If `NULL`,
#' (default), the model parameters are generated randomly. If you want to 
#' simulate new sequences based on an estimated model `fit`, you can use 
#' `coefs = fit$etas` and `init_sd = 0`.
#' @param init_sd Standard deviation of the normal distribution used to 
#' generate random coefficients. Default is `2` when `coefs` is `NULL` and `0` 
#' otherwise.
#' @inheritParams estimate_nhmm
#'
#' @return A list with the model used in simulation as well as the simulated 
#' hidden state sequences.
#' @export
simulate_nhmm <- function(
    n_states, emission_formula, initial_formula = ~1, transition_formula = ~1, 
    data, id, time, coefs = NULL, init_sd = 2 * is.null(coefs)) {
  
  force(init_sd)
  stopifnot_(
    !missing(data),
    "{.arg data} is missing, use {.fn simulate_hmm} instead."
  )
  stopifnot_(
    !missing(emission_formula),
    "Argument {.arg emission_formula} is missing."
  )
  if (inherits(emission_formula, "formula")) {
    responses <- get_responses(emission_formula)
    C <- length(responses)
    if (C > 1L) {
      rhs <- deparse1(emission_formula[[3L]])
      emission_formula <- lapply(
        responses, \(y) stats::as.formula(
          paste(y, " ~ ", rhs), 
          env = environment(emission_formula)
        )
      )
    } else {
      emission_formula <- list(emission_formula)
    }
  } else {
    responses <- vapply(emission_formula, get_responses, allow_mv = FALSE, "")
    C <- length(responses)
  }
  data <- .check_data(data, id, time, responses)
  for (y in responses) {
    l <- levels(data[[y]])
    data[, y := ifelse(is.na(y[1]), l[1], y[1]), by = id, env = list(y = y)]
  }
  if (is.null(coefs)) {
    coefs <- list(
      initial_probs = NULL, 
      transition_probs = NULL, 
      emission_probs = NULL)
  } else {
    if (is.null(coefs$initial_probs)) coefs$initial_probs <- NULL
    if (is.null(coefs$transition_probs)) coefs$transition_probs <- NULL
    if (is.null(coefs$emission_probs)) coefs$emission_probs <- NULL
  }
  model <- build_nhmm(
    n_states, emission_formula, initial_formula, transition_formula, 
    data, id, time, coefs = coefs, scale = FALSE
  )
  model$etas <- create_initial_values(coefs, model, init_sd)
  model$gammas$gamma_pi <- eta_to_gamma_mat(model$etas$eta_pi)
  model$gammas$gamma_A <- eta_to_gamma_cube(model$etas$eta_A)
  model$gammas$gamma_B <- drop(eta_to_gamma_cube_field(model$etas$eta_B))
  if (inherits(model, "fanhmm")) {
    W <- update_W_for_fanhmm(model)
    out <- Rcpp_simulate_fanhmm(
      create_obs(model), model$sequence_lengths, model$n_symbols, 
      model$X_pi, model$X_A, model$X_B, 
      io(model$X_pi), io(model$X_A), io(model$X_B),
      iv(model$X_A), iv(model$X_B), tv(model$X_A), tv(model$X_B),
      model$gammas$gamma_pi, model$gammas$gamma_A, model$gammas$gamma_B, 
      model$prior_obs, model$W_X_B, W$W_A, W$W_B
    )
    idx <- data[, .I[-1], by = id, env = list(id = id)]$V1
    for (i in seq_len(model$n_channels)) {
      y <- unlist(lapply(out$observations, \(y) y[i, ] + 1L))
      set(data, i = idx, j = model$responses[i], value = model$symbol_names[[i]][y])
    }
  } else {
    out <- Rcpp_simulate_nhmm(
      create_obs(model), model$sequence_lengths, model$n_symbols, 
      model$X_pi, model$X_A, model$X_B, 
      io(model$X_pi), io(model$X_A), io(model$X_B),
      iv(model$X_A), iv(model$X_B), tv(model$X_A), tv(model$X_B),
      model$gammas$gamma_pi, model$gammas$gamma_A, model$gammas$gamma_B
    )
    for (i in seq_len(model$n_channels)) {
      y <- unlist(lapply(out$observations, \(y) y[i, ] + 1L))
      set(data, j = model$responses[i], value = model$symbol_names[[i]][y])
    }
  }
  states <- cbind(
    model$data[, list(id, time), env = list(id = id, time = time)], 
    state = model$state_names[unlist(out$states) + 1L]
  )
  
  attr(model$X_pi, "X_mean") <- TRUE
  attr(model$X_A, "X_mean") <- TRUE
  attr(model$X_B, "X_mean") <- TRUE
  attr(model$X_pi, "R_inv") <- NULL
  attr(model$X_A, "R_inv") <- NULL
  attr(model$X_B, "R_inv") <- NULL
  model <- update(model, data)
  tQs <- t(create_Q(n_states))
  if (!attr(model$X_pi, "icpt_only")) {
    model$gammas$gamma_pi <- gamma_to_gamma_std(
      model$gammas$gamma_pi, solve(attr(model$X_pi, "R_inv")), 
      attr(model$X_pi, "coef_names"), attr(model$X_pi, "X_mean")
    )
    model$etas$eta_pi[] <- tQs %*% model$gammas$gamma_pi
  }
  if (!attr(model$X_A, "icpt_only")) {
    model$gammas$gamma_A <- gamma_to_gamma_std(
      model$gammas$gamma_, solve(attr(model$X_A, "R_inv")), 
      attr(model$X_A, "coef_names"), attr(model$X_A, "X_mean")
    )
    for (s in seq_len(n_states)) {
      model$etas$eta_A[, , s] <- tQs %*% model$gammas$gamma_A[, , s]
    }
  }
  for (i in seq_along(responses)) {
    if (!attr(model$X_B[[i]], "icpt_only")) {
      tQm <- t(create_Q(model$n_symbols[i]))
      model$gammas$gamma_B[[i]] <- gamma_to_gamma_std(
        model$gammas$gamma_B[[i]], solve(attr(model$X_B[[i]], "R_inv")), 
        attr(model$X_B[[i]], "coef_names"), attr(model$X_B[[i]], "X_mean")
      )
      for (s in seq_len(n_states)) {
        model$etas$eta_B[[i]][, , s] <- tQm %*% model$gammas$gamma_B[[i]][, , s]
      }
    }
  }
  list(model = model, states = states, data = data)
}

Any scripts or data that you put into this service are public.

seqHMM documentation built on June 8, 2025, 10:16 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

seqHMM
Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

R/simulate_nhmm.R
In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

Defines functions simulate_nhmm

Documented in simulate_nhmm

Try the seqHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

seqHMM Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

R/simulate_nhmm.R In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

Defines functions simulate_nhmm

Documented in simulate_nhmm

Try the seqHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

seqHMM
Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series

R/simulate_nhmm.R
In seqHMM: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series