Comparison with depmixS4

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Simple data set where depmix errors for nstates=2

x <- 1:4
if(requireNamespace("depmixS4")){
  model <- depmixS4::depmix(x ~ 1, nstates=2, ntimes=length(x))
  depmixS4::fit(model)
}

Data set where nstates=5 errors

data("buggy.5states", package="plotHMM")
plot(buggy.5states$logratio)
if(requireNamespace("depmixS4")){
  model.spec <- depmixS4::depmix(
    logratio ~ 1, data=buggy.5states, nstates=5)
  set.seed(1)
  model.fit <- depmixS4::fit(model.spec)
}

Fit several data sequences

if(
  require(data.table) && 
  requireNamespace("neuroblastoma") && 
  require(ggplot2) && 
  requireNamespace("depmixS4")
){
  data(neuroblastoma, package="neuroblastoma")
  nb.dt <- data.table(neuroblastoma$profiles)
  one.pro <- nb.dt[profile.id=="4" & chromosome%in%1:10]
  ntimes <- rle(as.integer(one.pro$chromosome))
  n.states <- 4
  model.spec <- depmixS4::depmix(
    logratio ~ 1, data=one.pro,
    nstates=n.states, ntimes=ntimes$lengths)
  set.seed(1)
  unconstrained.fit <- depmixS4::fit(model.spec)
  param.names <- c(mean="(Intercept)", sd="sd")
  par.vec <- depmixS4::getpars(unconstrained.fit)
  matrix(
    par.vec[names(par.vec) %in% param.names],
    ncol=length(param.names),
    byrow=TRUE,
    dimnames=list(state=1:n.states, parameter=names(param.names)))
  one.pro[, viterbi := factor(unconstrained.fit@posterior[,1]) ]
  ggplot()+
    geom_point(aes(
      position/1e6, logratio, color=viterbi),
      data=one.pro)+
    facet_grid(. ~ chromosome, scales="free", space="free")
}

How to constrain common variance parameter?

if(requireNamespace("depmixS4")){
  par.vec <- depmixS4::getpars(model.spec)
  equal.groups <- rep(1, length(par.vec))
  equal.groups[names(par.vec)=="sd"] <- 2
  if(FALSE){
    constrained.fit <- depmixS4::fit(model.spec, equal=equal.groups)
  }
}

Forward-backward algorithm speed comparison

if(requireNamespace("depmixS4")){
  one.chrom <- nb.dt[profile.id=="4" & chromosome=="2"]
  n.states <- 3
  model.spec <- depmixS4::depmix(
    logratio ~ 1,
    data=one.chrom,
    nstates=n.states)
  log.emission.mat <- log(model.spec@dens[,1,])
  log.transition.mat <- log(model.spec@trDens[1,,])
  log.init.vec <- log(model.spec@init[1,])
  microbenchmark::microbenchmark(depmixS4={
    result <- depmixS4::forwardbackward(model.spec)
  }, plotHMM={
    fwd.list <- plotHMM::forward_interface(
      log.emission.mat, log.transition.mat, log.init.vec)
    back.mat <- plotHMM::backward_interface(
      log.emission.mat, log.transition.mat)
    mult.mat <- plotHMM::multiply_interface(
      fwd.list$log_alpha, back.mat)
    pairwise.array <- plotHMM::pairwise_interface(
      log.emission.mat, log.transition.mat,
      fwd.list$log_alpha, back.mat)
  }, times=5)  
}

plotHMM is 2-3x slower than depmixS4. Possibly due to (1) overhead of several function calls rather than just one, and (2) log space computations are slower than scaling.

Viterbi algorithm speed comparison

if(requireNamespace("depmixS4")){
  microbenchmark::microbenchmark(depmixS4={
    depmixS4::viterbi(model.spec)
  }, plotHMM={
    plotHMM::viterbi_interface(
      log.emission.mat, log.transition.mat, log.init.vec)
  }, times=5)
}

plotHMM is about 100x faster than depmixS4, because of the overhead of loops in R (memory allocation in each iteration).



Try the plotHMM package in your browser

Any scripts or data that you put into this service are public.

plotHMM documentation built on Sept. 8, 2023, 5:47 p.m.