Introduction to the construction of Markov models
In rdecision: Decision Analytic Modelling in Health Economics

#| include = FALSE,
#| purl = FALSE
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = FALSE,
  comment = "#>"
)

#| purl = FALSE
#nolint start

library("rdecision")

#| purl = FALSE
#nolint end

Introduction

Sonnenberg and Beck's 1993 practical guide to using Markov models in decision making [@sonnenberg1993] is widely cited, and describes the principles of using Markov models for cohort simulations. As an example, it introduces an idealised three health state model applied to patients who have received a prosthetic heart valve (PHV). This vignette explains how to use rdecision to implement the example case, and replicates the published results.

Model structure

In the example, there are three states: "Well", "Disabled" and "Dead", which are represented by variables of type MarkovState. As a minimum, only the name property of each state must be set; the utility and annual occupancy cost can be set when a state is created or set later. Because we will be setting these properties later, we create the states as named variables. A Markov state object is represented in rdecision as a node in a graph.

#| echo = TRUE
s_well <- MarkovState$new(name = "Well")
s_disabled <- MarkovState$new(name = "Disabled")
s_dead <- MarkovState$new(name = "Dead")

Each allowed transition between states is represented as an object of type Transition. In rdecision, transitions are the directed edges of a graph. Transitions are defined by the source and target states that they join, and can have the optional properties of a cost associated with making the transition, and a label. In this example, we do not need to set any of the optional properties, and can create the transitions as a list of unnamed variables. Unless states are temporary states (occupied for one cycle only), we must also define transitions from each state to itself, to represent people who remain in a state between cycles.

#| echo = TRUE
E <- list(
  Transition$new(s_well, s_well),
  Transition$new(s_dead, s_dead),
  Transition$new(s_disabled, s_disabled),
  Transition$new(s_well, s_disabled),
  Transition$new(s_well, s_dead),
  Transition$new(s_disabled, s_dead)
)

The model itself is created as a variable of type SemiMarkovModel, which represents a directed graph with nodes (states) and edges (transitions). Properties of the model, including the cycle time and discount rates can be set when the model is created. For this example, we leave these as their default values (one year cycle length, no discounting applied to costs or utilities).

#| echo = TRUE
m <- SemiMarkovModel$new(V = list(s_well, s_disabled, s_dead), E)

#| purl = FALSE
# test that state tabulation is as expected
local({
  # check the state tabulation
  st <- m$tabulate_states()
  stopifnot(
    setequal(names(st), c("Name", "Cost", "Utility")),
    all.equal(nrow(st), 3L)
  )
})

The model can be saved as a graph object and rendered as a diagram. Method as_gml() creates a representation of the graph in GML format, which can be opened and plotted using the igraph package, optionally with additional manipulation of the graph's appearance to achieve the desired effect, as below.

local({
  # create an igraph object (requires square plot region)
  gml <- m$as_gml()
  gmlfile <- tempfile(fileext = ".gml")
  writeLines(gml, con = gmlfile)
  ig <- igraph::read_graph(gmlfile, format = "gml")
  # match layout to Sonnenberg and Beck, fig 3
  vxy <- matrix(
    data = c(
      -0.75, +0.75, +0.00,
      +0.75, +0.75, -0.75
    ),
    ncol = 2L,
    dimnames = list(c("Well", "Disabled", "Dead"), c("x", "y"))
  )
  layout <- matrix(
    data = c(
      vapply(X = igraph::V(ig), FUN.VALUE = 1.0, FUN = function(v) {
        lbl <- igraph::vertex_attr(ig, "label", v)
        return(vxy[[lbl, "x"]])
      }),
      vapply(X = igraph::V(ig), FUN.VALUE = 1.0, FUN = function(v) {
        lbl <- igraph::vertex_attr(ig, "label", v)
        return(vxy[[lbl, "y"]])
      })
    ),
    byrow = FALSE,
    ncol = 2L
  )
  # loop angles
  loopa <- vapply(X = igraph::E(ig), FUN.VALUE = 1.0, FUN = function(e) {
    # find source and target labels
    trg <- igraph::head_of(ig, e)
    trgl <- igraph::vertex_attr(ig, name = "label", index = trg)
    src <- igraph::tail_of(ig, e)
    srcl <- igraph::vertex_attr(ig, name = "label", index = src)
    la <- 0.0
    if (trgl == srcl) {
      if (trgl == "Well") {
        la <- pi
      } else if (trgl == "Dead") {
        la <- pi / 2.0
      }
    }
    return(la)
  })
  # plot into png file
  withr::with_par(
    new = list(
      oma = c(0L, 0L, 0L, 0L),
      mar = c(3L, 3L, 3L, 3L),
      xpd = NA
    ),
    code = {
      plot(
        ig,
        rescale = FALSE, asp = 0L,
        vertex.shape = "circle", vertex.size = 60.0,
        vertex.color = "white", vertex.label.color = "black",
        edge.color = "black",
        edge.arrow.size = 0.75,
        frame = FALSE,
        layout = layout,
        loop.size = 0.8,
        edge.loop.angle = loopa
      )
    }
  )
})

Model variables

In the prosthetic heart valve example, there are only 4 model variables: three probabilities of transition during one cycle, and one utility (disabled state).

The default utility of each state is 1, so we have to set the utilities of the disabled and dead states, assuming those in the well state have full utility, as follows:

#| echo = TRUE
s_disabled$set_utility(0.7)
s_dead$set_utility(0.0)

The probabilities of making a transition between states in a semi-Markov model must be defined as a matrix. Specifically, these are the probabilities of starting a cycle in one state and finishing it in another. rdecision requires the matrix to have specific properties:

Its cells must be numerical values between 0 and 1.
There should be as many rows and columns as states.
The rows and columns must have names corresponding to the state names.
The rows represent the source state and the columns represent the target state.
The the sum of probabilities of each row must be 1. We can ensure that the final condition is met by setting at most one value in each row to be NA; rdecision will replace these by a value to ensure the sum of probabilities is correct; normally this is assigned to self transitions.

In this example there are three values for transition probabilities (well to disabled, well to dead, disabled to dead), an assumption that there is no transition from disabled to well, and an assumption that dead is an absorbing state. The matrix is created and set as follows:

#| echo = TRUE
snames <- c("Well", "Disabled", "Dead")
pt <- matrix(
  data = c(NA, 0.2, 0.2, 0.0, NA, 0.4, 0.0, 0.0, NA),
  nrow = 3L, byrow = TRUE,
  dimnames = list(source = snames, target = snames)
)
m$set_probabilities(pt)

with(data = as.data.frame(pt), expr = {
  data.frame(
    Well = round(Well, digits = 3L),
    Disabled = round(Disabled, digits = 3L),
    Dead = round(Dead, digits = 3L),
    row.names = row.names(pt),
    stringsAsFactors = FALSE
  )
})

The transition probability matrix can be extracted from the model using the function transition_probabilities(). The values set as NA are replaced as required, and the order of rows and columns may differ from the one provided. For this example it is as follows:

local({
  ptc <- m$transition_probabilities()
  with(data = as.data.frame(ptc), expr = {
    data.frame(
      Well = round(Well, digits = 3L),
      Disabled = round(Disabled, digits = 3L),
      Dead = round(Dead, digits = 3L),
      row.names = row.names(ptc),
      stringsAsFactors = FALSE
    )
  })
})

#| purl = FALSE
# test that transition probabilities are as expected
local({
  ept <- matrix(
    data = c(0.6, 0.2, 0.2, 0.0, 0.6, 0.4, 0.0, 0.0, 1.0),
    nrow = 3L, byrow = TRUE,
    dimnames = list(source = snames, target = snames)
  )
  opt <- m$transition_probabilities()
  opt <- opt[snames, snames]
  stopifnot(
    all.equal(opt, ept)
  )
})

Running the model

In a cohort Markov model, it is necessary to define the starting populations in each state. The total population size is arbitrary; it is the relative proportions starting in each state that matters. In Sonnenberg and Beck's PHV example, they assume there are 10,000 people who start in the "Well" state. In rdecision this is achieved by resetting the model; the elapsed time and cycle number can be reset with the same call, here we leave them as their default values.

In this case, the state populations are given as integers, but in a cohort simulation, most practical transition probability matrices lead to state occupancies involving fractions of patients as the simulation proceeds. Thus the starting populations can also be given as real numbers.

#| echo = TRUE
m$reset(populations = c(Well = 10000L, Disabled = 0L, Dead = 0L))

To run the model, we call the cycles() method. Following the example, there are 25 yearly cycles, and there is no half cycle correction. This correction can be applied independently to the output to the Markov trace after each cycle for the state population, cycle cost and incremental QALYs.

#| echo = TRUE
mt <- m$cycles(25L, hcc.pop = FALSE, hcc.cost = FALSE, hcc.QALY = FALSE)

#| purl = FALSE
# test that cycle results match S&B table 2
stopifnot(
  # check structure of data frame
  all.equal(m$get_elapsed(), as.difftime(25.0 * 365.25, units = "days")),
  isa(mt, "data.frame"),
  all.equal(nrow(mt), 26L),
  # check cycle numbers and times
  all.equal(mt[, "Cycle"], seq(from = 0L, to = 25L)),
  all.equal(mt[, "Years"], as.numeric(seq(from = 0L, to = 25L))),
  # check costs
  all.equal(mt[, "Cost"], rep(0.0, times = 26L)),
  # spot check one row
  all.equal(round(mt[which(mt[, "Cycle"] == 2L), "Well"]), 3600.0),
  all.equal(round(mt[which(mt[, "Cycle"] == 2L), "Disabled"]), 2400.0),
  all.equal(round(mt[which(mt[, "Cycle"] == 2L), "Dead"]), 4000.0)
)

The cycles() method returns a Markov trace, a data frame which contains one row per cycle with details of state populations, cumulative costs and QALYs. The trace for this example is as follows:

t2 <- with(data = mt, expr = {
  data.frame(
    Cycle = Cycle,
    Well = round(Well, digits = 2L),
    Disabled = round(Disabled, digits = 2L),
    Dead = round(Dead, digits = 2L),
    QALY = round(QALY, digits = 4L),
    cQALY = round(cumsum(QALY), digits = 4L),
    stringsAsFactors = FALSE
  )
})
t2

The column labelled "QALY" is the per-patient quality adjusted life years accumulated at each cycle (in Sonnenberg and Beck's Table 2 this is labelled as "Cycle Sum" and is for the whole cohort of 10,000 people), and the column labelled "cQALY" is the per-patient cumulative quality adjusted life years over the simulation (in Sonnenberg and Beck's Table 2 this is labelled as "Cumulative Utility" and is for the whole cohort).

#| purl = FALSE
# test that reformatted cycle results match S&B table 2
local({
  # cycle 0
  r0 <- which(t2[, "Cycle"] == 0L)
  stopifnot(
    all.equal(t2[[r0, "Well"]], 10000.0),
    all.equal(t2[[r0, "Disabled"]], 0.0),
    all.equal(t2[[r0, "Dead"]], 0.0),
    all.equal(t2[[r0, "QALY"]], 0.0),
    all.equal(t2[[r0, "cQALY"]], 0.0)
  )
  # cycle 1
  r <- which(t2[, "Cycle"] == 1L)
  stopifnot(
    all.equal(t2[[r, "Well"]], 6000.0),
    all.equal(t2[[r, "Disabled"]], 2000.0),
    all.equal(t2[[r, "Dead"]], 2000.0),
    all.equal(10000L * t2[[r, "QALY"]], 7400.0, tolerance = 1.0, scale = 1.0),
    all.equal(10000L * t2[[r, "cQALY"]], 7400.0, tolerance = 1.0, scale = 1.0)
  )
  # cycle 2
  r <- which(t2[, "Cycle"] == 2L)
  stopifnot(
    all.equal(t2[[r, "Well"]], 3600.0),
    all.equal(t2[[r, "Disabled"]], 2400.0),
    all.equal(t2[[r, "Dead"]], 4000.0),
    all.equal(10000L * t2[[r, "QALY"]], 5280.0, tolerance = 1.0, scale = 1.0),
    all.equal(10000L * t2[[r, "cQALY"]], 12680.0, tolerance = 1.0, scale = 1.0)
  )
  # cycle 23
  r <- which(t2[, "Cycle"] == 23L)
  stopifnot(
    all.equal(t2[[r, "Well"]], 0.0, tolerance = 1.0, scale = 1.0),
    all.equal(t2[[r, "Disabled"]], 1.0, tolerance = 1.0, scale = 1.0),
    all.equal(t2[[r, "Dead"]], 9999.0, tolerance = 1.0, scale = 1.0),
    all.equal(10000L * t2[[r, "QALY"]], 0.0, tolerance = 1.0, scale = 1.0),
    all.equal(
      10000L * t2[[r, "cQALY"]], 23752.0, tolerance = 5.0, scale = 1.0
    )
  )
  # cycle 24
  r <- which(t2[, "Cycle"] == 24L)
  stopifnot(
    all.equal(t2[[r, "Well"]], 0.0, tolerance = 1.0, scale = 1.0),
    all.equal(t2[[r, "Disabled"]], 0.0, tolerance = 1.0, scale = 1.0),
    all.equal(t2[[r, "Dead"]], 10000.0, tolerance = 1.0, scale = 1.0),
    all.equal(t2[[r, "QALY"]], 0.0, tolerance = 1.0, scale = 1.0),
    all.equal(
      10000L * t2[[r, "cQALY"]], 23752.0, tolerance = 5.0, scale = 1.0
    )
  )
})