In n8thangreen/treeSimR: treeSimR

# devtools::document(roclets = "vignette")

options(width = 1000)

``` {r setup, echo=FALSE, results="hide"}

define YAML and R code chunk header and footer

lang_output <- function(x, lang) { cat(c(sprintf("%s", lang), x, ""), sep="\n") }

r_output <- function(x) lang_output(x, "r") yaml_output <- function(x) lang_output(x, "yaml")

knitr::opts_chunk$set( fig.width=7, fig.height=5)

## Initiate trees

```r
library(yaml)
library(data.tree)
library(magrittr)
library(rprojroot)

devtools::load_all(".", quiet = TRUE)

Load a tree template from the \code{treeSimR} package.

path_dtree <- system.file("raw data/LTBI_dtree-cost_SIMPLE.yaml", package = "treeSimR")
osList <- yaml.load_file(path_dtree)

The raw decision tree file is a tab-spaced file such as the following:

yaml_output(readLines(path_dtree))

We save this to a .yaml text file and then give it as a yaml file to a data.tree object using the yaml and data.tree packages. This is then represented as a list in R.

# osList <- yaml.load(yaml)
osNode <- as.Node(osList)
osNode

Better still, use the \code{treeSimR} package function to do this, checking for tree integrity and defining an additional costeffectiveness.tree class.

# scenarios_cost <- find_package_root_file("raw data",
#                                          "scenario-parameter-values_cost.csv") %>% 
#                     read.csv()
# # scenarios_cost <- read.csv("raw data/scenario-parameter-values_cost.csv")
# 
# CEtree <- treeSimR::costeffectiveness_tree(yaml_tree = path_dtree,
#                                            data_val = scenarios_cost)
# osNode <- CEtree$osNode
# print(osNode)

A neat way of exploring the tree is with the listviewer package widget.

# library(listviewer)
# l <- ToListSimple(osNode)
# jsonedit(l)

Simulate a scenario

We can now sample values for each branch, given the distributions defined for each. This could be the cost or health detriment.

# rpayoff <- osNode$Get(sampleNode)
# osNode$Set(payoff = rpayoff)
# print(osNode)

Now given the sampled values, e.g. cost, and the probabilities, we can calculate the expected values at each node, from leaf to root.

# osNode$Do(payoff, traversal = "post-order", filterFun = isNotLeaf)
# print(osNode)

Similarly to above, we have created a better wrapper function to perform these steps:

# osNode <- calc_expectedValues(osNode)
# print(osNode)

Monte Carlo forward simulation

We are now in a position to do a probability sensitivity analysis (PSA) and calculate multiple realisations for specific nodes e.g. those at which a decision is to be made.

# MonteCarlo_expectedValues(osNode, n = 10)

Pathway Probabilities

To feed into a compartmental model like a Markov model we need state probabilities. That is, the probability of ending-up in the one of the terminal state of the tree that are also starting states for the other model. These are calculated by taking the product of the probabilities along each pathway from root to leaf.

Once again, we've written a function to do this, which we can append to the the tree. Below we give the terminal states in a dataframe.

# path_probs <- calc_pathway_probs(osNode)
# osNode$Set(path_probs = path_probs)
# 
# terminal_states <- data.frame(pathname = osNode$Get('pathString', filterFun = isLeaf),
#                               path_probs = osNode$Get('path_probs', filterFun = isLeaf))
# terminal_states

Specifically, the starting state probabilities of the subsequent compartmental model are for aggregated sub-populations. We can simply sum over these in an ad-hoc way.

The non-LTBI individuals either never had LTBI or where successfully treated.

# startstate.nonLTBI <- grepl("/Complete Treatment", x = terminal_states$pathname) | grepl("nonLTBI", x = terminal_states$pathname)
# startstate.LTBI <- !startstate.nonLTBI

The expected proportion of individuals in LTBI and non-LTBI after screening is thus,

# healthstatus <- NA
# healthstatus[startstate.nonLTBI] <- "nonLTBI"
# healthstatus[startstate.LTBI] <- "LTBI"
# 
# aggregate(terminal_states$path_probs, by=list(healthstatus), FUN=sum)

Further, we can sample from the terminal state probabilities to give a sample of compartmental model start state proportions. This can capture the variability due to the cohort size.

# samplesize <- 100000
# numsamples <- 10
# 
# sample.mat <- matrix(NA, nrow = nrow(terminal_states), ncol = numsamples)
# for (i in 1:numsamples){
#   
#   sample.mat[,i] <- table(sample(x = 1:nrow(terminal_states), size = samplesize, prob = terminal_states$path_probs, replace = TRUE))/samplesize
# }
# 
# head(sample.mat)
# apply(sample.mat, 2, function(x) aggregate(x, by=list(healthstatus), FUN=sum))

The \code{treeSimR} function to do this is

# get_start_state_proportions(terminal_states$path_probs, healthstatus, samplesize, numsamples)

Risk Profile

Further, the pathway probabilities can be used to give the distribution of the terminal state values e.g. cost or time. This is called the risk profile of the decision tree.

# osNode <- calc_riskprofile(osNode)
# print(osNode, "type", "path_prob", "path_payoff")

# plot(data.frame(osNode$Get('path_payoff', filterFun = isLeaf),
#            osNode$Get('path_prob', filterFun = isLeaf)), type="h",
#      xlab="payoff", ylab="probability")

Deterministic sensitivity analysis

The above methods employ probability sensitivity analysis. We can also do deterministic (or scenario) based sensitivity analysis. That is we simulate the model over a grid of pre-specified parameter values. We have already included these above in the construction of the costeffectiveness_object in data$data_prob and data$data_val.

# print(CEtree)

Select a scenario number and run:

# transform to tidy format
# scenario_parameter_p.melt <- reshape2::melt(data = CEtree$data$data_prob,
#                                             id.vars = "scenario", variable.name = "node", value.name = "p")

# assign_branch_values(osNode.cost = osNode,
#                      osNode.health = osNode,
#                      # parameter_p = subset(scenario_parameter_p.melt, scenario == 1),
#                      parameter_cost = subset(CEtree$data$data_val, scenario == 1)) 
# print(CEtree$osNode)

Optimal decisions

TODO

We can get the software to calculate the optimal decision for us, rather than returning the expections to compare. This can be done from right to left, iteratively.

# ##TODO##
# osNode$Do(decision, filterFun = function(x) x$type == 'decision')
# osNode$Get('decision')[1]

##TODO##
## probabilty of successfully & correctly treating LTBI
# dummy <- rep(0, osNode$totalCount)
# dummy[12] <- 1
# osNode$Set(payoff = dummy)
# print(osNode, "type", "p", "distn", "mean", "sd", "payoff")
# osNode$Do(payoff, traversal = "post-order", filterFun = isNotLeaf)
# print(osNode, "type", "p", "distn", "mean", "sd", "payoff")
# osNode$Get('payoff')[1]