knitr::opts_chunk$set(echo = TRUE)
This package includes some functions that compute the basic properties of phase-type distributions, i.e. the density, distribution and quantile function as well as simulations.
In the following, let $\tau= \inf{t\geq 1 \mid X_t = p+1}$ be discrete phase-type distributed with initial distribution $\pi$ and sub-transition probability matrix $T$, $\tau \sim DPH_{p}(\pi,T)$.\
Then the density for $\tau$ is given by
\begin{align}
\mathbb{P}(\tau= n) &= \sum_{i=1}^{p} \mathbb{P}(\tau=n \mid X_{n-1}=i)\mathbb{P}(X_{n-1}=i)\
&= \sum_{i=1}^{p} \mathbb{P}(\tau=n \mid X_{n-1}=i) \sum_{j=1}^{p} \mathbb{P}(X_{n-1}=i \mid X_0 =j ) \mathbb{P}(X_0=j)\
&= \sum_{i=1}^{p} \sum_{j=1}^{p} \mathbb{P}(\tau=n \mid X_{n-1}=i) \mathbb{P}(X_{n-1}=i \mid X_0 =j ) \mathbb{P}(X_0=j)\
\end{align}
for $n \geq 1$, where the first and second equality are due to the law of total probability. Now we observe that the probability $\mathbb{P}(X_0=j)$ for the Markov jump process $(X_t){t\geq 0}$ to start in $j$ is equal to the $j$'th entry of the initial distribution. Furthermore, the probability $\mathbb{P}(\tau=n \mid X{n-1}=i)$ for the process to be absorbed at time $n$, given that the process is in state $i$ at time $n-1$ is given by the $i$'th entry of the exit probability vector $t = \boldsymbol{e} - T \boldsymbol{e}$. Finally, the probability $\mathbb{P}(X_{n-1}=i \mid X_0 =j )$ for the process to be in state $i$ at time $n-1$ given that the process started in state $j$ (or equivalently the probability of going from state $j$ to state $i$ in time $n-1$) is given by the $(j,i)$'th element of sub-transition probability matrix $T^{n-1}$. Hence, we have that
\begin{align}
\mathbb{P}(\tau= n) &= \sum_{i=1}^{p} \sum_{j=1}^{p} \mathbb{P}(\tau=n \mid X_{n-1}=i) \mathbb{P}(X_{n-1}=i \mid X_0 =j ) \mathbb{P}(X_0=j)\
&= \sum_{i=1}^{p} \sum_{j=1}^{p} t_i (T^{n-1} ){ji} \pi_j\
&= \sum{i=1}^{p} \sum_{j=1}^{p} \pi_j (T^{n-1} )_{ji} t_i\
&= \pi T^{n-1} t.
\end{align}
We also need to take possible defect sizes into account. So let $\tau \sim DPH_{p}(\pi,T)$ with defect size $\pi_{p+1}$, i.e. \begin{equation} \tau \sim \begin{cases} 1 & \text{ with probability $\pi_{p+1}$} \ DPH_{p}(\pi,T) & \text{ with probability $1- \pi_{p+1}$} . \end{cases} \end{equation} Then we have that \begin{align} \mathbb{P}(\tau= n) &= \mathbb{P}(\tau \cdot \mathbb{1}{{\tau \geq 1}}= n)\ &= \mathbb{P}(\tau(\mathbb{1}{{\tau = 1}} + \mathbb{1}{{\tau > 1}}) = n)\ &= \mathbb{P}(\tau\cdot \mathbb{1}{{\tau = 1}} = n) + \mathbb{P}(\tau\cdot \mathbb{1}_{{\tau > 1}} = n) \end{align} Due to the definition of $\tau$, we know that when $\tau > 1$, $\tau \sim DPH_{p}(\pi,T)$. Hence, from the previous computations \begin{equation} \mathbb{P}(\tau\cdot \mathbb{1}_{{\tau > 1}} = n) = \pi T^{n-1} t \quad \text{ for } n\geq 1. \end{equation} On the other hand, \begin{equation} \mathbb{P}(\tau \cdot \mathbb{1}{{\tau = 1}} = n) = \mathbb{P}(1 \cdot \mathbb{1}{{\tau = 1}} = n) = \begin{cases} \mathbb{P}(\tau = 1 )=\pi_{p+1} & \text{ if } n=1\ 0 & \text{ otherwise}. \end{cases} \end{equation} This implies that \begin{align} \mathbb{P}(\tau= n) &= \mathbb{P}(\tau\cdot \mathbb{1}{{\tau = 1}} = n) + \mathbb{P}(\tau\cdot \mathbb{1}{{\tau > 1}} = n)\ &= \begin{cases} \pi T^{n-1} t + \pi_{p+1} & \text{ if } n=1\ \pi T^{n-1} t & \text{ otherwise}. \end{cases} \end{align}
Now let $\tau= \inf{t > 0 \mid X_t = p+1}$ be continuous phase-type distributed with initial distribution $\pi$ and sub-intensity rate matrix $T$, $\tau \sim PH_{p}(\pi,T)$.\ Then the density for $\tau$ is given by the derivative of the distribution function (see next section) \begin{align} f(u) &= \frac{\partial}{\partial u} F(u)\ &= \frac{\partial}{\partial u} 1-\pi e^{Tu} \boldsymbol{e} \ &= -\pi e^{Tu}T \boldsymbol{e}\ &= \pi e^{Tu}t \end{align} for $u \geq 0$, where we have used that $t = -T \boldsymbol{e}$.
Let again $\tau= \inf{t\geq 1 \mid X_t = p+1}$ be discrete phase-type distributed with initial distribution $\pi$ and sub-transition probability matrix $T$, $\tau \sim DPH_{p}(\pi,T)$.\
Then the distribution function for $\tau$ is given by
\begin{align}
F_{\tau}(n) = \mathbb{P}(\tau \leq n) &= 1- \mathbb{P}(\tau > n)\
&= 1-\sum_{i=1}^{p} \mathbb{P}( X_{n}=i)\
&= 1-\sum_{i=1}^{p} \sum_{j=1}^{p} \mathbb{P}( X_{n}=i \mid X_0 =j) \mathbb{P}( X_0 =j)
\end{align}
for $n \geq 1$, where the fourth equality is due to the law of total probability. As before, we observe that the probability $\mathbb{P}(X_0=j)$ for the Markov jump process $(X_t){t\geq 0}$ to start in $j$ is equal to the $j$'th entry of the initial distribution, and that the probability $\mathbb{P}(X{n}=i \mid X_0 =j )$ for the process to be in state $i$ at time $n$ given that the process started in state $j$ (or equivalently the probability of going from state $j$ to state $i$ in time $n$) is given by the $(j,i)$'th element of sub-transition probability matrix $T^{n}$. Hence, we have that
\begin{align}
F_{\tau}(n) &= 1-\sum_{i=1}^{p} \sum_{j=1}^{p} \mathbb{P}( X_{n}=i \mid X_0 =j) \mathbb{P}( X_0 =j)\
&= 1-\sum_{i=1}^{p} \sum_{j=1}^{p} (T^{n}){ji}\pi_j\
&= 1-\sum{i=1}^{p} \sum_{j=1}^{p} \pi_j(T^{n})_{ji}\
&= 1- \pi T^{n}\boldsymbol{e}.
\end{align}
As before, let $\tau= \inf{t > 0 \mid X_t = p+1}$ be continuous phase-type distributed with initial distribution $\pi$ and sub-intensity rate matrix $T$, $\tau \sim PH_{p}(\pi,T)$.\
Then the distribution function for $\tau$ is given by
\begin{align}
F_{\tau}(u) = \mathbb{P}(\tau \leq u) &= 1- \mathbb{P}(\tau > u)\
&= 1-\sum_{i=1}^{p} \mathbb{P}( X_{u}=i)\
&= 1-\sum_{i=1}^{p} \sum_{j=1}^{p} \mathbb{P}( X_{u}=i \mid X_0 =j) \mathbb{P}( X_0 =j)
\end{align}
for $u \geq 0$, where the fourth equality is due to the law of total probability. As in the discrete case, we observe that the probability $\mathbb{P}(X_0=j)$ for the Markov jump process $(X_t){t\geq 0}$ to start in $j$ is equal to the $j$'th entry of the initial distribution, and that the probability $\mathbb{P}(X{u}=i \mid X_0 =j )$ for the process to be in state $i$ at time $u$ given that the process started in state $j$ (or equivalently the probability of going from state $j$ to state $i$ in time $u$) is given by the $(j,i)$'th element of sub-transition probability matrix $e^{Tu}$. Hence, we have that
\begin{align}
F_{\tau}(n) &= 1-\sum_{i=1}^{p} \sum_{j=1}^{p} \mathbb{P}( X_{n}=i \mid X_0 =j) \mathbb{P}( X_0 =j)\
&= 1-\sum_{i=1}^{p} \sum_{j=1}^{p} (e^{Tu}){ji}\pi_j\
&= 1-\sum{i=1}^{p} \sum_{j=1}^{p} \pi_j(e^{Tu})_{ji}\
&= 1- \pi e^{Tu}\boldsymbol{e}.
\end{align}
In this package, we used the inbuild function uniroot
to find the quantile for a given probability $p$. Type help("uniroot")
into your console to find out more about this function.
For an object of type discphasetype
with initital distribution $\pi$ and sub-intensity rate matrix $T$ the simulation is conducted roughly in the following way.
For an object of type contphasetype
with initital distribution $\pi$ and sub-transition probability matrix $T$ the simulation is conducted roughly in the same way.
We want to reproduce Figure 3.4 in John Wakeley (2009): "Coalescent Theory: An Introduction", Roberts and Company Publishers, Colorado, which displays the distributions of $T_{\text{MRCA}}$ and $T_{\text{Total}}$ for $n \in {2,5,10,20,50,100}$. The initial distributions and sub-intensity rate matrices are stored in the datasets T_MRCA
and T_Total
that are provided in the package.
First, we compute the distribution of $T_{\text{MRCA}}$
## Defining the vector of quantiles t.vec <- seq(0,4, by=0.1) ## and the matrix holding all distributions dist.mat <- matrix(nrow = 6, ncol = length(t.vec)) ## Now we can compute the distributions for(x in t.vec){ dist.mat[2,which(t.vec ==x)] <- dphasetype(T_MRCA$n5,x) dist.mat[3,which(t.vec ==x)] <- dphasetype(T_MRCA$n10,x) dist.mat[4,which(t.vec ==x)] <- dphasetype(T_MRCA$n20,x) dist.mat[5,which(t.vec ==x)] <- dphasetype(T_MRCA$n50,x) dist.mat[6,which(t.vec ==x)] <- dphasetype(T_MRCA$n100,x) } ## For n=2, the initial distribution is equal to 1 and ## the sub-intensity rate matrix is T.mat = -1. Hence, ## the distribution is given by dist.mat[1,] <- exp(-t.vec)
Now that we have computed the distribution of $T_{\text{MRCA}}$ for all $n\in {2,5,10,20,50,100}$ and $t \in {0,...,4}$, we are able to reproduce the first figure
plot(t.vec, dist.mat[1,], type = "l", main = expression(paste("The distribution of ", T["MRCA"], " for n=2,5,10,20,50,100")), cex.main = 0.9, xlab = "t", ylab = expression(f[T[MRCA]](x)), xlim = c(0,4), ylim = c(0,1), frame.plot = F) points(t.vec, dist.mat[2,], type = "l") points(t.vec, dist.mat[3,], type = "l") points(t.vec, dist.mat[4,], type = "l") points(t.vec, dist.mat[5,], type = "l") points(t.vec, dist.mat[6,], type = "l")
Now, we can perform the same calculations for $T_{\text{Total}}$
## Defining the vector of quantiles t.vec <- seq(0,15, by=0.1) ## and the matrix holding all distributions dist.mat <- matrix(nrow = 6, ncol = length(t.vec)) ## Now we can compute the distributions for(x in t.vec){ dist.mat[2,which(t.vec ==x)] <- dphasetype(T_Total$n5,x) dist.mat[3,which(t.vec ==x)] <- dphasetype(T_Total$n10,x) dist.mat[4,which(t.vec ==x)] <- dphasetype(T_Total$n20,x) dist.mat[5,which(t.vec ==x)] <- dphasetype(T_Total$n50,x) dist.mat[6,which(t.vec ==x)] <- dphasetype(T_Total$n100,x) } ## For n=2, the initial distribution is equal to 1 and ## the sub-intensity rate matrix is T.mat = -1/2. Hence, ## the distribution is given by dist.mat[1,] <- exp(-t.vec/2)/2
Now that we have computed the distribution of $T_{\text{Total}}$ for all $n\in {2,5,10,20,50,100}$ and $t \in {0,...,15}$, we are able to reproduce the second figure
plot(t.vec, dist.mat[1,], type = "l", main = expression(paste("The distribution of ", T["Total"], " for n=2,5,10,20,50,100")), cex.main = 0.9, xlab = "t", ylab = expression(f[T[Total]](x)), xlim = c(0,15), ylim = c(0,0.5), frame.plot = F) points(t.vec, dist.mat[2,], type = "l") points(t.vec, dist.mat[3,], type = "l") points(t.vec, dist.mat[4,], type = "l") points(t.vec, dist.mat[5,], type = "l") points(t.vec, dist.mat[6,], type = "l")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.