#' Simulate tree and sequences in two-population SIR-model
#'
#' \code{simulatetree} does a deterministic simulation of a two-population SIR-model,
#' takes samples and simulates a phylogenetic tree. For all this, it makes use of phydynR
#' (https://github.com/emvolz-phylodynamics/phydynR). Then it simulates DNA-sequences for these samples,
#' with simSeq (\pkg{phangorn}).
#'
#' @param N the population sizes of the two populations
#' @param beta an array of 2x2 transmission rate matrices. The third dimension is the number of matrices, which is the
#' number of time intervals with different transmission rates. Transmission is modelled with frequency-dependent transmission
#' terms (division by N).
#' @param gamma the recovery rate
#' @param mu the death rate. Births balance deaths.
#' @param I0 initial numbers of infecteds. The rest is assumed susceptible.
#' @param t1 a vector with times at which the transmission rates change to the next matrix in \code{beta}.
#' The last time is the duration of the simulation
#' @param res resolution of the epidemic simulation per transmission rate interval (ODE solver)
#' @param sampletimes indicates if sampling occurs at random times proportional to incidence (default \code{"incidence"}) or
#' at random uniform times (\code{"regular"})
#' @param samplesize the number of samples to be taken
#' @param sampleperiod the time interval from which the sampling times should be taken
#' @param sampledist indicates how samples should be divided over the two populations, apart from bias. The default \code{"incidence"} indicates
#' proportionality to incidence; \code{"popsize"} indicates proportionality to population size; \code{"fixed"} indicates
#' no proportionality, i.e. fifty-fifty
#' @param samplebias indicates the extra weight (multiplication factor) given for sampling the first population
#' relative to the second. If equal to 1, the weight is completely determined by \code{sampledist}.
#' @param ... arguments to be passed to simSeq (\pkg{phangorn})
#'
#' @return A list with a tree (class \code{phylo}), and a set of sequences (class \code{phyDat}), and the
#' time course of the prevalence (and susceptible populations).
#'
#' @author Don Klinkenberg (\email{don@@xs4all.nl})
#'
#' @export
simulatetree <- function(N = c(100, 10000),
beta = array(c(1.5,0,0,2, 1.5,0,0.1,2), dim = c(2,2,2)), gamma = 1, mu = .05,
I0 = c(1, 1),
t1 = c(50, 100), res = 1000,
sampletimes = c("incidence", "regular"),
samplesize = 200,
sampleperiod = c(0, tail(t1, 1)),
sampledist = c("incidence", "popsize", "fixed"),
samplebias = 1, ...) {
if(length(dim(beta)) != 3) {
dim(beta) <- c(2, 2, length(beta)/4)
}
if(length(t1) != dim(beta)[3]) {
stop("number of transmission matrices in beta should match length of t1")
}
### Build the demographic process for constant transmission rates, i.e. the transmission model
dp.piece <- phydynR::build.demographic.process(
births = matrix(c('parms$beta[1,1] * S1 * I1 / parms$N[1]', 'parms$beta[1,2] * S1 * I2 / parms$N[1]',
'parms$beta[2,1] * S2 * I1 / parms$N[2]', 'parms$beta[2,2] * S2 * I2 / parms$N[2]'), nrow = 2,
dimnames = list(c("I1","I2"),c("I1","I2"))),
nonDemeDynamics = c(S1 = 'parms$mu * parms$N[1] - parms$beta[1,1] * S1 * I1 / parms$N[1] - parms$beta[1,2] * S1 * I2 / parms$N[1] - parms$mu * S1',
S2 = 'parms$mu * parms$N[2] - parms$beta[2,1] * S2 * I1 / parms$N[2] - parms$beta[2,2] * S2 * I2 / parms$N[2] - parms$mu * S2'),
deaths = c(I1 = 'parms$gamma * I1 + parms$mu * I1',
I2 = 'parms$gamma * I2 + parms$mu * I2'),
parameterNames = c('beta', 'N', 'gamma', 'mu'),
rcpp = FALSE, sde = FALSE
)
### Make epi-curve, incidence curve, and initial states for each piecewise constant-rate transmission model
epicurve <- dp.piece(theta = list(beta = beta[,,1], gamma = gamma, mu = mu, N = N),
x0 = c(I1 = I0[1], I2 = I0[2], S1 = N[1] - I0[1], S2 = N[2] - I0[2]),
t0 = 0, t1 = t1[1], res = res)[[5]]
initialstates <- c()
if(length(t1) > 1) {
for(i in 2:length(t1)) {
initialstates <- rbind(initialstates, tail(epicurve, 1))
epicurve <- head(epicurve,-1)
epicurve <- rbind(epicurve,
dp.piece(theta = list(beta = beta[,,i], gamma = gamma, mu = mu, N = N),
x0 = c(I1 = initialstates[i-1,2], I2 = initialstates[i-1,3],
S1 = initialstates[i-1,4], S2 = initialstates[i-1,5]),
t0 = t1[i-1], t1 = t1[i], res = res)[[5]])
}
}
epicurveoutput <- epicurve
class(epicurveoutput) <- c("deSolve", "matrix")
epicurve <- head(epicurve[epicurve[,1] >= sampleperiod[1] & epicurve[,1] <= sampleperiod[2],], -1)
incfunc <- function(epirow, b_arr, t) {
b <- b_arr[,,sum(epirow[1]>=t)]
c(sum(epirow[4]*b[,1]*epirow[2:3]), sum(epirow[5]*b[,2]*epirow[2:3]))/N
}
inccurve <- apply(epicurve, 1, incfunc, b_arr = beta, t = c(0, t1))
### Determine sample times at times of curve
if(sampletimes[1] == "incidence") {
sampleTimes <- sample(epicurve[,1], samplesize, replace = TRUE, prob = colSums(inccurve))
} else {
sampleTimes <- sample(epicurve[,1], samplesize, replace = TRUE)
}
### Determine sample population
if(sampledist[1] == "incidence") {
sampleprobs <- inccurve[, match(sampleTimes,epicurve[,1]) ] * c(samplebias, 1)
} else if (sampledist[1] == "popsize") {
sampleprobs <- matrix(rep(N,samplesize),nrow = 2) * c(samplebias, 1)
} else {
sampleprobs <- matrix(.5, nrow = 2, ncol = samplesize) * c(samplebias, 1)
}
sampledpops <- apply(sampleprobs, 2, sample, x = 2, size = 1, replace = FALSE)
sampleStates <- matrix(0, nrow = samplesize, ncol = 2, dimnames = list(NULL, c("I1", "I2")))
sampleStates[cbind(1:samplesize,sampledpops)] <- 1
### Add uniform increment to sample times, and sort times and populations
add_incr <- function(sampleTime) {
runif(1, sampleTime, epicurve[epicurve[,1] > sampleTime, 1][1])
}
sampleTimes <- sapply(sampleTimes, add_incr)
sampleStates <- sampleStates[order(sampleTimes),]
sampleTimes <- sort(sampleTimes)
### Build the complete demographic process, i.e. the transmission model
if(dim(beta)[3] == 1) {
beta <- beta[,,1]
dp <- dp.piece
} else {
dp <- function(theta, x0, t0, t1, res = 1000, integrationMethod = "lsoda") {
# time interval by time interval
dpsections <- list()
# first time interval start with original initial conditions
dpsections[[1]] <- dp.piece(theta = list(beta = theta$beta[,,1], gamma = theta$gamma, mu = theta$mu,
N = theta$N),
x0 = x0, t0 = 0, t1 = theta$t[1], res = res, integrationMethod = integrationMethod)
# other time intervals start with earlier obtained 'initial conditions'
for(i in 2:length(theta$t)) {
dpsections[[i]] <- dp.piece(theta = list(beta = theta$beta[,,i], gamma = theta$gamma, mu = theta$mu,
N = theta$N),
x0 = c(I1 = initialstates[i-1,2], I2 = initialstates[i-1,3],
S1 = initialstates[i-1,4], S2 = initialstates[i-1,5]),
t0 = theta$t[i-1], t1 = theta$t[i],
res = res, integrationMethod = integrationMethod)
}
# build a single object from all sections
result <- list(
times = tail(dpsections,1)[[1]]$times,
births = tail(dpsections,1)[[1]]$births,
migrations = tail(dpsections,1)[[1]]$migrations,
sizes = tail(dpsections,1)[[1]]$sizes,
dpsections[[1]][[5]])
for(i in 2:length(dpsections)) {
result$times <- c(result$times, tail(dpsections,i)[[1]]$times[-1])
result$births <- c(result$births, tail(dpsections,i)[[1]]$births[-1])
result$migrations <- c(result$migrations, tail(dpsections,i)[[1]]$migrations[-1])
result$sizes <- c(result$sizes, tail(dpsections,i)[[1]]$sizes[-1])
result[[5]] <- rbind(result[[5]], dpsections[[i]][[5]][-1,])
}
return(result)
}
}
### Simulate the phylogenetic tree
cotree <- phydynR::sim.co.tree(theta = list(beta = beta, gamma = gamma, mu = mu, N = N,
initialstates = initialstates, t = t1),
dp,
x0 = c(I1 = I0[1], I2 = I0[2], S1 = N[1] - I0[1], S2 = N[2] - I0[2]),
t0 = 0, res = res, sampleTimes = sampleTimes,
sampleStates = sampleStates)
class(cotree) <- "phylo"
### Simulate the sequences
seqs <- phangorn::simSeq(cotree, ...)
return(list(tree = cotree,
sequences = seqs,
epicurve = epicurveoutput))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.