#' Time explicit simulator
#'
#'
#' This function starts with a root nucelotide sequence and evolves it via
#' an epidemic generational model. The result can then be used elsehere to
#' generate newick trees and necessary enxus files for BEAUti/BEAST etc.
#'
#'
#' @param fasta.file path to a fasta sequence to be used as the root, otherwise random sequence used
#' @param seq.length length of root sequence if not specified by fasta.file
#' @param N total popualation size, such that N-I = S
#' @param mu mutation root in base changes per generation time
#' @param R0 basic reproduction number
#' @param Tg mean generation time
#' @param kappa the rate of transitions and transversions
#' @param model substitution model - avaialble so far JC69, HKY85
#' @param I Initial infected population
#' @param scale rgamma scale parameter
#' @param size rnbinom size parameter
#' @param parallel if evolution occurs between parent and all children. i.e. each sequence inherited is evolved
#' over a period of time equal to its branch length from the parent.
#' @param true_perfection always 1 mutation
#' @return Returns a list of 11, with the first 7 lists giving the full infection network. As such
#' res$ID[n] corresponds to res$Time/Parent/IsFather/Offspring/Branch.Length/Sequence/[n]. res$Seq and
#' res$Seq.ID relate to eachother and the value within res$Sequence relates to res$Seq.ID and thus can be
#' used to map a sequence to each individual, and parent.id gives the id of the next individual that would be
#' considered for infection. new.id is a counter and will always be equal to 2.
#'
#'
#' @export
#'
#' @examples
#'
#'
Time_Sim <- function(fasta.file=NULL,seq.length=1000,N=1000,mu=0.0001,R0=2,Tg=50,scale=1,size=1,kappa=2,
model="HKY85",I=1,withinhost=TRUE,fixed.offspring=FALSE,fixed.generations=FALSE,t.cont=FALSE,outgroup=FALSE,
parallel=FALSE,true_perfection=FALSE){
## HANDLE VARIABLES ##
##########################################################
ptm <- proc.time()
## check declared model is valid
model.list <- c("JC69","HKY85");
if(!model %in% model.list ) stop("Model given is not valid")
## handle arguments and create variables
susceptibles <- N-I;
nuc <- as.DNAbin(c("a","c","g","t"))
## EXTRA FUNCTIONS ##
##########################################################
## function to create sequence from scratch
seq_generate <- function(){
res <- sample(nuc, size=seq.length, replace=TRUE)
class(res) <- "DNAbin"
return(res)
}
## function to update results table
update_results <- function(Results,offspring,parent.id){
# Update Boolean showing ID is now a node
Results$IsFather[parent.id] <- TRUE;
Results$Offspring[parent.id] <- offspring
# Allocate children by first creating distribution of generation times
# Timing of new infections is drawn randomly from a poisson distribution with mean Tg
if(fixed.generations==TRUE){
times <- sort(rep(Tg,offspring))
} else {
if(t.cont==TRUE){
times <- sort(rgamma(n=offspring,shape=Tg,scale=scale))
} else {
times <- sort(rpois(n=offspring,lambda=Tg))
}
}
# Assign the parent and generation times to the children in order
# i.e. if node 4 & 5 are siblings, Tg[4] < Tg[5]
Results$Parent[child.id:(child.id+offspring-1)] <- parent.id
Results$Branch.Length[child.id:(child.id+offspring-1)] <- times
Results$Time[child.id:(child.id+offspring-1)] <- times+Results$Time[Results$Parent[child.id:(child.id+offspring-1)]]
## Sequence Evolution
for (i in 1:offspring){
## catach the vector vs matrix issue
if(Results$new.id==1){
new.seq <- seq_time_evolve(sequence = Results$Seq[],time=times[i])
gen.seq <- new.seq
Results$Seq <- rbind.DNAbin(t(Results$Seq),new.seq)
Results$new.id <- 2
} else {
if(withinhost==TRUE){
if(i==1){
new.seq <- seq_time_evolve(sequence = Results$Seq[parent.id,],time=times[i])
Results$Seq <- rbind.DNAbin(Results$Seq,new.seq)
} else {
new.seq <- seq_time_evolve(sequence = Results$Seq[child.id-2+i,],time=(times[i]-times[i-1]))
Results$Seq <- rbind.DNAbin(Results$Seq,new.seq)
}
} else {
if(i==1){
new.seq <- seq_time_evolve(sequence = Results$Seq[parent.id,],time=mean(times))
gen.seq <- new.seq
} else {
if(parallel==TRUE){
new.seq <- seq_time_evolve(sequence = Results$Seq[parent.id,],time=mean(times))
} else {
new.seq <- gen.seq
}
}
Results$Seq <- rbind.DNAbin(Results$Seq,new.seq)
}
}
}
return(Results)
}
## function to evolve sequence in time explicit manner
seq_time_evolve <- function(sequence,time){
# first create the transition matrix P
if (model == "JC69"){
P <- JC69(mu,Tg)
} else if (model == "HKY85"){
P <- HKY85(mu = mu, t = time, kappa = kappa, pi = base.freq(sequence))
}
if(true_perfection==TRUE){
mut.P <- P
diag(mut.P) <- 0
mut.mat <- Matrix::t(Matrix::t(mut.P)/Matrix::colSums(mut.P))
# randomly find nucelotide position and mutate it accordingly
nuc.pos <- round(runif(1,1,seq.length))
sequence <- s2n(as.character(sequence),base4 = FALSE)
if(true_perfection_outgroup==TRUE){
sequence[nuc.pos] <- which(rmultinom(n=runif(max(case.tree$Time),1,seq.length),size=1,p=(mut.mat[,sequence[nuc.pos]]))==1)
} else {
sequence[nuc.pos] <- which(rmultinom(n=1,size=1,p=(mut.mat[,sequence[nuc.pos]]))==1)
}
sequence <- t(nuc[sequence])
res <- sequence
} else {
res <- t(nuc[(which(apply(P[(s2n(as.character(sequence),base4 = FALSE)),], 1, rmultinom, n=1, size=1)==1)-(0:(sequence.length-1))*4)])
}
return(res)
}
## MAIN FUNCTION ##
##########################################################
## if no sequence is provided, generate random sequence
if(is.null(fasta.file)) {
load("Data/root.RData")
} else {
## convert given fasta file path into DNAbin root
f <- read.fasta.format(fasta.file)
root <- nuc[s2n(f$org)+1]
}
sequence.length <- length(root)
## initialise results
if(outgroup==FALSE){
Results <- list(ID=1:N, Time=rep(0,N), Parent=NA, IsFather = rep(FALSE,N), Offspring = NULL,
Branch.Length = NULL, Sequence=1:N,Seq = root,Seq.ID=1:N,new.id=1)
} else {
## initialise results
Results <- list(ID=1:(N+1), Time=rep(0,(N+1)), Parent=NA, IsFather = rep(FALSE,(N+1)), Offspring = NULL,
Branch.Length = NULL, Sequence=1:(N+1),Seq = root,Seq.ID=1:(N+1),new.id=1)
}
## set tickers, i.e. the first infected individual
## must be id #2 with parent id #1
parent.id <- 1
child.id <- 2
true_perfection_outgroup <- FALSE
## create progress bar
#pb <- winProgressBar(title="Percentage of population infected", label="0% done", min=0, max=N, initial=1)
## INITIATE LOOP ##
##########################################################
while (susceptibles>0){
## Check to see if epidemic hasn't died out
if(parent.id==child.id) stop("Epidemic died out!")
## Calculate the number of new infections = R0
if(fixed.offspring==TRUE){
offspring = R0
} else {
offspring = rnbinom(1,size=size,mu = R0)
}
## If the infected indivdual is the root or the last parent ensure there are new infections
if (parent.id == 1 | (child.id-parent.id ==1)){
while (offspring == 0){
if(fixed.offspring==TRUE){
offspring = R0
} else {
offspring = rnbinom(1,size=size,mu = R0)
}
}
}
## If the infected individual is not the root 0 new infections are possible
if (offspring == 0) {
Results$Offspring[parent.id] = offspring
}
## Consider case when there are more susceptibles than offspring
else if (offspring > 0 && offspring < susceptibles){
# update infection network results
Results <- update_results(Results=Results,offspring=offspring,parent.id=parent.id)
## update the ID number of the next child
child.id <- child.id + offspring
}
## Consider case when there are more offspring than susceptibles
else {
## Cap number of offspring equal to available susceptibles
offspring <- susceptibles
# update infection network results
Results <- update_results(Results=Results,offspring=offspring,parent.id=parent.id)
## update the ID number of the next child
child.id <- child.id + offspring
}
## update the change in susceptibles
susceptibles <- susceptibles - offspring
## update the ID number of the next parent node to be considered
parent.id <- parent.id + 1
## update progress bar
#info <- sprintf("%d%% done", round((((N-susceptibles)/N)*100)))
#setWinProgressBar(pb, round((((N-susceptibles)/N)*N)), label=info)
}
if (outgroup==TRUE){
true_perfection_outgroup <- TRUE
correct.parent.id <- parent.id
parent.id <- 1
child.id <- N+1
offspring <- 1
times <- max(Results$Time)
Results$Parent[child.id:(child.id+offspring-1)] <- parent.id
Results$Branch.Length[child.id:(child.id+offspring-1)] <- times
Results$Time[child.id:(child.id+offspring-1)] <- times+Results$Time[Results$Parent[child.id:(child.id+offspring-1)]]
## Sequence Evolution
new.seq <- seq_time_evolve(sequence = Results$Seq[parent.id,],time=times)
Results$Seq <- rbind.DNAbin(Results$Seq,new.seq)
parent.id <- correct.parent.id
}
row.names(Results$Seq)= makeLabel(rep("seq",max(Results$Seq.ID)),make.unique = TRUE)
## turn parent.id into a list to put into the result for the purposes of being able to figure out while tips are extant
Parent.ID <- list(parent.id=parent.id)
res <- append(Results,Parent.ID)
## print times
#close(pb)
print(paste(N,"individuals analysed, concerning a",length(root),"long sequence within",((proc.time() - ptm)[3]),"secs"))
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.