#' Spatial Generation epidemic genetic simulator
#'
#' This function starts with a root nucelotide sequence and evolves it via
#' a spatial epidemic generational model. The result can then be used elsehere to
#' generate newick trees and necessary enxus files for BEAUti/BEAST etc
#'
#'
#' @param fasta.file path to a fasta sequence to be used as the root, otherwise random sequence used
#' @param seq.length length of root sequence if not specified by fasta.file
#' @param N - total popualation size, such that N-I = S
#' @param mu - mutation root in base changes per generation time
#' @param R0 - basic reproduction number
#' @param Tg - mean generation time
#' @param kappa - the rate of transitions and transversions
#' @param model - substitution model - avaialble so far JC69, HKY85
#' @param I - Initial infected space
#' @param samples - 1/samples is equal to the random sample size of tips stored
#'
#' @return Returns a list of 9, with the first 7 lists giving the full infection network. As such
#' res$ID[n] corresponds to res$Time/Parent/IsFather/Offspring/Branch.Length/Sequence/[n]. res$Seq and
#' res$Seq.ID relate to eachother and the value within res$Sequence relates to res$Seq.ID and thus can be
#' used to map a sequence to each individual
#'
#'
#' @export
#'
#' @examples
#' ## simulate data
#' TODO(OJ)
#'
#'
Spatial_Generation_Sim <- function(fasta.file=NULL,seq.length=1000,spaces=3,Nv=rep(100,spaces),
dm=matrix(c(1, 1/100, 1/10000, 1/100, 1, 1/100, 1/10000, 1/100, 1), nrow=3, ncol=3),
mu=0.0001,R0=3,Tg=2,kappa=2,model="HKY85",I=1,samples=10){
## HANDLE VARIABLES ##
##########################################################
ptm <- proc.time()
## check declared model is valid
model.list <- c("JC69","K80","HKY85");
if(!model %in% model.list ) stop("Model given is not valid")
## handle arguments and create variables
##### ADJUST SUSCEPTIBLES
Sv <- Nv
Sv[I] <- Sv[I]-1
Sv <- matrix(data = Sv, nrow = 1,ncol = 3)
Iv <- rep(0,spaces)
Iv[I] <- 1
Iv <- matrix(data = Iv, nrow = 1,ncol = 3)
N <- sum(Nv)
nuc <- as.DNAbin(c("a","c","g","t"))
## EXTRA FUNCTIONS ##
##########################################################
## function to create sequence from scratch
seq_generate <- function(){
res <- sample(nuc, size=seq.length, replace=TRUE)
class(res) <- "DNAbin"
return(res)
}
## function to update results table
update_results <- function(Results,offspring){
# Update Boolean showing ID is now a node
Results$IsFather[parent.id] <- TRUE;
Results$Offspring[parent.id] <- offspring
# Allocate children by first creating distribution of generation times
# Timing of new infections is drawn randomly from a poisson distribution with mean Tg
times <- sort(rpois(offspring,Tg))
######################################
## CALCULATE MATRIX AND WORK OUT LOCATION OF INFECTION
location.matrix <- cumsum((t((dm*(tail(Sv,1)[1,])))*(tail(Iv,1)[1,]))[Results$Space[parent.id],])/max(cumsum((t((dm*(tail(Sv,1)[1,])))*(tail(Iv,1)[1,]))[Results$Space[parent.id],]))
location <- runif(1,0,1) >= location.matrix
inf.location <- min(which(location==FALSE))
## Assign location of children
# Assign the parent and generation times to the children in order
# i.e. if node 4 & 5 are siblings, Tg[4] < Tg[5]
Results$Parent[child.id:(child.id+offspring-1)] <- parent.id
Results$Space[child.id:(child.id+offspring-1)] <- inf.location
Results$Branch.Length[child.id:(child.id+offspring-1)] <- times
Results$Time[child.id:(child.id+offspring-1)] <- times+Results$Time[Results$Parent[child.id:(child.id+offspring-1)]]
return(Results)
}
## function to evaluate probability of mutation event
mutation_prob <- function(sequence){
# first assign the class of DNAbin to sequence (funny)
class(sequence)<-"DNAbin"
# second create the transition matrix P
if (model == "JC69"){
P <- JC69(mu,Tg)
} else if (model == "HKY85"){
P <- HKY85(mu, t = Tg, kappa=kappa, pi=base.freq(sequence))
} else if (model == "K80") {
P <- K80(mu, t = Tg, kappa=kappa)
}
# using the transition matrix and the absolute numbers of nucelotides caluclate the probability
# that no mutations occur
abs.freq <- base.freq(sequence,freq=TRUE)
res <- (P[1,1])^abs.freq[1]*(P[2,2])^abs.freq[2]*(P[3,3])^abs.freq[3]*(P[4,4])^abs.freq[4]
return(res)
}
## function to evolve sequence given succesful mutation event
seq_evolve <- function(sequence){
# first create the transition matrix P
if (model == "JC69"){
P <- JC69(mu,Tg)
} else if (model == "HKY85"){
P <- HKY85(mu = mu, t = Tg, kappa = kappa, pi = base.freq(sequence))
} else if (model =="K80") {
P <- K80(mu, t = Tg, kappa=kappa)
}
# using the transition matrix create a mutation matrix given we know a mutation occurs
mut.P <- P
diag(mut.P) <- 0
mut.mat <- Matrix::t(Matrix::t(mut.P)/Matrix::colSums(mut.P))
# randomly find nucelotide position and mutate it accordingly
nuc.pos <- round(runif(1,1,seq.length))
sequence <- s2n(as.character(sequence),base4 = FALSE)
sequence[nuc.pos] <- which(rmultinom(n=1,size=1,p=mut.mat[,sequence[nuc.pos]])==1)
sequence <- nuc[sequence]
# carry out any other mutations that may occur
res <- t(nuc[(which(apply(P[(s2n(as.character(sequence),base4 = FALSE)),], 1, rmultinom, n=1, size=1)==1)-(0:(sequence.length-1))*4)])
return(res)
}
## MAIN FUNCTION ##
##########################################################
## if no sequence is provided, generate random sequence
if(is.null(fasta.file)) {
root <- seq_generate()
} else {
## convert given fasta file path into DNAbin root
root <- read.FASTA(fasta.file)
}
sequence.length <- length(root)
## initialise results
######################################
Results <- list(ID=1:N, Time=rep(0,N), Parent=NA, IsFather = rep(FALSE,N), Offspring = NULL,
Branch.Length = NULL, Sequence=1, Space = rep(0,N))
## Assign location of root
Results$Space[1] <- I
Sequence.Results <- list(Seq = root,Seq.ID=1)
## set tickers, i.e. the first infected individual
## must be id #2 with parent id #1
parent.id <- 1
child.id <- 2
new.id <- 1
## create starting mutation probability
Prob.Mutation <- mutation_prob(sequence=root)
## create progress bar
pb <- winProgressBar(title="Percentage of population infected", label="0% done", min=0, max=N, initial=1)
## INITIATE LOOP ##
##########################################################
while (sum(tail(Sv,1))>0){
## Check to see if epidemic hasn't died out
if(parent.id==child.id) stop("Epidemic died out!")
## Calculate the number of new infections, randomly choosing from a negative binomial with mean R0
offspring = rnbinom(1,1,mu = R0)
## If the infected indivdual is the root or the last parent ensure there are new infections
if (parent.id == 1 | (child.id-parent.id ==1)){
while (offspring == 0){
offspring = rnbinom(1,1,mu = R0)
}
}
## first check what the smallest non zero susceptible count is
if(min(tail(Sv,1))==0){
snzs <- min(tail(Sv,1)[tail(Sv,1)>0])
} else {
snzs <- min(tail(Sv,1))
}
## If the infected individual is not the root 0 new infections are possible
if (offspring == 0) {
Results$Offspring[parent.id] = offspring
}
## Consider case when there are more susceptibles than offspring
else if (offspring > 0 && offspring < snzs){
# update infection network results
Results <- update_results(Results,offspring)
# determine if there has been a mutation event and if so evolve the sequence and calculate new prob.mutation
if (runif(1,0,1) > Prob.Mutation[Results$Sequence[parent.id]]){
new.id <- max(Sequence.Results$Seq.ID)+1
Results$Sequence[child.id:(child.id+offspring-1)] <- new.id
if (new.id==2){
new.seq <- seq_evolve(sequence = Sequence.Results$Seq[])
Sequence.Results$Seq <- rbind.DNAbin(t(Sequence.Results$Seq),new.seq)
} else {
new.seq <- seq_evolve(sequence = Sequence.Results$Seq[Results$Sequence[parent.id],])
Sequence.Results$Seq <- rbind.DNAbin(Sequence.Results$Seq,new.seq)
}
Prob.Mutation[new.id] <- mutation_prob(sequence = Sequence.Results$Seq[new.id,])
Sequence.Results$Seq.ID[new.id] <- new.id
} else {
Results$Sequence[child.id:(child.id+offspring-1)] <- Results$Sequence[parent.id]
}
## update the ID number of the next child
child.id <- child.id + offspring
}
## Consider case when there are more offspring than susceptibles
else {
## Cap number of offspring equal to available susceptibles
offspring <- snzs
## update infection network results
Results <- update_results(Results,offspring)
## determine if there has been a mutation event and if so evolve the sequence anc calculate new prob.mutation
if (runif(1,0,1) > Prob.Mutation[Results$Sequence[parent.id]]){
new.id <- max(Sequence.Results$Seq.ID)+1
Results$Sequence[child.id:(child.id+offspring-1)] <- new.id
if (new.id==2){
new.seq <- seq_evolve(sequence = Sequence.Results$Seq[])
Sequence.Results$Seq <- rbind.DNAbin(t(Sequence.Results$Seq),new.seq)
} else {
new.seq <- seq_evolve(sequence = Sequence.Results$Seq[Results$Sequence[parent.id],])
Sequence.Results$Seq <- rbind.DNAbin(Sequence.Results$Seq,new.seq)
}
Prob.Mutation[new.id] <- mutation_prob(sequence = Sequence.Results$Seq[new.id,])
Sequence.Results$Seq.ID[new.id] <- new.id
} else {
Results$Sequence[child.id:(child.id+offspring-1)] <- Results$Sequence[parent.id]
}
## update the ID number of the next child
child.id <- child.id + offspring
}
## update the change in susceptibles
#################CHANGE HERE TO Offspring * c(1,0,0) from location section
if(offspring > 0){
change <- rep(0,spaces)
change[Results$Space[max(which(Results$Space>0))]] <- 1*offspring
#if(parent.id==1){
# Sv <- rbind(Sv,Sv-Change)
# Iv <- rbind(Iv,Iv+Change)
#} else {
Sv <- rbind(Sv,tail(Sv,1)-change)
Iv <- rbind(Iv,tail(Iv,1)+change)
#}
}
## update the ID number of the next parent node to be considered
parent.id <- parent.id + 1
## update progress bar
info <- sprintf("%d%% done", round((((N-sum(tail(Sv,1)))/N)*100)))
setWinProgressBar(pb, round((((N-sum(tail(Sv,1)))/N)*N)), label=info)
}
## Check to see evolution as occurred!
if(new.id==1) stop("No Evolution - Perhaps root sequence is not realistic?")
row.names(Sequence.Results$Seq)= makeLabel(rep("seq",max(Sequence.Results$Seq.ID)),make.unique = TRUE)
## turn parent.id into a list to put into the result for the purposes of being able to figure out while tips are extant
Parent.ID <- list(parent.id=parent.id)
res <- append(Results,Sequence.Results)
res <- append(res,Parent.ID)
## print times
close(pb)
print(paste(N,"individuals analysed, concerning a",length(root),"long sequence within",((proc.time() - ptm)[3]),"secs"))
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.