phydynR: Model-based coalescent simulation and likelihood for phylodynamic inference

Documented in build.demographic.process DatedTree show.demographic.process sim.co.tree

require(Rcpp)
require(inline)
#~ sourceCpp( 'dAL1.cpp')
#~ sourceCpp( 'treeSimulatorCpp2.cpp')
#~ sourceCpp( 'treeSimulatorCpp3.cpp')
#NOTE  co sim method for sampling co heights not ensure A>2 for each coheight



# Generate a demographic model from input strings, optionally compiling as Cpp
# using Rcpp and optionally solving as SDEs
# should return func(theta,  t0, t1, res=1e3) -> list(times, F, G, Y)

#' Build a demographic process model (ODE or SDE)
#'
#' A model is constructed by supplying birth rates, migration rates, and death rates.
#' The model can be used to simulate demographic histories or as an input to coalescent
#' simulation or likelihood calculation.
#' The demographic model can be created using ordinary differential equations (ODEs)
#' or stochastic differential equations (SDEs) 
#'
#' @param births A named character vector or matrix of mathematical expressions
#'   that describe model birth rates. Names correspond to each deme. If there is
#'   more than one deme, a matrix must be supplied describing birth rates between
#'   each pair of demes. Either R or C code can be supplied (see rcpp option).
#' @param nonDemeDynamics A named character vector of mathematical expressions
#'   that describe rate of change for dynamic variables which are not demes;
#'   for example: an equation for the number susceptible in an SIR model.
#' @param migrations A named character vector or matrix of mathematical expressions
#'   that describe model migration rates. Names correspond to each deme. If there
#'   is more than one deme, a matrix must be supplied describing migration rates
#'   between each pair of demes. Either R or C code can be supplied
#'   (see rcpp option).
#' @param deaths A named character vector of mathematical expressions that
#'   describe death rates in each deme.
#' @param parameterNames A character vector providing the names of static
#'   parameters used in the model.
#' @param rcpp If TRUE, the expressions are interpreted as C code using the Rcpp
#'   package.
#' @param sde If TRUE, a stochastic differential equation model is constructed;
#'   if FALSE, an ordinary differential equation model is constructed.
#'
#' @return An object of class \code{demographic.process}, which is a function
#'   that can be used to simulate the model. The model can also be used as an
#'   input to tree simulation or likelihood calculation.
#' @author Erik Volz
#' @export
#'
#' @examples
#' # A simple exponential growth model with birth rates beta, and death rates gamma:
#' # I is the number of infected individuals.
#' dm <- build.demographic.process(births=c(I = 'parms$beta * I'),
#'                                 deaths = c(I = 'parms$gamma * I'),
#'                                 parameterNames=c('beta', 'gamma'),
#'                                 rcpp=FALSE,
#'                                 sde = TRUE)
#' # Do a simulation and plot the trajectory:
#' show.demographic.process(dm,
#'                          theta = list(beta = 1.5, gamma = 1),
#'                          x0  = c(I = 1),
#'                          t0 = 0,
#'                          t1 = 10)
build.demographic.process <- function( births,  nonDemeDynamics=NA,  migrations=NA, deaths=NA,  parameterNames = c(), rcpp = TRUE, sde=FALSE)
{
	if (is.vector(births) & length(births) > 1) stop('there must be exactly one expr for births or a square matrix of expressions for births')
	#~ 	if (is.vector(births)){
	#~ 		m <- 1
	#~ 	} else{
	#~ 		m <- nrow(births)
	#~ 	}
	if (is.vector(births) & (length(births)==1) ){
		bn <- names(births)
		births <- matrix( c( births, '0.', '0.', '0.'), nrow = 2, ncol = 2)
		colnames(births) = rownames(births) <- c(bn, 'V2')
	}
	if (!any(is.na(migrations))){
		if (is.vector(migrations) & (length(migrations)==1))
		{
			bn <- names(migrations)
			migrations <- matrix( c( migrations, '0.', '0.', '0.'), nrow = 2, ncol = 2)
			colnames(migrations) = rownames(migrations) <- c(bn,  'V2')
		}
	}
	if (is.vector(migrations) & !(length(migrations)==1)) stop('Migrations must be matrix or length 1 vector')
	if (is.vector(births) & !(length(births)==1) ) stop( 'Births must be matrix or length 1 vector' )
	demeNames <- rownames(births)
	m <- nrow(births)
	if ( any (is.na( nonDemeDynamics )) & length(nonDemeDynamics)==1 ){
		mm <- 0
		nonDemeNames <- NULL
	} else if ( any (is.na(nonDemeDynamics)) & length(nonDemeDynamics)>1){
		stop('NA values in nonDemeDynamics')
	} else{
		nonDemeNames <- names(nonDemeDynamics)
		mm <- length(nonDemeNames)
	}
	
	if (m==2 & length(deaths)==1 & demeNames[2]=='V2'){
		deaths <- c(deaths, V2='0.')
	}
	if (any(is.na(migrations))){
		migrations <- matrix('0.', nrow=m, ncol=m)
		rownames(migrations)=colnames(migrations)<- demeNames
	}
	if (any(is.na(deaths))){
		deaths <- rep('0.', m)
		names(deaths) <- demeNames
	}
	
	if (rcpp)
	{
		print(paste(date(), 'Compiling model...'))
		macros <- paste(collapse='\n', sapply(1:(m+mm) ,function(i){
			ii <- i -1 
			paste('#define' , c(demeNames, nonDemeNames)[i], paste(sep='', '(double)(X[', ii, '])' )) 
		}))
		
		if (length(parameterNames) > 0){
			macros <- paste(macros, sep='\n', 
			paste(collapse='\n'
				, sapply(1:length(parameterNames), function(i){
					ii <- i -1
					paste('#define' , parameterNames[i], paste(sep='', '(double)(PARMS[', ii, '])' )) 
				}))
				, '\n'
			)
		}
		
		## build F
		# also return row sum and col sum
		Fexprs <- as.vector(sapply(1:m, function(k) sapply(1:m, function(l)  
		{
			paste( sep=''
			 , 'F(', k-1, ',', l-1, ') = std::max(0., (double)('
			 , births[k,l]
			 , '))'
			)
		}
		) ))
		Fheader <- "
NumericVector X(x);
NumericVector PARMS(parms);
double T = as<double>(t);
int M = as<int>(m);
NumericMatrix F(M,M);
CharacterVector rcnames(demenames);
rownames(F) = rcnames;
colnames(F) = rcnames;"
		Fbody <- paste( sep='\n', Fheader , macros 
			, paste( paste(collapse=';\n', Fexprs)
				, ';\n NumericVector rsums(M); for(int k = 0; k <M; k++) rsums(k) = sum(F(k,_));\n'
				, 'NumericVector csums(M); for(int k = 0; k <M; k++) csums(k) = sum(F(_,k));\n'
				, 'return List::create(Named("F") = F, Named("rowsum") = rsums, Named("colsum") = csums);'
			)
		)
		
		Fcpp <<-  cxxfunction(
			signature(x="numeric"
				, t="numeric"
				, m = "integer"
				, parms="numeric"
				, demenames="character"),
			, plugin = 'Rcpp'
			, body = Fbody
		)
		
		
		
		## build G
		Gexprs <- as.vector(sapply(1:m, function(k) sapply(1:m, function(l)  
		{
			paste( sep=''
			 , 'G(', k-1, ',', l-1, ') = std::max(0., (double)('
			 , migrations[k,l]
			 , '))'
			)
		}
		) ))
		Gheader <- "
NumericVector X(x);
NumericVector PARMS(parms);
double T = as<double>(t);
int M = as<int>(m);
NumericMatrix G(M,M);
CharacterVector rcnames(demenames);
rownames(G) = rcnames;
colnames(G) = rcnames;"
		Gbody <- paste( sep='\n' , Gheader, macros
				, paste( paste(collapse=';\n', Gexprs)
				, ';\n NumericVector rsums(M); for(int k = 0; k <M; k++) rsums(k) = sum(G(k,_));\n'
				, 'NumericVector csums(M); for(int k = 0; k <M; k++) csums(k) = sum(G(_,k));\n'
				, 'return List::create(Named("G") = G, Named("rowsum") = rsums, Named("colsum") = csums);')
		)
		
		Gcpp <<-  cxxfunction(
			signature(x="numeric"
				, t="numeric"
				, m = "integer"
				, parms="numeric"
				, demenames="character"),
			, plugin = 'Rcpp'
			, body = Gbody
		)
		
		
		
		## deaths
		death_header <- '
NumericVector X(x);
NumericVector PARMS(parms);
double T = as<double>(t);
int M = as<int>(m);;
CharacterVector rcnames(demenames);
NumericVector deaths(M);
deaths.attr("names") = rcnames;
'
		death_exprs <- sapply( 1:length(deaths), function(i) {
			paste( sep='', 'deaths(', i-1, ') = std::max(0., ', deaths[i], ')' )
		})
		death_body <-paste(sep='\n', death_header, macros
		  ,  paste(collapse=';\n', death_exprs)
		  , ';\n return deaths;'
		)
		death.cpp <<-  cxxfunction(
			signature(x="numeric"
				, t="numeric"
				, m = "integer"
				, parms="numeric"
				, demenames="character"),
			, plugin = 'Rcpp'
			, body = death_body
		)
		
		
		## non deme dynamics
		if (mm > 0)
		{
			ndd_header <- '
	NumericVector X(x);
	NumericVector PARMS(parms);
	double T = as<double>(t);
	int MM = as<int>(mm);;
	CharacterVector rcnames(nondemenames);
	NumericVector ndd(MM);
	ndd.attr("names") = rcnames;
	'
			ndd_exprs <- sapply(1:length(nonDemeDynamics), function(i){
				paste(sep='', 'ndd(', i-1, ') = ', nonDemeDynamics[i] )
			})
			ndd_body <- paste(sep='\n', ndd_header, macros
			  , paste(collapse=';\n', ndd_exprs)
			  , ';\n return ndd; '
			)
			nonDemeDynamics.cpp <<-  cxxfunction(
				signature(x="numeric"
					, t="numeric"
					, mm = "integer"
					, parms="numeric"
					, nondemenames="character"),
				, plugin = 'Rcpp'
				, body = ndd_body
			)
		}
		
		##
		
		if (!sde) #ODE
		{
			.dx__solve.demographic.process <<- function(t, y, parms, ...) 
			{ #note parms is list
				.F <- Fcpp(y, t, m, unlist(parms), demeNames)
				.G <- Gcpp(y, t, m, unlist(parms), demeNames)
				.deaths <- death.cpp(y, t, m, unlist(parms), demeNames)
				dxdeme <- setNames(
				  .F$colsum + .G$colsum - .G$rowsum - .deaths
				 , demeNames
				)
				if (mm > 0)
				{
					.ndd <- nonDemeDynamics.cpp( y, t, mm, unlist(parms), nonDemeNames)
					dxnondeme <- setNames( 
					  nonDemeDynamics.cpp( y, t, mm, unlist(parms), nonDemeNames)
					  , nonDemeNames 
					)
				}else{
					dxnondeme <- NULL
				}
				
				list( c(dxdeme, dxnondeme) )
			}
			
			solve.demographic.process <- function( theta, x0, t0, t1, res = 1e3, integrationMethod='lsoda')
			{ # value : list(times, births, migrations, sizes )
				# NOTE x0 should be passed here in case there are estimated parameters related to initial conditions
				if (m == 2 & length(x0) == (1+mm) & demeNames[2]=='V2') x0 <- c(x0, V2 = 0)
				#reorder x0 if necessary
				if (length(x0)!=m + mm) stop(paste('initial conditons incorrect dimension', x0, m, mm) )
				if ( sum( !(c(demeNames, nonDemeNames) %in% names(x0)) )  > 0)  stop(paste('initial conditions vector incorrect names', names(x0), demeNames, nonDemeNames))
				y0 <- x0[c(demeNames, nonDemeNames)]
				
				#reorder theta if necessary
				#if ( length( setdiff( names(theta), parameterNames) ) > 0) stop(paste('Incorrect parameters included: ', setdiff( names(theta), parameterNames) )) #NOTE will allow extraneous parameters
				if ( length( setdiff(  parameterNames, names(theta)) ) > 0) stop(paste('Missing parameters: ', setdiff(  parameterNames, names(theta)) ))
				theta <- theta[parameterNames]
				
				times <- seq(t0, t1, length.out=res)
				ox <- ode(y=y0
				 , times
				 , func=.dx__solve.demographic.process
				 , parms = as.list(theta)
				 , method = integrationMethod)
				# note does not include first value, which is t0; 2nd value corresponds to root of tree
				Ys <- lapply( nrow(ox):1, function(i) setNames(pmax(0,ox[i, demeNames]), demeNames) ) 
				Fs <- lapply( nrow(ox):1, function(i) {
					Fcpp( ox[i,2:ncol(ox)], ox[i,1], m, unlist(theta), demeNames)$F
				}) 
				Gs <- lapply( nrow(ox):1, function(i) {
					Gcpp( ox[i,2:ncol(ox)], ox[i,1], m, unlist(theta), demeNames)$G
				})
				dths <- lapply( nrow(ox):1, function(i) {
					death.cpp(ox[i,2:ncol(ox)], times[i], m, unlist(theta), demeNames)
				})
				# include ox for debugging
				o <- list( times=rev(times), births=Fs, migrations=Gs, sizes=Ys , ox, deaths=dths )
				class(o) <- c('tfgy', 'list')
				o
			}
		} else{ # SDE
			# solve using given timestep and euler method
			# interpret F & G as rates of process
			solve.demographic.process <- function( theta, x0, t0, t1, res = 1e3, integrationMethod=NA)
			{ # value : list(times, births, migrations, sizes )
				# NOTE x0 should be passed here in case there are estimated parameters related to initial conditions
				#reorder x0 if necessary
				if (m == 2 & length(x0) == 1 & demeNames[2]=='V2') x0 <- c(x0, V2 = 0)
				if (length(x0)!=m + mm) stop('initial conditons incorrect dimension', x0, m, mm) 
				if ( sum( !(c(demeNames, nonDemeNames) %in% names(x0)) )  > 0)  stop('initial conditions vector incorrect names', names(x0), demeNames, nonDemeNames)
				y0 <- x0[c(demeNames, nonDemeNames)]
				
				#reorder theta if necessary
				if ( length( setdiff( names(theta), parameterNames) ) > 0) stop('Incorrect parameters included: ', setdiff( names(theta), parameterNames) )
				if ( length( setdiff(  parameterNames, names(theta)) ) > 0) stop('Missing parameters: ', setdiff(  parameterNames, names(theta)) )
				theta <- theta[parameterNames]
				
				Fs <- list()
				Gs <- list()
				Ys <- list()
				deaths <- list()
				times <- seq(t0, t1, length.out = res )
				Dt <- times[2] - times[1]
				x <- y0
				ox <- matrix(NA, nrow = res, ncol = 1 + m + mm )
				colnames(ox) <- c("time", demeNames, nonDemeNames)
				for (it in 1:res){
					Ys[[it]] <- setNames( pmax(0., x[demeNames] ), demeNames )
					t <- times[it]
					FF <- Fcpp( x, t, m, theta, demeNames)$F
					rF <- matrix( nrow=m, ncol = m, pmax(0, rnorm(m*m, mean = as.vector(FF)*Dt, sd = sqrt( as.vector(FF)*Dt)  ) ) )
					Fs[[it]] <- rF / Dt
					GG <- Gcpp( x, t, m, theta, demeNames)$G
					rG <- matrix( nrow=m, ncol = m, pmax(0, rnorm(m*m, mean = as.vector(GG)*Dt, sd = sqrt( as.vector(GG)*Dt)  ) ) )
					Gs[[it]] <- rG / Dt
					.deaths <- death.cpp(x, t, m, theta, demeNames)
					deaths[[it]] <- .deaths
					rdeaths <- pmax(0, rnorm(m, mean = .deaths * Dt, sd = sqrt(.deaths * Dt)   ) )
					stepx_demes <- colSums( rF) + colSums( rG ) - rowSums( rG ) - rdeaths
					if (mm > 0){
						.ndd <- nonDemeDynamics.cpp( x, t, mm, theta, nonDemeNames)
						r_ndd <- rnorm(mm, mean = .ndd*Dt, sd = sqrt(abs(.ndd*Dt)) ) 
					} else{
						.ndd <- c()
						r_ndd <- c()
					}
					stepx_nondemes <- r_ndd
					x <- setNames( pmax(0, x + c(stepx_demes, stepx_nondemes))
					  , c(demeNames, nonDemeNames) )
					ox[it, ] <- c( t, x )
				}
				
				
				# include ox for debugging
				o <- list( times=rev(times), births=rev(Fs), migrations=rev(Gs), sizes=rev(Ys) , ox, deaths=rev(deaths) )
				class(o) <- c('tfgy', 'list')
				o
			}
		}
		
		print(paste(date(), 'Model complete'))
	} else{ # inputs are R expressions (slow/avoid)
		#parse equations
		pbirths <- sapply( 1:m, function(k) 
			   sapply(1:m, function(l)
				 parse(text=births[k,l])
			))
		migrations[is.na(migrations)] <- '0'
		if (length(migrations)==1) migrations <- matrix('0', nrow=m, ncol=m)
		colnames(migrations)=rownames(migrations) <- demeNames
		pmigrations <- sapply( 1:m, function(k) 
			   sapply(1:m, function(l)
				 parse(text=migrations[k,l])
			))
		pdeaths <- sapply(1:m, function(k) parse(text=deaths[k]) )
		if (mm > 0) {
			pndd <- sapply(1:mm, function(k) parse(text=nonDemeDynamics[k]) )
		} else {
			pndd <- NA
		}
		
		.birth.matrix <- function( x, t, parms) 
		{
			with(as.list(x), 
			 t(matrix( sapply( 1:m^2, function(k){
					epb <- eval(pbirths[k])
					if (any(is.na(epb))) warning('NA/NaN values in births')
					epb[is.na(epb)] <- 0
					epb
				})
			  , nrow=m, ncol=m
			))) -> FF
			colnames(FF) = rownames(FF) <- demeNames
			FF
		}
		.migration.matrix <- function( x, t, parms) 
		{
			with(as.list(x), 
			 t(matrix( sapply( 1:m^2, function(k){
					epb <- eval(pmigrations[k])
					if (any(is.na(epb))) warning('NA/NaN values in migrations')
					epb[is.na(epb)] <- 0
					epb
				  })
				 , nrow=m, ncol=m
			))) -> GG
			colnames(GG) = rownames(GG) <- demeNames
			GG
		}
		tBirths <- function(x, t, parms)
		{
			colSums( .birth.matrix(x,t, parms) )
		}
		tMigrationsIn <- function(x,t, parms)
		{
			colSums( .migration.matrix(x, t, parms) )
		}
		tMigrationsOut <- function(x,t, parms)
		{
			rowSums( .migration.matrix(x, t, parms))
		}
		tDeaths <- function(x, t, parms) 
		{
			with(as.list(x, t), 
			  sapply(1:m, function(k) {
				epb <- eval(pdeaths[k]) 
				if (any(is.na(epb))) warning('NA/NaN values deaths')
				epb[is.na(epb)] <- 0
				epb
			  })
			) 
		}
		dNonDeme <- function(x, t, parms) 
		{
			with(as.list(x, t), 
			  sapply(1:mm, function(k) {
				epb <- eval(pndd[k]) 
				if (any(is.na(epb))) warning('NA/NaN values in dNonDeme')
				epb[is.na(epb)] <- 0
				epb
			  })  
			)
		}
		if (!sde){
			solve.demographic.process <- function( theta, x0, t0, t1, res = 1e3, integrationMethod='lsoda')
			{ # value : list(times, births, migrations, sizes )
				#reorder x0 if necessary
				## NOTE x0 should be passed here in case there are estimated parameters related to initial conditions
				if (m == 2 & length(x0) == (1+mm) & demeNames[2]=='V2') x0 <- c(x0, V2 = 0)
				#reorder x0 if necessary
				if (length(x0)!=m + mm) stop(paste('initial conditons incorrect dimension', paste(x0, m, mm) ) )
				if ( sum( !(c(demeNames, nonDemeNames) %in% names(x0)) )  > 0)  stop(paste('initial conditions vector incorrect names', names(x0), demeNames, nonDemeNames))
				y0 <- x0[c(demeNames, nonDemeNames)]
								
				#reorder theta if necessary
				#if ( length( setdiff( names(theta), parameterNames) ) > 0) stop('Incorrect parameters included: ', setdiff( names(theta), parameterNames) )
				if ( length( setdiff(  parameterNames, names(theta)) ) > 0) stop('Missing parameters: ', setdiff(  parameterNames, names(theta)) )
				theta <- theta[parameterNames]
				parms <- as.list( theta  )
				
				dx <- function(t, y, parms, ...) 
				{
					dxdeme <- setNames( tBirths(y, t, parms) + tMigrationsIn(y, t, parms) - tMigrationsOut(y,t, parms) - tDeaths(y,t, parms), demeNames)
					if (mm > 0)
					{
						dxnondeme <- setNames( dNonDeme(y, t, parms), nonDemeNames )
					}else{
						dxnondeme <- NULL
					}
					
					list( c(dxdeme, dxnondeme) )
				}
				
				times <- seq(t0, t1, length.out=res)
				ox <- ode(y=y0, times, func=dx, parms=parms, method=integrationMethod)
				Ys <- lapply( nrow(ox):1, function(i) ox[i, demeNames] )
				Fs <- lapply( nrow(ox):1, function(i) .birth.matrix(ox[i,], ox[i,1], parms)  ) 
				Gs <- lapply( nrow(ox):1, function(i) .migration.matrix(ox[i,], ox[i,1], parms)  ) 
				deaths <- lapply( nrow(ox):1, function(i) tDeaths(ox[i,], ox[i,1], parms)  ) 
				# include ox for debugging
				o <- list( times=rev(times), births=Fs, migrations=Gs, sizes=Ys , ox, deaths=deaths )
				class(o) <- c('tfgy', 'list')
				o
			}
		} else{
			solve.demographic.process <- function( theta, x0, t0, t1, res = 1e3, integrationMethod=NA)
			{ # value : list(times, births, migrations, sizes )
				# NOTE x0 should be passed here in case there are estimated parameters related to initial conditions
				#reorder x0 if necessary
				if (m == 2 & length(x0) == (1+mm) & demeNames[2]=='V2') x0 <- c(x0, V2 = 0)
				if (length(x0)!=m + mm) stop('initial conditons incorrect dimension', paste(x0, m, mm)  )
				if ( sum( !(c(demeNames, nonDemeNames) %in% names(x0)) )  > 0)  stop('initial conditions vector incorrect names', names(x0), demeNames, nonDemeNames)
				y0 <- x0[c(demeNames, nonDemeNames)]
				
				#reorder theta if necessary
				if ( length( setdiff( names(theta), parameterNames) ) > 0) stop('Incorrect parameters included: ', setdiff( names(theta), parameterNames) )
				if ( length( setdiff(  parameterNames, names(theta)) ) > 0) stop('Missing parameters: ', setdiff(  parameterNames, names(theta)) )
				theta <- theta[parameterNames]
				parms <- as.list( theta  )
				
				Fs <- list()
				Gs <- list()
				Ys <- list()
				deaths<- list()
				times <- seq(t0, t1, length.out = res )
				Dt <- times[2] - times[1]
				x <- y0
				ox <- matrix(NA, nrow = res, ncol = 1 + m + mm )
				colnames(ox) <- c("time", demeNames, nonDemeNames)
				for (it in 1:res){
					Ys[[it]] <- setNames( pmax(0., x[demeNames] ), demeNames )
					t <- times[it]
					FF <- .birth.matrix(x, t, parms )
					rF <- matrix( nrow=m, ncol = m, pmax(0, rnorm(m*m, mean = as.vector(FF)*Dt, sd = sqrt( as.vector(FF)*Dt)  ) ) )
					Fs[[it]] <- rF / Dt
					GG <- .migration.matrix(x, t, parms)
					rG <- matrix( nrow=m, ncol = m, pmax(0, rnorm(m*m, mean = as.vector(GG)*Dt, sd = sqrt( as.vector(GG)*Dt)  ) ) )
					Gs[[it]] <- rG / Dt
					.deaths <- tDeaths( x, t, parms )
					rdeaths <- pmax(0, rnorm(m, mean = .deaths * Dt, sd = sqrt(.deaths * Dt)   ) )
					deaths[[it]] <- rdeaths
					stepx_demes <- colSums( rF) + colSums( rG ) - rowSums( rG ) - rdeaths
					stepx_demes[is.na(stepx_demes)] <- 0
					if (mm > 0){
						.ndd <- dNonDeme( x, t, parms)
						r_ndd <- rnorm(mm, mean = .ndd*Dt, sd = sqrt(abs(.ndd*Dt)) ) 
						stepx_nondemes <- r_ndd
						stepx_nondemes[is.na(stepx_nondemes)] <- 0
					} else{
						.ndd <- c()
						r_ndd <- c()
						stepx_nondemes <- r_ndd
					}
					
					x <- setNames( pmax(0, x + c(stepx_demes, stepx_nondemes) )
					  , c(demeNames, nonDemeNames) )
					ox[it, ] <- c( t, x )
				}
				
				
				# include ox for debugging
				o <- list( times=rev(times), births=rev(Fs), migrations=rev(Gs), sizes=rev(Ys) , ox, deaths=rev(deaths) )
				class(o) <- c('tfgy', 'list')
				o
			}
		}
	}

	# return value is func
	class(solve.demographic.process) <- c('demographic.process', 'function')
	solve.demographic.process
}


#' DatedTree function 
#' 
#' Create a DatedTree class object; it includes heights for each node and other 
#'   helper variables
#'
#' @param phylo A phylogenetic tree of class ape::phylo. Note that this tree
#'    should be a timetree. 
#' @inheritParams sim.co.tree
#' @param sampleStatesAnnotations Vector of possible discrete character states 
#'   for taxa. If inferring taxon state from label, this provides the possible 
#'   matches for taxon annotations.
#' @param tol Numeric to set the amount of error allowed in the edge length of 
#'    the phylogenetic tree.
#' @param minEdgeLength Numeric and it is the minimum length in the returned
#'    phylogenetic tree.
#' 
#' @details tol and minEdgeLength are necessary because it will adjust the edge
#'    length of the tree to ensure they are greater than 0 and taking into 
#'    consideration the sample times.
#'
#' @return A dated phylogenetic tree with additional metadata to use with 
#'    phydynR functions.
#' @author Erik Volz
#' @export
#'
#' @examples 
#' # read the tree
#' #usually this is a tree that the branch lengths are in unit of calendar time
#' tree <- read.tree(system.file('extdata/hivSimulation.nwk', package='phydynR'))
#'
#' # the sample times are the same, because it is a homochronous sample at 50 years
#' sampleTimes <- rep(50, length(tree$tip.label))
#' names(sampleTimes) <- tree$tip.label
#'
#' # create a tree with dated tips and internal nodes,
#' # will infer the sample states from tip labels
#' bdt <- DatedTree(phylo = tree, 
#'                 sampleTimes = sampleTimes,
#'                 minEdgeLength = 0.01)
#' bdt
#' class(bdt)
DatedTree <- function( phylo, sampleTimes, sampleStates=NULL, sampleStatesAnnotations=NULL, tol = 1e-6
 , minEdgeLength = 0
 ){
	if (is.null(names(sampleTimes))) stop('sampleTimes vector must have names of tip labels')
	if (is.null(sampleStates) & !is.null(sampleStatesAnnotations) ) sampleStates <- .infer.sample.states.from.annotation(phylo, sampleStatesAnnotations)
	if (is.null(sampleStates) & is.null(sampleStatesAnnotations)) { sampleStates <- t(t( rep(1, length(phylo$tip.label)))) ; rownames( sampleStates) <- phylo$tip.label }
	if (is.null( rownames( sampleStates)))  rownames(sampleStates ) <- phylo$tip.label
	if (any(is.na(rownames(sampleStates)))) stop('sampleStates matrix must have row names of tip labels')
	if (!any(is.na(sampleStates))) if (!is.matrix( sampleStates)) stop('sampleStates must be a matrix (not a data.frame)')
	
	# resolve any multifurcations 
	phylo <- tryCatch( { multi2di( phylo ) }, error = function(e) phylo )
	
	phylo$sampleTimes <- sampleTimes[phylo$tip.label]
	phylo$sampleStates <- sampleStates[phylo$tip.label, ] 
	if (is.vector(phylo$sampleStates)) phylo$sampleStates <- t(t( phylo$sampleStates))
	
	phylo$edge.length <- pmax( minEdgeLength, phylo$edge.length )
	
	phylo$n = n <- length(sampleTimes)
	Nnode <- phylo$Nnode
	# compute heights, ensure consistency of sample times and branch lengths
	phylo$maxSampleTime   <- max(phylo$sampleTimes)
	heights <- rep(NA, (phylo$Nnode + length(phylo$tip.label)) )
	heights[1:length(phylo$sampleTimes)] <- phylo$maxSampleTime - phylo$sampleTimes
	curgen <- 1:length(phylo$sampleTimes)
	edgeLengthChange <- TRUE 
	while (edgeLengthChange)
	{
		edgeLengthChange <- FALSE
		while( length(curgen) > 0) { 
			nextgen <- c()
			icurgenedges <- which(  phylo$edge[,2] %in% curgen  )
			for (i in icurgenedges){
				u<- phylo$edge[i,1]
				v<- phylo$edge[i,2]
				if (!is.na(heights[u])){ # ensure branch lengths consistent
					if ( heights[u] > 0 & abs(heights[u] - (phylo$edge.length[i] + heights[v])) > (minEdgeLength+tol) )
					{ #
					  stop( 'Tree is poorly formed. Branch lengths incompatible with sample times.')
					} else if ( 0!=(heights[u] - (phylo$edge.length[i] + heights[v]) ) ){
						edgeLengthChange <- TRUE 
					}
					phylo$edge.length[i] <- max(0, max(minEdgeLength, heights[u] - heights[v] ) )
					heights[u] <- heights[v]  + phylo$edge.length[i]
				} else{
					heights[u] <- phylo$edge.length[i] + heights[v]
				}
				
				nextgen <- c(nextgen, u)
			}
			curgen <- unique(nextgen)
		}
	}
	if (any(is.na(heights))) stop('Could not compute internal node heights. Check sample times and names of sample time vector.')
	phylo$heights <- heights
	phylo$maxHeight <- max(phylo$heights)
	#phylo$heights <- signif( phylo$heights, digits = floor( 1 / phylo$maxHeight /10 )  +  6 ) #
	phylo$parentheights = phylo$parentheight <- sapply( 1:(n+Nnode), function(u){
		i <- which( phylo$edge[,2]== u)
		if (length(i)!=1) return( NA )
		phylo$heights[ phylo$edge[i,1] ]
	})
	
	phylo$root <- which.max( phylo$heights)
	
	ix <- sort( phylo$sampleTimes, decreasing = TRUE, index.return=TRUE)$ix
	phylo$sortedSampleHeights <- phylo$maxSampleTime - phylo$sampleTimes[ix]
	phylo$sortedSampleStates <- phylo$sampleStates[ix,] 
	# parents and daughters 
	phylo$parent = phylo$parents <- sapply(1:(phylo$n+phylo$Nnode), function(u) {
		i <-  which(phylo$edge[,2]==u)
		if (length(i) == 0) return (NA)
		a <- phylo$edge[i ,1]
		if (length(a) > 1) stop("Tree poorly formed; node has more than one parent.")
		a
	}) 
	phylo$daughters <- t(sapply( 1:(phylo$n+phylo$Nnode), function(a){
		uv <- phylo$edge[which(phylo$edge[,1]== a),2]
		if (length(uv)==0) uv <- c(NA, NA)
		#if (length( uv)!=2) print( uv )
		uv
	}))
	
	phylo$daughter2edge <- sapply( 1:(phylo$n+phylo$Nnode), function(u) which( phylo$edge[,2]==u))
	phylo$parent2edges <- sapply( 1:(phylo$n+phylo$Nnode), function(u) which( phylo$edge[,1]==u))
	
	class(phylo) <- c("DatedTree", "phylo")
	phylo
}

#' Simulate a coalescent genealogy based on a demographic process.
#' 
#' A tree is simulated based on a demographic process and user-supplied parameters 
#'   and initial conditions. The times and types of samples must also be supplied.
#'
#' @param theta A named vector of parameter values required by the model
#' @param demographic.process.model An object of class demographic.process. 
#'  See \code{\link[phydynR]{build.demographic.process}}
#' @inheritParams colik
#' @param t0 The time of origin of the process
#' @param sampleTimes A numeric vector providing the times of samples. If named, 
#'   taxon labels will be based on names of the corresponding sample times
#' @param sampleStates For models with more than one deme, a matrix must be 
#'  supplied describing the probability of sampling from each deme. Each row 
#'  corresponds to a sample in the same order as sampleTimes. Each column 
#'  corresponds to probability of sampling each deme. Column names should be 
#'  defined which correspond to deme names in the model. Rows should sum to one.
#' @param finiteSizeCorrections to do
#' @param substitutionRates to do
#' @param sequenceLength to do
#'
#' @return An object of class DatedTree, which subclasses ape::phylo. 
#'  The $heights attribute provides the time of each node in the tree. 
#'  The $maxHeight attribute provides the time of the root.
#' @author Erik Volz
#' @export
#'
#' @examples
#' # A simple exponential growth model with birth rates beta, and death rates gamma: 
#' # I is the number of infected individuals
#'dm <- build.demographic.process(births = c(I = 'parms$beta * I'),
#'                                deaths = c(I = 'parms$gamma * I'),
#'                                parameterNames = c('beta', 'gamma'),
#'                                rcpp = FALSE,
#'                                sde = FALSE)
#' # simulate a tree based on the model: 
#'tre <- sim.co.tree(list(beta = 1.5, gamma = 1),
#'                        dm,
#'                        x0  = c(I = 1),
#'                        t0 = 0,
#'                        sampleTimes = seq(10, 15, length.out = 50),
#'                        res = 1000)
#' # plot the tree 
#'plot(tre)
sim.co.tree <- function(theta, demographic.process.model, x0, t0, sampleTimes
  , sampleStates=NULL
  , res = 1e3
  , integrationMethod='lsoda'
  , finiteSizeCorrections = TRUE
  , substitutionRates = NULL
  , sequenceLength = 0
)
{
	maxSampleTime <- max(sampleTimes)
	sim.co.tree.fgy ( 
	  demographic.process.model( theta, x0, t0, maxSampleTime, res = res, integrationMethod=integrationMethod) 
	  , sampleTimes, sampleStates
	  , substitutionRates = substitutionRates
	  , sequenceLength = sequenceLength
	)
}

#' @export
sim.co.tree.fgy <- function(tfgy,  sampleTimes, sampleStates, step_size_multiplier= NA, finiteSizeCorrections=TRUE
  , substitutionRates = NULL
  , sequenceLength = 0
)
{# res = 1e3, 
	# note sampleStates must be in same order as sampleTimes
	# note may return multiple trees with polytomous root
	if (is.na(step_size_multiplier)) step_size_multiplier <- .10
	times <- tfgy[[1]]
	Fs <- tfgy[[2]]
	Gs <- tfgy[[3]]
	Ys <- tfgy[[4]]
	
	m <- nrow(Fs[[1]])
	if (m < 2)  stop('Error: currently only models with at least two demes are supported')
	
	# check inputs if simulated evo distances
	if (is.null(substitutionRates)){
		substitutionRates <- rep(1, m)
		sequenceLength <- 0
	} else{
		if (length(substitutionRates) == 1){
			substitutionRates <- rep( substitutionRates, m )
		} else if (length(substitutionRates)!=m){
			stop('Number of substitutionRates should be one or equal to the number of demes')
		}
	}
	
	DEMES <- names( tfgy[[4]][[1]] ) 
	if (length(DEMES)!=m){
		DEMES <- as.character( 1:m )
	}
	
	if (step_size_multiplier < 0 | step_size_multiplier > 1) {
		step_size_multiplier <- 1
		warning('step_size_multiplier should be in (0, 1). This parameter has been set to 1.')
	}
	delta_times <- abs(times[2]-times[1]) 
	
	n <- length(sampleTimes)
	if (is.null( sampleStates ) ) {
		sampleStates <- matrix( 0, nrow = length(sampleTimes), ncol = m )
		sampleStates[,1] <- 1
	}
	
	#demographic model should be in order of decreasing time:
	fgyi <- 1:length(Fs)
	if (times[2] > times[1])
		fgyi <- length(Fs):1
	
	if (length(names(sampleTimes))==0){
		names(sampleTimes) <- paste(sep='_', 't', 1:length(sampleTimes))
	}
	names(sampleTimes) <- paste(sep='', 'sim', names(sampleTimes))
	maxSampleTime <- max(sampleTimes)
	ix <- sort( sampleTimes, decreasing = TRUE, index.return=TRUE)$ix
	sortedSampleHeights <- maxSampleTime - sampleTimes[ix]
	sortedSampleStates <- sampleStates[ix,] 
	sortedSampleTimes <- sampleTimes[ix]
	tlabs <- names(sampleTimes)[ix] 
	if (length(tlabs)==0){
		tlabs <- 1:n
		names(sortedSampleTimes) = names(sortedSampleHeights)  <- tlabs
	}
	
	o <- simulateTreeCpp3x0( times[fgyi],  Fs[fgyi],  Gs[fgyi],  Ys[fgyi]
	 , sortedSampleHeights #2
	 , t(sortedSampleStates)
	 , maxSampleTime
	 , m
	 , finiteSizeCorrections #TRUE #fsc
	 , substitutionRates
	 , sequenceLength
	)
	# clean up edge, edge.length and Nnode
	iedge <- which(!is.na(o$edge[,1]))
	o$edge <- o$edge[iedge, ]
	o$edge.length <- o$edge.length[!is.na(o$edge.length)]
	o$Nnode <- length(unique( as.vector(o$edge))) - o$n
	
	o$tip.label <- tlabs
	o$edge <- o$edge + 1
	class(o) <- 'phylo'
	obdt <- NULL
	# NOTE read.tree(text=0): broken with change from ape 5.6 to 5.7
	obdt <- tryCatch({
		rownames(sortedSampleStates) <- names(sortedSampleTimes )
		(  DatedTree( ape::read.tree(text=.phylostructure2newick(o)) , sortedSampleTimes, sortedSampleStates, tol = Inf) )
	}, error = function(e) e )
	
	if (sequenceLength > 0){
		#also return evo distance tree
		oo <- o
		oo$edge.length <- as.vector(oo$edge.length.sps)[iedge]
		oo$Nnode <- length(unique( as.vector(oo$edge))) - oo$n
		oo$tip.label <- tlabs
		oo$edge <- o$edge 
		class(oo) <- 'phylo'
		otree <- NULL
		otree <- tryCatch({
			ape::read.tree(text=ape::write.tree(oo)) 
		}, error = function(e) e )
		return( list( obdt, otree ))
	} else{
		return (obdt)
	}
}

#' Show demographic process
#' 
#' Shows a graphical representation of the demographic process given values 
#' for each parameter of interest, and initial population sizes. The user should
#' also provide the initial size for the population of interest in the their model.
#' 
#' @param demo.model An object of class \code{demographic.process}, which is a function
#'   that can be used to simulate the model. 
#' @inheritParams colik
#' @param t0 Initial time for the simulation (to show the demographic process)
#' @param t1 Final time for tge simulation (to show the demographic process)
#' @param legend_position String for position of legend in final plot. Default
#'   is set to "bottomright"
#' @param ... Additional arguments that can be passed to the function, 
#'   such as graphical parameters.
#' @seealso \code{\link[phydynR]{build.demographic.process}}
#' 
#' @export
#'
#' @examples
#' # A simple exponential growth model with birth rates beta and death rates gamma:
#' # I is the number of infected individuals.
#' dm <- build.demographic.process(births=c(I = 'parms$beta * I'),
#'                                 deaths = c(I = 'parms$gamma * I'),
#'                                 parameterNames=c('beta', 'gamma'),
#'                                 rcpp=FALSE,
#'                                 sde = TRUE)
#'# Do a simulation and plot the trajectory:
#' show.demographic.process(dm,
#'                          theta = list(beta = 1.5, gamma = 1),
#'                          x0  = c(I = 1),
#'                          t0 = 0,
#'                          t1 = 10)
show.demographic.process <- function(demo.model, theta, x0, t0, t1, res = 1e3, integrationMethod='lsoda', legend_position = "bottomright", ...)
{
	tfgy <- demo.model(theta, x0, t0, t1, res = 1e3, integrationMethod=integrationMethod)
	o <- tfgy[[5]] 
	t <- o[,1]
	if ( ((ncol(o)-1)==2) & tail(colnames(o),1)=='V2'){
		# test if this is a single deme model 
		plot( t, o[, 2], type = 'l', xlab = 'Time' , ylab = colnames(o)[2], ...)
	} else{
		matplot( t, o[, 2:ncol(o)], type = 'l' , xlab = 'Time', ylab = '', ...)
	  legend(legend_position, inset = 0.05, legend = colnames(o)[2:ncol(o)], 
	         pch = 1, col = c(1:ncol(o)), horiz = TRUE)
	}
}

# Write a newick string for an ape::phylo structure regardless of internal node order
# 
# Ideally this would be the behavior of ape::write.tree, however that function fails for 
# internal node orderings that do not follow an undocumented standard. 
# 
# examples 
# print( system.time( ( tr0 <- rcoal( 10000 ) )))
# st0 <- Sys.time() 
# tr <- .phylostructure2newick ( tr0 )
# st1 <- Sys.time() 
# tr1 <- read.tree(text = tr )
# print( st1 - st0 )
# ape::ltt.plot( tr0 )
# ape::ltt.lines( tr1, col = 'red', lty = 2 )
.phylostructure2newick <- function(o){
	n <- length( o$tip.label )
	nnode <- o$Nnode 
	nwks <- rep('', n+nnode )
	nwks[ 1:n ] <- sapply( 1:n, function(u) glue::glue( '{o$tip.label[u]}:{o$edge.length[o$edge[,2]==u]}' ))
	queue <- 1:n 

	# compute max distance to tip 
	mdt <- rep(-Inf, n + nnode ) 
	mdt[1:n] <- 0 
	edge0 <- o$edge[ order( o$edge[,2] ), ]
	queue <- 1:n #edge0[1:n,1] 
	anc <- rep(NA, n+nnode )
	anc[ edge0[,2] ] <- edge0[,1]
	repeat{
		for ( u in queue ){
			a <- anc[u] 
			if (!is.na( a )){
				mdt[a] <- max( mdt[a], mdt[u]+1 ) 
			}
		}
		queue <- na.omit( anc[ queue] )
		if ( length( queue ) == 0 )
			break 
	}

	ord <- order( mdt ) |> setdiff( 1:n )
	for (a in ord){
		dgtrs <- o$edge[ o$edge[,1]==a, 2]
		nwks[a] <- paste(collapse=',', nwks[dgtrs])
		edle <- ifelse(a %in% o$edge[,2]
			, o$edge.length[o$edge[,2]==a]
			, 0
			)
		nwks[a] <- glue::glue( '({nwks[a]}):{edle}' )
		NWK <- nwks[a] 
	}
	nwk <- paste(NWK, ';', sep = '' )
	nwk
}
emvolz-phylodynamics/phydynR documentation built on June 12, 2025, 1:02 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
emvolz-phylodynamics/phydynR
Model-based coalescent simulation and likelihood for phylodynamic inference

R/treeSimulatorCpp2.R
In emvolz-phylodynamics/phydynR: Model-based coalescent simulation and likelihood for phylodynamic inference

Defines functions .phylostructure2newick show.demographic.process sim.co.tree.fgy sim.co.tree DatedTree build.demographic.process

Documented in build.demographic.process DatedTree show.demographic.process sim.co.tree

R Package Documentation

Browse R Packages

We want your feedback!

emvolz-phylodynamics/phydynR Model-based coalescent simulation and likelihood for phylodynamic inference

R/treeSimulatorCpp2.R In emvolz-phylodynamics/phydynR: Model-based coalescent simulation and likelihood for phylodynamic inference

Defines functions .phylostructure2newick show.demographic.process sim.co.tree.fgy sim.co.tree DatedTree build.demographic.process

Documented in build.demographic.process DatedTree show.demographic.process sim.co.tree

R Package Documentation

Browse R Packages

We want your feedback!

emvolz-phylodynamics/phydynR
Model-based coalescent simulation and likelihood for phylodynamic inference

R/treeSimulatorCpp2.R
In emvolz-phylodynamics/phydynR: Model-based coalescent simulation and likelihood for phylodynamic inference