#2/13/23 Parameter formatting:
# - Generally expects list arrays with each column corresponding to a state (mu, Xsig2), tip (Ysig2, nobs), or trait (X0)
# - Also accepts matrices in the case of X0/nobs
# - Also accepts lists of lists, where each sublist represents a column
# - Unnamed elements readily recycled to fill missing portions of inputs, but if no unnamed elements are found, will be
# filled in with defaults (identity for Xsig2, 0s for mu/Ysig2/nobs/X0)
# - Entries of nobs corresponding to internal nodes are never derived from recycling and always default to 0 if unspecified
#' @rdname make.contsimmap
#'
#' @order 2
#'
#' @export
sim.conthistory<-function(tree,ntraits=1,traits=paste0('trait_',seq_len(ntraits)),nobs=NULL,
nsims=100,res=100,
X0=NULL,Xsig2=NULL,Ysig2=NULL,mu=NULL,
verbose=FALSE){
#takes care of all the necessary formatting work...
list2env(.prep.contsimmap(tree,nsims,res,X0,Xsig2,Ysig2,mu,conditional=FALSE,ntraits,traits,nobs),envir=environment())
#at some point want to generalize to have multiple nobs per call to simulate differently-structured trait.data...
#a little confusing at this point since the trait.data lookup element refers to X0 here, but trait.data for make.contsimmap
#but that can be resolved manually for now
#^2/14/23: need to look up if the above still holds
#Can now handle multiple nobs, but things are still a bit confusing to me with how lookups are handled
#Should eventually become more concrete/organized as code matures
tmp<-.uncond.traversals(prune.seq,anc,tree[[1]][['edge']],Ntip(tree[[1]]),
maps,
X0,nobs,Xsig2,Ysig2[,Yperm,drop=FALSE],mu,lookup,
nts,seed,
verbose=verbose)
#format output
x<-tmp[[1]]
trait.data<-tmp[[2]]
attr(x,'ts')<-ts
attr(x,'tree')<-tree
attr(x,'maps')<-maps
attr(x,'treeID')<-treeID
traits<-colnames(Xsig2[[1]])
attr(x,'traits')<-setNames(rep(1,length(traits)),traits)
#need to first reconfigure lookup to refer to include newly simulated trait.data...
counter<-1
for(i in seq_along(lookup)){
tmp.n<-length(lookup[[i]][['matches']])
lookup[[i]][['table']]<-lookup[[i]][['table']][lookup[[i]][['matches']],,drop=FALSE]
lookup[[i]][['matches']]<-seq_len(tmp.n)
lookup[[i]][['table']][,1]<-counter:(counter+tmp.n-1)
counter<-counter+tmp.n
}
attr(x,'params')<-matrix(list(trait.data,Xsig2,Ysig2,mu,lookup,list(fxn="sim.conthistory")),
6,1,
dimnames=list(c('trait.data','Xsig2','Ysig2','mu','lookup','call_info'),NULL))
.uncompress(x)
}
#may want to make some function for auto-estimation of Xsig2/Ysig2
#' Make continuous stochastic character maps
#'
#' These functions create continuous stochastic character maps (class
#' "\code{contsimmap}") by simulating and mapping evolving continuous
#' characters on a given phylogeny. These maps may be conditioned on
#' observed trait data (\code{make.contsimmap()}) or used to simulate
#' trait data (\code{sim.conthistory()}).
#'
#'
#'
#' @param tree A phylogeny or list of phylogenies with or without mapped
#' discrete characters (classes "\code{phylo}", "\code{multiPhylo}",
#' "\code{simmap}", or "\code{multiSimmap}"). Lists of phylogenies with
#' differing discrete character histories are allowed, but lists with
#' \emph{differing topologies are currently not supported}! I plan to
#' implement "\code{multiContsimmap}"-type objects for this purpose in the
#' future.
#'
#'
#' @param trait.data A numeric matrix/vector or data frame specifying
#' observed trait measurements on which to condition simulations. Generally,
#' rows should correspond to different observations while columns correspond
#' to different traits (vectors can only supply data for a single trait). Any
#' tip/node in the phylogeny may be associated with 0 or more observations; use
#' \code{NA} to specify missing trait measurements in the case of partial
#' observations (i.e., only a subset of traits were measured in a given
#' observation).
#'
#' To assign observations to tips/nodes, the data \emph{must} be
#' labelled in some way according to
#' \code{tree$tip.label}/\code{tree$node.label} (node labels default to their
#' numeric index if not provided in \code{tree$node.label}). These labels must
#' be provided as names for vectors, rownames for matrices, and
#' a column of factor/character data for data frames. In the case of data
#' frames, if multiple columns with factor/character data are found, the column
#' with the most matches to \code{tree$tip.label}/\code{tree$node.label} is
#' automatically chosen (any rownames are converted to a column before this
#' step). Trait names are simply taken from the column names of matrices/data
#' frames, and default to "\code{trait_<i>}" for the \code{<i>}th column of
#' trait data if not provided.
#'
#' To specify different trait data for each
#' phylogeny/simulation, format \code{trait.data} instead as a list of
#' vectors/matrices/data frames (see Recycling Behavior section for further
#' explanation of parameter recycling across phylogenies/simulations).
#'
#' Note
#' that, in the case of \code{sim.conthistory()}, trait data is \emph{simulated}
#' rather than provided as an \emph{input}. In this case, the number and names
#' of traits are controlled by the arguments \code{ntraits} and \code{traits},
#' respectively. The number of observations per tip/node may be specified via
#' the argument \code{nobs}, while the \code{X0} argument determines the
#' "starting" trait values at the root of the phylogeny.
#'
#'
#' @param ntraits The number traits to simulate.
#'
#'
#' @param traits A character vector specifying the names of each trait. The
#' \code{<i>}th unnamed trait defaults to "\code{trait_<i>}".
#'
#'
#' @param nobs A numeric vector specifying the number of observations to
#' simulate per tip/node.
#'
#' Entries are assigned to tips/nodes if named according
#' to \code{tree$tip.label}/\code{tree$node.label} (node labels default to
#' their numeric index if not provided in \code{tree$node.label}).
#' Unnamed/unassigned entries are recycled to "fill in" values for any tips
#' lacking input (\emph{but not internal nodes}) in order of increasing node
#' index. If no unnamed/unassigned entries are available, tips and internal
#' nodes default to 1 and 0 observations, respectively.
#'
#' To specify differing
#' numbers of observations for each phylogeny/simulation, format \code{nobs}
#' instead as a list of vectors or matrix with each column/column name
#' corresponding to different tips/nodes (see Recycling Behavior section for further
#' explanation of parameter recycling across phylogenies/simulations).
#'
#'
#' @param X0 A numeric vector specifying the starting trait values at the root
#' of the phylogeny.
#'
#' Entries are assigned to traits if named according to
#' \code{traits}. Unnamed/unassigned entries are recycled to "fill in" values
#' for any traits lacking input in the same order as given in \code{traits}.
#' If no unnamed/unassigned entries are available, defaults to 0.
#'
#' To specify
#' differing starting trait values for each phylogeny/simulation, format
#' \code{X0} instead as a list of vectors or matrix with each column/column
#' name corresponding to different traits (see Recycling Behavior section for
#' more further explanation of parameter recycling across
#' phylogenies/simulations).
#'
#'
#' @param nsims The number of simulations to perform. Each simulation will
#' correspond to a separate phylogeny in \code{tree}, which is recycled as
#' needed. For example, providing a "\code{multiSimmap}" object of length 3
#' for \code{tree} and specifying \code{nsims=7} will result in 7 simulations
#' on phylogenies 1, 2, 3, 1, 2, 3, and 1, in that order.
#'
#'
#' @param res Controls the approximate number of timepoints at which to sample
#' trait values across the entire height of the phylogeny (i.e., from root to
#' last-surviving tip). Higher values result in more densely-sampled character
#' histories but take longer to simulate and use more computer memory.
#'
#'
#' @param Xsig2 A list of numeric matrices/vectors specifying the evolutionary
#' rate matrices for each discrete character state mapped onto the phylogeny.
#' Evolutionary rate matrices are symmetric matrices with diagonal and
#' off-diagonal entries corresponding to evolutionary rates and covariances,
#' respectively. Vectors are assumed to be diagonal matrices (i.e., no
#' evolutionary covariance among traits).
#'
#' Matrix/vector entries are assigned
#' to pairs of traits based on associated row/column names (plain names in the
#' case of vectors), which are matched to \code{trait}. When possible,
#' unspecified matrix entries (\code{NA} entries) and row/column names are
#' automatically set to make inputted matrices symmetric. Any remaining
#' unnamed/unassigned rows/columns are recycled as a block-diagonal matrix to
#' "fill in" values for traits completely lacking input in the same order as
#' given in \code{traits}. All remaining unspecified rates and covariances
#' default to 1 and 0, respectively. Specifying an invalid variance-covariance
#' matrices (either due to asymmetry or not being positive semidefinite)
#' results in an error.
#'
#' List elements are assigned to discrete character states
#' if named according to the state names given in \code{tree}. Any
#' unnamed/unassigned list elements are recycled to "fill in" for states
#' lacking input in alphanumeric order. If no unnamed/unassigned list elements
#' are available, defaults to identity matrix (i.e., all rates of 1 with no
#' covariance).
#'
#' To specify differing evolutionary rate matrices for each
#' phylogeny/simulation, format \code{Xsig2} instead as a list of lists or
#' matrix-shaped list with each column/column name corresponding to different
#' discrete character states (see Recycling Behavior section for further
#' explanation of parameter recycling across phylogenies/simulations).
#'
#'
#' @param Ysig2 A list of numeric matrices/vectors specifying the intraspecifc
#' and/or measurement error for each tip/node in the phylogeny. These are
#' symmetric matrices with diagonal and off-diagonal entries corresponding to
#' the variances and covariances, respectively, of trait measurements for a
#' particular tip/node. Vectors are assumed to be diagonal matrices (i.e., no
#' covariance among trait measurements within a given node).
#'
#' Matrix/vector
#' entries are assigned to pairs of traits based on associated row/column names
#' (plain names in the case of vectors), which are matched to \code{trait}.
#' When possible, unspecified matrix entries (\code{NA} entries) and row/column
#' names are automatically set to make inputted matrices symmetric. Any
#' remaining unnamed/unassigned rows/columns are recycled as a block-diagonal
#' matrix to "fill in" values for traits completely lacking input in the same
#' order as given in \code{traits}. All remaining unspecified matrix entries
#' default to 0. Specifying an invalid variance-covariance matrices (either
#' due to asymmetry or not being positive semidefinite) results in an error.
#'
#' List elements are assigned to tips/nodes if named according to
#' \code{tree$tip.label}/\code{tree$node.label} (node labels default to their
#' numeric index if not provided in \code{tree$node.label}). Any
#' unnamed/unassigned list elements are recycled to "fill in" for
#' nodes/tips lacking input in order of increasing node index. If no
#' unnamed/unassigned list elements are available, defaults to matrix of 0s.
#'
#' To
#' specify differing intraspecific/measurement errors for each
#' phylogeny/simulation, format \code{Ysig2} instead as a list of lists or
#' matrix-shaped list with each column/column name corresponding to different
#' tips/nodes (see Recycling Behavior section for further
#' explanation of parameter recycling across phylogenies/simulations).
#'
#'
#' @param mu A list of numeric vectors specifying evolutionary trends for each
#' discrete character state mapped onto the phylogeny.
#'
#' Vector entries are
#' assigned to traits if named according to \code{traits}. Unnamed/unassigned
#' entries are recycled to "fill in" values for any traits lacking input in
#' the same order as given in \code{traits}. If no unnamed/unassigned entries
#' are available, defaults to 0.
#'
#' List elements are assigned to discrete
#' character states if named according to the state names given in \code{tree}.
#' Any unnamed/unassigned list elements are recycled to "fill in" for states
#' lacking input in alphanumeric order. If no unnamed/unassigned list elements
#' are available, defaults to vector of 0s.
#'
#' To specify differing trends for
#' each phylogeny/simulation, format \code{mu} instead as a list of vectors or
#' matrix with each column/column name corresponding to different discrete
#' character states (see Recycling Behavior section for more further
#' explanation of parameter recycling across phylogenies/simulations).
#'
#'
#' @param verbose \code{TRUE} or \code{FALSE}: do you want to print progress
#' bars to monitor the progress of the contsimmapping algorithm? Defaults to
#' \code{FALSE}.
#'
#'
#'
#' @return An object of class "\code{contsimmap}", which is essentially a
#' fancy 3D array-shaped list with rows corresponding to edges, columns
#' to traits, and slices to simulations. Each element in the list is a vector
#' of simulated continuous trait values. Thus, to limit investigation to
#' particular edge(s) \code{i}, trait(s) \code{j}, and/or simulation(s)
#' \code{k}, one may simply subset \code{contsimmap} objects using standard
#' indexing notation, e.g., \code{contsimmap[i, j, k]} (as with base R, one
#' may omit indices to avoid subsetting along a particular dimension, e.g.,
#' \code{contsimmap[i, , k]}). Lists of "edgewise information" for a
#' \emph{single} edge/trait/simulation combination may be directly extracted
#' using double brackets, e.g., \code{contsimmap[[i, j, k]]}. This yields a list of
#' 3 components:
#' \itemize{
#' \item{\code{values}: the simulated continuous trait values, including the
#' value at the node immediately ancestral to edge \code{i} for convenience.}
#' \item{\code{ts}: the time points at which the simulated continuous trait
#' values occur.}
#' \item{\code{states}: the discrete character states at each time point (and
#' their preceding time interval).}
#' }
#' Lists of multiple "edgewise information lists" may be extracted using a
#' single vector/matrix of indices with single brackets as in base R, e.g., \code{contsimmap[1:3]},
#' \code{contsimmap[cbind(i, j, k)]}, etc.
#'
#' Notably, \code{contsimmap} objects also store a lot of additional data as
#' object "attributes". These consist of data necessary for downstream
#' analyses, as well as cached calculations that are simply annoying to
#' recompute on the fly:
#' \itemize{
#' \item{\code{ts}: a list of numeric vectors specifying the main time points
#' at which continuous trait values are sampled along each edge. List elements
#' are named and ordered according to edge index. Notably, these vectors
#' exclude any time points corresponding to discrete character state shifts,
#' and thus apply across all phylogenies/simulations. }
#' \item{\code{tree}: the list of phylogenies along on which continuous trait
#' values are mapped. The inputted \code{tree} argument is always coerced to
#' an object of class "\code{multiSimmap}" (these include a single discrete
#' character state named "\code{0}" if no mapped discrete character states
#' were originally provided).}
#' \item{\code{maps}: this attribute is mainly for caching calculations
#' that would otherwise have to be tediously recomputed from the \code{ts} and
#' \code{tree} attributes. Thus, I may look into allowing users to discard
#' this attribute in cases where computer memory is a concern, though the
#' package isn't set up for this yet. In any case, users need not bother with
#' accessing/messing with this attribute, but for completeness: this is a 3D
#' array-shaped list with rows corresponding to edges, columns to the different
#' phylogenies in the \code{tree} attribute, and slices to various stored
#' quantities. These stored quantities are:
#' \itemize{
#' \item{\code{coarse}: numeric vectors providing the length of each time interval
#' spent in a discrete character state along each edge, similar to the
#' "\code{maps}" element within normal \code{simmap} objects as implemented
#' in the \bold{phytools} package.}
#' \item{\code{ts}: numeric vectors of \emph{all} time points at which continuous
#' trait values are sampled along each edge, now including those pesky extra
#' time points corresponding to discrete character state shifts.}
#' \item{\code{dts}: numeric vectors of the time intervals between each time point
#' along each edge and the previous time point. In the case of the first
#' time point, the previous time point is simply the height of the node
#' immediately ancestral to the edge.}
#' \item{\code{bb.dts}: numeric vectors of the time intervals between each time
#' point and the previous time point divided by the intervals between each
#' time point and the next "critical time point" (i.e., time points
#' corresponding to either the nodes in the phylogeny or discrete character
#' state shifts) along each edge. Very helpful for Brownian Motion bridge
#' calculations.}
#' \item{\code{bb.sds}: numeric vectors of the standard deviations of
#' continuous trait values at each time point along each edge under a
#' Brownian Motion model with rate 1 and conditioned on fixed continuous
#' trait values at both the immediately previous time point and next critical time point. Again
#' very helpful for Brownian Motion bridge calculations.}
#' \item{\code{state}: character vectors of discrete character states at
#' each time point (and its preceding time interval) along each edge.}
#' \item{\code{incl}: logical vectors returning \code{FALSE} for each time
#' point corresponding to a discrete character state shift along each edge.
#' Helps quickly determine which time points are "main" time points
#' repeated across all phylogenies/simulations and which aren't.}
#' \item{\code{inds}: numeric vectors of the indices of critical time
#' points along each edge.}
#' }
#' }
#' \item{\code{traits}: again, users should generally not view/mess with this
#' attribute, which is simply a named vector encoding where the information
#' about a given trait is stored in the \code{params} attribute.}
#' \item{\code{params}: this is a pretty critical attribute storing
#' information regarding how the different traits in the \code{contsimmap}
#' object were constructed, but it is hard to interpret directly. Users
#' wanting to check the parameters used to simulate different traits should
#' instead use the function \code{get.param.info()}. In any case, this is a
#' matrix-shaped list with each row corresponding to different stored
#' information and columns to different groups of traits constructed at the
#' same time (as encoded in the \code{traits} attribute). The rows are
#' specifically:
#' \itemize{
#' \item{\code{trait.data}: either the trait data upon which simulated
#' continuous trait values were conditioned (i.e., in the case of
#' \code{make.contsimmap()}) or the trait data simulated by
#' \code{sim.conthistory()}. This is provided in a list of matrices format,
#' with list elements corresponding to different phylogenies/simulations,
#' matrix rows to observations, and matrix columns to traits.}
#' \item{\code{Xsig2}: evolutionary rate matrices used to simulate
#' trait values in matrix-shaped list format, with rows corresponding to
#' different phylogenies/simulations and columns to discrete character
#' states.}
#' \item{\code{Ysig2}: intraspecific variance/measurement error
#' matrices used to simulate trait values in matrix-shaped list format, with
#' rows corresponding to different phylogenies/simulations and columns to
#' tips/nodes.}
#' \item{\code{mu}: evolutionary trends used to simulate
#' trait values in matrix-shaped list format, with rows corresponding to
#' different phylogenies/simulations and columns to discrete character
#' states.}
#' \item{\code{lookup}: matrix of indices with each row corresponding to a
#' different simulation and columns to \code{trait.data}, \code{Xsig2},
#' \code{Ysig2}, and \code{mu}. This conveys which parameters were used to
#' produce each simulation.}
#' \item{\code{call_info}: a list of information about how groups of traits
#' were constructed--mainly including the component \code{fxn}, which
#' provides the name of the function used to construct traits. More
#' complicated trait constructions will generally yield
#' additional components here, such as the formula used to define
#' traits in calls to \code{make.traits()}, the names of traits used to
#' modify evolutionary parameters in calls to \code{diffusion()}, the
#' breakpoints/state names used to convert continuous traits into
#' discrete ones in calls to \code{threshold()}, etc.}
#' }
#' }
#' }
#'
#' @section Recycling Behavior:
#' Each parameter input system (\code{trait.data}, \code{nobs}, \code{X0},
#' \code{Xsig2}, \code{Ysig2}, and \code{mu}) has certain idiosyncrasies, but
#' they all follow a similar philosophy. There are three steps:
#' \enumerate{
#' \item{Match labeled inputs to appropriate nodes/traits/states.}
#' \item{"Fill in" for nodes/traits/states lacking inputs with unlabeled
#' inputs if they exist (recycling the unlabeled inputs as needed) and default
#' values otherwise.}
#' \item{If multiple parameter values are provided, different parameter values
#' will be applied to each phylogeny in \code{tree}, recycling as necessary.
#' This creates some interesting quirks when the length of different parameter
#' values exceeds the number of phylogenies. For example, if \code{tree}
#' contains 3 phylogenies and 4 lists of matrices are specified for
#' \code{Xsig2}, then simulations on the 2nd and 3rd phylogenies will use the
#' 2nd and 3rd \code{Xsig2} lists, respectively, while simulations on the 1st
#' phylogeny will alternate between the 1st and 4th \code{Xsig2} lists.}
#' }
#' I tried to make warning messages informative enough to make potentially
#' unexpected recycling behaviors apparent. Users specifying complicated
#' simulations involving varying parameter values can always double-check their
#' inputs were interpreted correctly using \code{get.param.info()}.
#'
#' @order 1
#'
#' @export
make.contsimmap<-function(tree,trait.data,
nsims=100,res=100,
Xsig2=NULL,Ysig2=NULL,mu=NULL,
verbose=FALSE){
#takes care of all the necessary formatting work...
list2env(.prep.contsimmap(tree,nsims,res,trait.data,Xsig2,Ysig2,mu,TRUE),envir=environment())
#does the post/preorder traversals, returns final simulated trait data
x<-.cond.traversals(prune.seq,anc,des,ndes,
maps,
parsed.obs,parsed.mis,nobs,Xsig2,Ysig2[,Yperm,drop=FALSE],mu,lookup,
nts,NTS,t1s,seed,x,v,dx,dv,
verbose=verbose)
#format output
attr(x,'ts')<-ts
attr(x,'tree')<-tree
attr(x,'maps')<-maps
attr(x,'treeID')<-treeID
traits<-colnames(Xsig2[[1]])
attr(x,'traits')<-setNames(rep(1,length(traits)),traits)
attr(x,'params')<-matrix(list(trait.data,Xsig2,Ysig2,mu,lookup,list(fxn="make.contsimmap")),
6,1,
dimnames=list(c('trait.data','Xsig2','Ysig2','mu','lookup','call_info'),NULL))
.uncompress(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.