R/PCM.R

Defines functions PCMExtractRegimes.PCM PCMExtractRegimes PCMExtractDimensions.PCM PCMExtractDimensions PCMFindMethod PCMCreateLikelihood PCMInfo.PCM PCMInfo PCMPresentCoordinates logLik.PCM PCMLikTrace PCMLik PCMSim PCMLikDmvNorm PCMTrajectory PCMVarAtTime PCMVar PCMMeanAtTime PCMMean PCMCond PCMApplyTransformation.PCM PCMApplyTransformation.default PCMApplyTransformation PCMGetVecParamsRegimesAndModels.PCM PCMGetVecParamsRegimesAndModels PCMMapModelTypesToRegimes.PCM PCMMapModelTypesToRegimes PCMModelTypes.PCM PCMModelTypes PCMNumRegimes.PCM PCMNumRegimes PCMRegimes.PCM PCMRegimes PCMNumTraits.PCM PCMNumTraits PCMUnfixParameter PCMFixParameter PCMCombineListAttribute PCMAddToListAttribute PCMSetAttribute PCMGetAttribute MatchListMembers PCMListMembers PCMGenerateModelTypes PCMGenerateParameterizations PCMTableParameterizations PCMListDefaultParameterizations.default PCMListDefaultParameterizations PCMListParameterizations PCMDescribeParameters PCMSpecify PCMDescribe.PCM PCMDescribe PCMParentClasses.PCM PCMParentClasses format.PCM print.PCM PCMDefaultObject.PCM PCMDefaultObject is.PCM PCM.character PCM.PCM PCM.default PCM PCMOptions PCMModels

Documented in is.PCM MatchListMembers PCM PCMAddToListAttribute PCMApplyTransformation PCMCombineListAttribute PCMCond PCMCreateLikelihood PCMDefaultObject PCMDescribe PCMDescribeParameters PCMExtractDimensions PCMExtractRegimes PCMFindMethod PCMFixParameter PCMGenerateModelTypes PCMGenerateParameterizations PCMGetAttribute PCMGetVecParamsRegimesAndModels PCMInfo PCMLik PCMLikDmvNorm PCMLikTrace PCMListDefaultParameterizations PCMListMembers PCMListParameterizations PCMMapModelTypesToRegimes PCMMean PCMMeanAtTime PCMModels PCMModelTypes PCMNumRegimes PCMNumTraits PCMOptions PCMParentClasses PCMPresentCoordinates PCMRegimes PCMSetAttribute PCMSim PCMSpecify PCMTableParameterizations PCMTrajectory PCMUnfixParameter PCMVar PCMVarAtTime

# Copyright 2016-2019 Venelin Mitov
#
# This file is part of PCMBase.
#
# PCMBase is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PCMBase is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PCMBase.  If not, see <http://www.gnu.org/licenses/>.

#' Get a list of PCM models currently implemented
#' @param pattern a character string specifying an optional for the model-names to search for.
#' @param parentClass a character string specifying an optional parent class of the models to look for.
#' @param ... additional arguments used by implementing methods.
#'
#' @return a character vector of the model classes found.
#' @details The function is using the S3 api function \code{\link{methods}} looking
#' for all registered implementations of the function \code{\link{PCMSpecify}}.
#' @importFrom utils methods
#' @export
#' @examples
#' PCMModels()
#' PCMModels("^OU")
PCMModels <- function(pattern = NULL, parentClass = NULL, ...) {
  models <- sub("PCMSpecify.", "", as.character(methods("PCMSpecify")), fixed = TRUE)
  if(!is.null(pattern)) {
    models <- models[grep(pattern, models, ...)]
  }
  if(!is.null(parentClass)) {
    models <- models[sapply(models, function(m) {
      length(intersect(class(PCM(m)), parentClass)) > 0
    })]
  }
  models
}

#' Global options for the PCMBase package
#'
#'
#' @return a named list with the currently set values of the following global
#' options:
#' \itemize{
#' \item{\code{PCMBase.Value.NA }}{NA value for the likelihood; used in GaussianPCM to
#' return this value in case of an error occurring
#' during likelihood calculation. By default, this is set to \code{as.double(NA)}.}
#' \item{\code{PCMBase.Errors.As.Warnings }}{a logical flag indicating if errors
#' (occuring, e.g. during likelihood calculation) should be treated as warnings
#' and added as an attribute "error" to attach to the likelihood values. Default TRUE.}
#' \item{\item{PCMBase.Raise.Lik.Errors} }{Should numerical and other sort of
#' errors occurring during likelihood calculation be raised either as errors or
#' as warnings, depending on the option \code{PCMBase.Errors.As.Warnings}.
#' Default TRUE. This option can be useful if too frequent warnings get raised
#' during a model fit procedure.}
#' \item{\code{PCMBase.Threshold.Lambda_ij }}{a 0-threshold for abs(Lambda_i + Lambda_j),
#' where Lambda_i and Lambda_j are eigenvalues of the parameter matrix H of an OU or
#' other model. Default 1e-8. See \code{\link{PCMPExpxMeanExp}}.}
#' \item{\code{PCMBase.Threshold.EV }}{A 0-threshold for the eigenvalues of the
#' matrix V for a given branch. The V matrix is considered singular if it has
#' eigenvalues smaller than \code{PCMBase.Threshold.EV } or when the ratio
#' min(svdV)/max(svdV) is below \code{PCMBase.Threshold.SV }. Default is 1e-5.
#' Treatment of branches with singular V matrix is defined by the option
#' \code{PCMBase.Skip.Singular}.}
#' \item{\code{PCMBase.Threshold.SV }}{A 0-threshold for min(svdV)/max(svdV), where
#' svdV is the vector of singular values of the matrix V for a given branch.
#' The V matrix is considered singular if it has eigenvalues smaller than
#' \code{PCMBase.Threshold.EV } or when the ratio min(svdV)/max(svdV) is below
#' PCMBase.Threshold.SV. Default is 1e-6. Treatment
#' of branches with singular V matrix is defined by the option \code{PCMBase.Skip.Singular}.}
#' \item{\code{PCMBase.Threshold.Skip.Singular }}{A double indicating if an internal branch of shorter
#' length with singular matrix V should be skipped during likelihood calculation. Setting this
#' option to a higher value, together with a TRUE value for the option PCMBase.Skip.Singular
#' will result in tolerating some parameter values resulting in singular variance covariance
#' matrix of the transition distribution. Default 1e-4.}
#' \item{\code{PCMBase.Skip.Singular }}{A logical value indicating whether internal branches with
#' singular matrix V and shorter than \code{getOption("PCMBase.Threshold.Skip.Singular")}
#'  should be skipped during likelihood calculation, adding their children
#' L,m,r values to their parent node. Default TRUE. Note, that setting this option to FALSE
#' may cause some models to stop working, e.g. the White model. Setting this option to FALSE
#' will also cause errors or NA likelihood values in the case of trees with very short or
#' 0-length branches.}
#' \item{\code{PCMBase.Tolerance.Symmetric }}{A double specifying the tolerance in tests
#' for symmetric matrices. Default 1e-8; see also \code{\link{isSymmetric}}.}
#' \item{\code{PCMBase.Lmr.mode} }{An integer code specifying the parallel likelihood calculation mode.}
#' \item{\code{PCMBase.ParamValue.LowerLimit} }{Default lower limit value for parameters, default setting is -10.0.}
#' \item{\code{PCMBase.ParamValue.LowerLimit.NonNegative} }{Numeric (default: 0.0) indication the lower limit for
#' parameters inheriting from class \code{'_NonNegative'}}s
#' \item{\code{PCMBase.ParamValue.LowerLimit.NonNegativeDiagonal} }{Default lower limit value for parameters corresponding to non-negative diagonal elements of matrices, default setting is 0.0.}
#' \item{\code{PCMBase.ParamValue.UpperLimit} }{Default upper limit value for parameters, default setting is 10.0.}
#' \item{\code{PCMBase.Transpose.Sigma_x} }{Should upper diagonal factors for variance-covariance rate matrices be transposed, e.g. should Sigma = t(Sigma_x) Sigma_x or, rather Sigma = Sigma_x t(Sigma_x)? Note that the two variants are not equal. The default is FALSE, meaning Sigma = Sigma_x t(Sigma_x). In this case, Sigma_x is not the actual upper Cholesky factor of Sigma, i.e. chol(Sigma) != Sigma_x. See also \code{\link{chol}} and \code{\link{UpperTriFactor}}. This option applies to parameters Sigma_x, Sigmae_x, Sigmaj_x and the measurement errors \code{SE[,,i]} for each measurement i when the argument \code{SE} is specified as a cube.}
#' \item{\code{PCMBase.MaxLengthListCladePartitions} }{Maximum number of tree partitions returned by \code{\link{PCMTreeListCladePartitions}}. This option has the goal to interrupt the recursive search for new partitions in the case of calling PCMTreeListCladePartitions on a big tree with a small value of the maxCladeSize argument. By default this is set to Inf.}
#' \item{\code{PCMBase.PCMPresentCoordinatesFun} }{A function with the same synopsis as \code{\link{PCMPresentCoordinates}} that can be specified in case of custom setting for the present coordinates for specific nodes of the tree. See \code{\link{PCMPresentCoordinates}}, and \code{\link{PCMInfo}}.}
#' \item{\code{PCMBase.Use1DClasses} }{Logical indicating if 1D arithmetic operations
#' should be used instead of multi-dimensional ones. This can speed-up computations
#' in the case of a single trait. Currently, this feature is implemented only in
#' the PCMBaseCpp R-package and only for some model types, such as OU and BM.
#' Default: FALSE}
#' \item{\code{PCMBase.PrintSubscript_u} }{Logical indicating if a subscript 'u'
#' should be printed instead of a subscript 'x'. Used in \code{PCMTable}. Default: FALSE.}
#' \item{\code{PCMBase.MaxNForGuessSigma_x} }{A real fraction number in the
#' interval (0, 1) or an integer bigger than 1 controlling the
#' number of tips to use for analytical calculation of the evolutionary rate
#' matrix under a BM assumption. This option is used in the suggested PCMFit
#' R-package. Default: 0.25. }
#' \item{\code{PCMBase.UsePCMVarForVCV} }{Logical (default: FALSE) indicating
#' if the function \code{\link{PCMTreeVCV}} should use \code{\link{PCMVar}}
#' instead of ape's function \code{\link{vcv}} to calculate the phylogenetic
#' variance covariance matrix under BM assumption. Note that setting this option
#' to TRUE would slow down the function PCMTreeVCV considerably but may be more
#' stable, particularly in the case of very big and deep trees, where previous
#' ape's versions of the \code{\link{vcv}} function have thrown stack-overflow
#' errors.}
#' }
#' @export
#' @examples
#' PCMOptions()
PCMOptions <- function() {
  list(PCMBase.Value.NA = getOption("PCMBase.Value.NA", as.double(NA)),
       PCMBase.Errors.As.Warnings = getOption("PCMBase.Errors.As.Warnings", TRUE),
       PCMBase.Threshold.Lambda_ij = getOption("PCMBase.Threshold.Lambda_ij", 1e-8),
       PCMBase.Threshold.EV = getOption("PCMBase.Threshold.EV", 1e-5),
       PCMBase.Threshold.SV = getOption("PCMBase.Threshold.SV", 1e-6),
       PCMBase.Threshold.Skip.Singular = getOption("PCMBase.Threshold.Skip.Singular", 1e-4),
       PCMBase.Skip.Singular = getOption("PCMBase.Skip.Singular", TRUE),
       PCMBase.Tolerance.Symmetric = getOption("PCMBase.Tolerance.Symmetric", 1e-8),
       PCMBase.Lmr.mode = getOption("PCMBase.Lmr.mode", as.integer(11)),
       PCMBase.ParamValue.LowerLimit.NonNegative = getOption("PCMBase.ParamValue.LowerLimit.NonNegative", 0.0),
       PCMBase.ParamValue.LowerLimit.NonNegativeDiagonal = getOption("PCMBase.ParamValue.LowerLimit.NonNegativeDiagonal", 0.0),
       PCMBase.ParamValue.LowerLimit = getOption("PCMBase.ParamValue.LowerLimit", -10.0),
       PCMBase.ParamValue.UpperLimit = getOption("PCMBase.ParamValue.UpperLimit", 10.0),
       PCMBase.Transpose.Sigma_x = getOption("PCMBase.Transpose.Sigma_x", FALSE),
       PCMBase.MaxLengthListCladePartitions = getOption("PCMBase.MaxLengthListCladePartitions", Inf),
       PCMBase.PCMPresentCoordinatesFun = getOption("PCMBase.PCMPresentCoordinatesFun", PCMPresentCoordinates),
       PCMBase.Use1DClasses = getOption("PCMBase.Use1DClasses", FALSE),
       PCMBase.Raise.Lik.Errors = getOption("PCMBase.Raise.Lik.Errors", TRUE),
       PCMBase.PrintSuffix_u = getOption("PCMBase.PrintSuffix_u", FALSE),
       PCMBase.MaxNForGuessSigma_x = getOption("PCMBase.MaxNForGuessSigma_x", 0.25),
       PCMBase.UsePCMVarForVCV = getOption("PCMBase.UsePCMVarForVCV", FALSE)
       )
}

#' @name PCM
#' @title Create a phylogenetic comparative model object
#'
#' @description This is the entry-point function for creating model objects
#' within the PCMBase framework representing a single model-type with one or
#' several model-regimes of this type associated with the branches of a tree.
#' For mixed Gaussian phylogenetic models, which enable multiple model-types,
#' use the \code{\link{MixedGaussian}} function.
#' @param model This argument can take one of the following forms:
#' \itemize{
#' \item a character vector of the S3-classes of the model object to be
#' created (one model object can have one or more S3-classes, with the class
#' PCM at the origin of the hierarchy);
#' \item an S3 object which's class inherits from the PCM S3 class.
#' }
#' The Details section explains how these two types of input are processed.
#' @param modelTypes a character string vector specifying a set (family) of
#' model-classes, to which the constructed model object belongs. These are used
#' for model-selection.
#' @param k integer denoting the number of traits (defaults to 1).
#' @param regimes a character or integer vector denoting the regimes.
#' @param params NULL (default) or a list of parameter values (scalars, vectors,
#' matrices, or arrays) or sub-models (S3 objects inheriting from the PCM class).
#' See details.
#' @param vecParams NULL (default) or a numeric vector the vector
#' representation of the variable parameters in the model. See details.
#' @param offset integer offset in vecParams; see Details.
#' @param spec NULL or a list specifying the model parameters (see
#' \code{\link{PCMSpecify}}). If NULL (default), the generic PCMSpecify
#' is called on the created object of class \code{model}.
#' @param ... additional parameters intended for use by sub-classes of the PCM
#' class.
#' @return an object of S3 class as defined by the argument \code{model}.
#'
#' @details This is an S3 generic. The PCMBase package defines three methods for
#' it:
#' \itemize{
#' \item{PCM.PCM: }{A default constructor for any object with a class inheriting
#' from "PCM".}
#' \item{PCM.character: }{A default PCM constructor from a character string
#' specifying the type of model.}
#' \item{PCM.default: }{A default constructor called when no other constructor
#' is found. When called this constructor raises an error message.}
#' }
#' @seealso \code{\link{MixedGaussian}}
#'
#' @importFrom ape rtree
#'
#' @export
#'
#' @examples
#' # a Brownian motion model with one regime
#' modelBM <- PCM(model = "BM", k = 2)
#' # print the model
#' modelBM
#'
#' # a BM model with two regimes
#' modelBM.ab <- PCM("BM", k = 2, regimes = c("a", "b"))
#' modelBM.ab
#'
#' # print a single parameter of the model (in this case, the root value)
#' modelBM.ab$X0
#'
#' # assign a value to this parameter (note that the brackets [] are necessary
#' # to preserve  the parameter attributes):
#' modelBM.ab$X0[] <- c(5, 2)
#'
#' PCMNumTraits(modelBM)
#' PCMNumRegimes(modelBM)
#' PCMNumRegimes(modelBM.ab)
#'
#' # number of numerical parameters in the model
#' PCMParamCount(modelBM)
#'
#' # Get a vector representation of all parameters in the model
#' PCMParamGetShortVector(modelBM)
#'
#' # Limits for the model parameters:
#' lowerLimit <- PCMParamLowerLimit(modelBM)
#' upperLimit <- PCMParamUpperLimit(modelBM)
#'
#' # assign the model parameters at random: this will use uniform distribution
#' # with boundaries specified by PCMParamLowerLimit and PCMParamUpperLimit
#' # We do this in two steps:
#' # 1. First we generate a random vector. Note the length of the vector equals PCMParamCount(modelBM)
#' randomParams <- PCMParamRandomVecParams(modelBM, PCMNumTraits(modelBM), PCMNumRegimes(modelBM))
#' randomParams
#' # 2. Then we load this random vector into the model.
#' PCMParamLoadOrStore(modelBM, randomParams, 0, PCMNumTraits(modelBM), PCMNumRegimes(modelBM), TRUE)
#'
#' print(modelBM)
#'
#' PCMParamGetShortVector(modelBM)
#'
#' # generate a random phylogenetic tree of 10 tips
#' tree <- ape::rtree(10)
#'
#' #simulate the model on the tree
#' traitValues <- PCMSim(tree, modelBM, X0 = modelBM$X0)
#'
#' # calculate the likelihood for the model parameters, given the tree and the trait values
#' PCMLik(traitValues, tree, modelBM)
#'
#' # create a likelihood function for faster processing for this specific model.
#' # This function is convenient for calling in optim because it recieves and parameter
#' # vector instead of a model object.
#' likFun <- PCMCreateLikelihood(traitValues, tree, modelBM)
#' likFun(randomParams)
PCM <- function(
  model, modelTypes = class(model)[1], k = 1L, regimes = 1L,
  params = NULL, vecParams = NULL, offset = 0L,
  spec = NULL, ...) {
  UseMethod("PCM", model)
}

#' @export
PCM.default <- function(
  model, modelTypes = class(model)[1], k = 1L, regimes = 1L,
  params = NULL, vecParams = NULL, offset = 0L,
  spec = NULL, ...) {
  stop(paste0("PCM.default:: The type of the model argument is not supported: ",
              toString(class(model))))
}

#' @export
PCM.PCM <- function(
  model, modelTypes = class(model)[1], k = 1L, regimes = 1L,
  params = NULL, vecParams = NULL, offset = 0L,
  spec = NULL, ...) {

  if(is.null(spec)) {
    spec <- PCMSpecify(model, ...)
  }

  obj <- PCMDefaultObject(spec, model, ...)
  if(!is.null(params)) {
    PCMParamSetByName(obj, params, ...)
  }

  if(is.null(attr(obj, "k", exact = TRUE))) {
    attr(obj, "k") <- as.integer(k)
  }

  if(is.null(attr(obj, "regimes", exact = TRUE))) {
    attr(obj, "regimes") <- regimes
  }

  if(is.null(attr(obj, "p", exact = TRUE))) {
    attr(obj, "p") <- PCMParamCount(obj)
  }

  if(is.null(attr(obj, "modelTypes", exact = TRUE))) {
    attr(obj, "modelTypes") <- modelTypes
  }

  if(is.null(attr(obj, "mapping", exact = TRUE))) {
    attr(obj, "mapping") <- match(class(obj)[1],
                                    attr(obj, "modelTypes", exact = TRUE))
  }

  if(is.null(attr(obj, "spec", exact = TRUE))) {
    attr(obj, "spec") <- spec
  }

  if(!is.null(vecParams)) {
    PCMParamLoadOrStore(obj, vecParams, offset, load = TRUE)
  }

  obj
}

#' @export
PCM.character <- function(
  model, modelTypes = model[1], k = 1L, regimes = 1L,
  params = NULL, vecParams = NULL, offset = 0L,
  spec = NULL, ...) {

  modelObj <- list()
  class(modelObj) <- model
  if(length(model) == 1) {
    class(modelObj) <- c(class(modelObj), PCMParentClasses(modelObj))
  }
  attr(modelObj, "k") <- as.integer(k)
  attr(modelObj, "regimes") <- regimes

  PCM(modelObj, modelTypes, k, regimes, params, vecParams, offset, spec, ...)
}

#' Check if an object is a PCM.
#' @param x an object.
#' @return TRUE if `x` inherits from the S3 class "PCM".
#' @export
is.PCM <- function(x) inherits(x, "PCM")

#' Generate a default object of a given PCM model type or parameter type
#' @param spec any object having a class attribute. The value of this object is not
#' used, but its class is used for method-dispatch.
#' @param model a PCM object used to extract attributes needed for creating a
#' default object of class specified in \code{class(spec)}, such as the number of
#' traits (k) or the regimes and the number of regimes;
#' @param ... additional arguments that can be used by methods.
#'
#' @description This is an S3 generic. See, e.g. `PCMDefaultObject.MatrixParameter`.
#' @return a parameter or a PCM object.
#' @export
PCMDefaultObject <- function(spec, model, ...) {
  UseMethod("PCMDefaultObject", spec)
}

#' @export
PCMDefaultObject.PCM <- function(spec, model, ...) {
  o <- spec
  PCMParamSetByName(o, lapply(spec, PCMDefaultObject, model = o), replaceWholeParameters = TRUE, ...)
  o[] <- o[!sapply(o, is.null)]
  attr(o, "spec") <- spec
  o
}


#' @method print PCM
#' @export
print.PCM <- function(x, ...) cat(format(x, ...), "\n")

#' @method print PCM
#' @export
format.PCM <- function(x, ...) {
  if( !is.PCM(x) ) {
    stop("format.PCM:: x must inherit from S3 class PCM.")
  }
  spec <- attr(x, "spec", exact = TRUE)

  res <- paste0(
    PCMDescribe(x, ...)[[1L]],
    "\nS3 class: ", toString(class(x)), "; ",
    "k=", attr(x, "k", exact = TRUE), "; p=", PCMParamCount(x, ...), "; ",
    "regimes: ", toString(attr(x, "regimes")), ". Parameters/sub-models:\n")

  for(name in names(spec)) {

    if(is.PCM(spec[[name]])) {
      objToPrint <- x[[name]]
      type <- class(objToPrint)
      description <- attr(objToPrint, "description", exact = TRUE)

      strList <- as.list(capture.output(print(objToPrint)))
      strList$sep = "\n"
    } else if(!is.Omitted(spec[[name]])) {
      objToPrint <- x[[name]]
      type <- class(objToPrint)
      description <- attr(objToPrint, "description", exact = TRUE)

      # avoid writing class and description attributes twice
      attr(objToPrint, "class") <- NULL
      attr(objToPrint, "description") <- NULL
      attr(objToPrint, "k") <- NULL
      attr(objToPrint, "regimes") <- NULL
      attr(objToPrint, "spec") <- NULL
      strList <- as.list(capture.output(print(objToPrint)))
      strList$sep = "\n"
    } else {

      # _Omitted
      type <- class(spec[[name]])
      description <- attr(spec[[name]], "description", exact = TRUE)

      strList <- list(sep = "\n")
    }
    res <- paste0(res, name, " (", toString(type),
                  if(!is.null(description)) paste0("; ", description[[1L]]) else "", "):\n",
                  do.call(paste, strList))
    res <- paste0(res, "\n")
  }
  res
}

#' Parent S3 classes for a model class
#' @param model an S3 object.
#' @details This S3 generic function is intended to be specified for user models.
#' This function is called by the `PCM.character` method to determine the parent
#' classes for a given model class.
#' @return a vector of character string denoting the names of the parent classes
#' @export
PCMParentClasses <- function(model) {
  UseMethod("PCMParentClasses", model)
}

#' @export
PCMParentClasses.PCM <- function(model) c()

#' Human friendly description of a PCM
#' @param model a PCM model object
#' @param ... additional arguments used by implementing methods.
#' @details This S3 generic function is intended to be specified for user models
#' @return a character string
#' @export
PCMDescribe <- function(model, ...) {
  UseMethod("PCMDescribe", model)
}

#' @export
PCMDescribe.PCM <- function(model, ...) {
  if(!is.null(attr(model, "description", exact = TRUE))) {
    attr(model, "description", exact = TRUE)
  } else {
    "PCM base class model with no parameters; serves as a basis for PCM model classes"
  }
}


#' Parameter specification of PCM model
#' @param model a PCM model object.
#' @param ... additional arguments used by implementing methods.
#'
#' @description The parameter specification of a PCM model represents a named
#' list with an entry for each parameter of the model. Each entry in the list is
#' a structure defining the S3 class of the parameter and its verbal description.
#' This is an S3 generic. See `PCMSpecify.OU` for an example method.
#'
#' @return a list specifying the parameters of a PCM.
#' @export
PCMSpecify <- function(model, ...) {
  UseMethod("PCMSpecify", model)
}

#' Describe the parameters of a PCM
#' @param model a PCM object.
#' @param ... additional arguments that can be used by implementing methods.
#' @return a named list with character elements corresponding to each parameter.
#'
#' @description This is an S3 generic.
#'
#' @export
PCMDescribeParameters <- function(model, ...) {
  UseMethod("PCMDescribeParameters", model)
}

#' Specify the parameterizations for each parameter of a model
#' @param model a PCM.
#' @param ... additional arguments used by implementing methods.
#'
#' @description These are S3 generics. `PCMListParameterizations` should return
#' all possible parametrizations for the class of `model`.
#' `PCMListDefaultParameterizations` is a handy way to specify a subset of all
#' parametrizations. `PCMListDefaultParameterizations` should be used to avoid
#' generating too many model parametrizations which occupy space in the R-global
#' environment while they are not used (see \link{PCMGenerateParameterizations}).
#' It is mandatory to implement a specification for `PCMListParameterizations`
#' for each newly defined class of models.
#' `PCMListDefaultParameterizations` has a default implementation that calls
#' `PCMListParameterizations` and returns the first parametrization for each
#' parameter. Hence, implementing a method for `PCMListDefaultParameterizations`
#' for a newly defined model type is optional.
#'
#' @return a named list with list elements corresponding to each parameter in
#' model. Each list element is a list of character vectors, specifying the possible
#' S3 class attributes for the parameter in question. For an example, type
#' `PCMListParameterizations.BM` to see the possible parameterizations for the
#' BM model.
#'
#' @seealso PCMGenerateParameterizations
#' @export
PCMListParameterizations <- function(model, ...) {
  UseMethod("PCMListParameterizations", model)
}

#' @rdname PCMListParameterizations
#' @export
PCMListDefaultParameterizations <- function(model, ...) {
  UseMethod("PCMListDefaultParameterizations", model)
}

#' @export
PCMListDefaultParameterizations.default <- function(model, ...) {
  lapply(PCMListParameterizations(model, ...), function(item) item[1])
}

#' Cartesian product of possible parameterizations for the different parameters of a model
#' @param model a PCM object.
#' @param listParameterizations a list returned by a method for `PCMListParameterizations`.
#' Default: `PCMListParameterizations(model, ...)`.
#' @param ... additional arguments passed to `PCMListParameterizations(model, ...)`.
#'
#' @description This function generates a data.table in which each column corresponds to
#' one parameter of model and each row corresponds to one combination of parameterizations
#' for the model parameters, such that the whole table corresponds to the Cartesian product
#' of the lists found in `listParameterizations`. Usually, subsets of this table
#' should be passed to `PCMGenerateParameterizations`
#' @return a data.table object.
#'
#' @importFrom data.table data.table as.data.table
#' @export
PCMTableParameterizations <- function(
  model, listParameterizations = PCMListParameterizations(model, ...), ...) {

  # produce the cartesian product of the parameterizations for each parameter
  dtClasses <- do.call(expand.grid, listParameterizations)
  dtClasses <- as.data.table(dtClasses)
  dtClasses
}

#' Generate possible parameterizations for a given type of model
#'
#' @description A parameterization of a PCM of given type, e.g. OU, is a PCM-class
#' inheriting from this type, which imposes some restrictions or transformations of
#' the parameters in the base-type. This function generates the S3 methods responsible
#' for creating such parameterizations, in particular it generates the definition
#' of the methods for the two S3 generics `PCMParentClasses` and `PCMSpecify` for
#' al parameterizations specified in the `tableParameterizations` argument.
#' @param model a PCM object.
#' @param listParameterizations a list or a sublist returned by `PCMListParameterizations`.
#' Default: `PCMListParameterizations(model)`.
#' @param tableParameterizations a data.table containing the parameterizations to
#' generate. By default this is generated from `listParameterizations` using a
#' call `PCMTableParameterizations(model, listParameterizations)`. If specified
#' by the user, this parameter takes precedence over `listParameterizations` and
#' `listParameterizations` is not used.
#' @param env an environment where the method definitions will be stored.
#' Default: `env = .GlobalEnv`.
#' @param useModelClassNameForFirstRow A logical specifying if the S3 class name of
#' `model` should be used as a S3 class for the model defined in the first row of
#' `tableParameterizations`. Default: FALSE.
#' @param sourceFile NULL or a character string indicating a .R filename, to
#' which the automatically generated code will be saved. If NULL (the default),
#' the generated source code is evaluated and the S3 methods are defined in the
#' global environment. Default: NULL.
#' @return This function does not return a value. It only has a side effect by
#' defining S3 methods in `env`.
#'
#' @export
PCMGenerateParameterizations <- function(
  model,
  listParameterizations = PCMListParameterizations(model),
  tableParameterizations = PCMTableParameterizations(model, listParameterizations),
  env = .GlobalEnv,
  useModelClassNameForFirstRow = FALSE,
  sourceFile = NULL) {

  if(!is.PCM(model)) {
    # We assume that the passed model's class is a single character not including the
    # corresponding hierarchy, which is specified by PCMParentClasses
    class(model) <- c(class(model)[1], PCMParentClasses(model))
  }

  classesModel <- class(model)
  paramNames <- names(tableParameterizations)
  paramDescriptions <- PCMDescribeParameters(model)

  for(i in seq_len(nrow(tableParameterizations))) {
    nameFunPCMParentClasses <- paste0("PCMParentClasses.", classesModel[1])
    nameFunPCMSpecify <- paste0("PCMSpecify.", classesModel[1])
    if(i != 1 || !useModelClassNameForFirstRow) {
      # add suffix to names based on the parameterizations for the row
      suffix <- ""
      for(name in paramNames) {
        qualifiersParam <- tableParameterizations[i][[name]][[1]]
        qualifiersParam <- qualifiersParam[startsWith(qualifiersParam, "_")]
        if(length(qualifiersParam) > 0) {
          suffix <- paste0(suffix, "_")
          suffix <- paste0(suffix, do.call(paste0, as.list(qualifiersParam)), "_", name)
        } else {
          suffix <- paste0(suffix, "__")
          suffix <- paste0(suffix, name)
        }
      }
      nameFunPCMParentClasses <- paste0(nameFunPCMParentClasses, suffix)
      nameFunPCMSpecify <- paste0(nameFunPCMSpecify, suffix)
      parentClasses <- classesModel
    } else {
      # this is the base class for a model, all parameterisations being its daughter classes
      parentClasses <- classesModel[-1]
    }

    sourcePCMParentClasses <- paste0(
      nameFunPCMParentClasses, " <- function(model) ",
      AsRExpression(parentClasses))

    sourcePCMSpecify <- paste0(
      nameFunPCMSpecify, " <- function(model, ...) {\n",
      "spec <- list(\n")
    for(j in 1:length(paramNames)){
      sourcePCMSpecify <- paste0(
        sourcePCMSpecify,
        paramNames[j], " = structure(0.0, class = ",
        AsRExpression(tableParameterizations[i][[paramNames[j]]][[1]]),
        ",\n",
        "description = ", AsRExpression(paramDescriptions[[j]]), ")")
      if(j < length(paramNames)) {
        sourcePCMSpecify <- paste0(
          sourcePCMSpecify, ",\n")
      } else {
        sourcePCMSpecify <- paste0(
          sourcePCMSpecify, ")\n")
      }
    }
    sourcePCMSpecify <- paste0(
      sourcePCMSpecify,
      "attributes(spec) <- attributes(model)\n",
      "if(is.null(names(spec))) names(spec) <- ",
      as.character(list(paramNames)), "\n",
      "if(any(sapply(spec, is.Transformable))) class(spec) <- c(class(spec), '_Transformable')\n",
      "spec\n",
      "}")

    if(!is.null(sourceFile)) {
      write(paste0(
        "#' @export\n", sourcePCMParentClasses, "\n\n",
        "#' @export\n", sourcePCMSpecify, "\n\n"),
        file = sourceFile, append = TRUE)
    } else {
      eval(parse(text = sourcePCMParentClasses), env)
      eval(parse(text = sourcePCMSpecify), env)
    }
  }
}


#' Generate default model types for given PCM base-classes
#' @description This function calls `PCMListParameterizations` or
#' `PCMListDefaultParameterizations` and generates the corresponding
#' `PCMParentClasses` and `PCMSpecify` methods in the global environment.
#' @param baseTypes a named list with character string elements among
#' \code{c("default", "all")} and names specifying base S3-class names for which
#' the parametrizations (sub-classes) will be generated. Defaults to
#' \code{list(BM="default", OU = "default", White = "all")}. The element value
#' specifies which one of `PCMListParameterizations` or
#' `PCMListDefaultParameterizations` should be used:
#' \itemize{
#' \item{"all"}{for calling `PCMListParameterizations`}
#' \item{"default"}{for calling `PCMListDefaultParameterizations`}
#' }
#' @param sourceFile NULL or a character string indicating a .R filename, to
#' which the automatically generated code will be saved. If NULL (the default),
#' the generated source code is evaluated and the S3 methods are defined in the
#' global environment. Default: NULL.
#' @return This function has side effects only and does not return a value.
#' @seealso PCMListDefaultParameterizations
#' @export
PCMGenerateModelTypes <- function(
  baseTypes = list(BM="default", OU = "default", White = "all"),
  sourceFile = NULL) {

  if(!is.list(baseTypes)) {
    stop(paste0(
      "PCMGenerateModelTypes:: baseTypes should be a list with ",
      'with character string elements among c("default", "all") and names ',
      'specifying base S3-class names for which the parametrizations ',
      '(sub-classes) will be generated.'))
  }

  if( !is.null(sourceFile) ) {
    write(paste0(
      "# This file was auto-generated through a call to ",
      "PCMGenerateModelTypes()\n" ,
      "# Do not edit by hand.\n\n"),
      file = sourceFile)
  }

  for(bt in names(baseTypes)) {
    o <- structure(0.0, class=bt)
    PCMGenerateParameterizations(
      o,
      listParameterizations = if(baseTypes[[bt]] == "all") {
        PCMListParameterizations(o)
      } else {
        PCMListDefaultParameterizations(o)
      },
      sourceFile = sourceFile)
  }
}

#' A vector of access-code strings to all members of a named list
#' @param l a named list object.
#' @param recursive logical indicating if list members should be gone through
#'  recursively. TRUE by default.
#' @param format a character string indicating the format for accessing a member.
#' Acceptable values are \code{c("$", "$'", '$"', '$`', "[['", '[["', '[[`')} of which
#' the first one is taken as default.
#' @return a vector of character strings denoting each named member of the list.
#' @examples
#' PCMListMembers(PCMBaseTestObjects$model_MixedGaussian_ab)
#' PCMListMembers(PCMBaseTestObjects$model_MixedGaussian_ab, format = '$`')
#' PCMListMembers(PCMBaseTestObjects$tree.ab, format = '$`')
#' @export
PCMListMembers <- function(
  l, recursive = TRUE, format = c("$", "$'", '$"', '$`', "[['", '[["', '[[`')) {

  openers = c("$", "$'", '$"', '$`', "[['", '[["', '[[`')
  closers <- structure(c("", "'", '"', '`', "']]", '"]]', '`]]'), names = openers)
  if(!format[[1L]] %in% openers) {
    stop("PCMListMembers::The argument format should be one of ",
         toString(openers), ".\n")
  }
  op <- format[[1L]]
  cl <- closers[format[[1]]]

  ListMembersInternal <- function(ll) {
    res <- character(0L)

    if(is.list(ll)) {
      for(n in names(ll)) {
        res <- c(res, paste0(op, n, cl))
        if(recursive && is.list(ll[[n]])) {
          res <- c(res, paste0(op, n, cl, ListMembersInternal(ll[[n]])))
        }
      }
    }
    res
  }

  ListMembersInternal(l)
}

#' Find the members in a list matching a member expression
#'
#' @param object a list containing named elements.
#' @param member a member expression. Member expressions are character strings
#' denoting named elements in a list object (see examples).
#' @param enclos a character string containing the special symbol '?'. This
#' symbol is to be replaced by matching expressions. The result of this
#' substitution can be anything but, usually would be a valid R expression.
#' Default: "?".
#' @param q a quote symbol, Default: \code{"'"}.
#' @param ... additional arguments passed to \code{\link{grep}}. For example,
#' these could be \code{ignore.case=TRUE} or \code{perl=TRUE}.
#' @return a named character vector, with names corresponding to the matched
#' member quoted expressions (using the argument \code{q} as a quote symbol),
#' and values corresponding to the '\code{enclos}-ed' expressions after
#' substituting the '?'.
#' @seealso \code{\link{PCMListMembers}}
#' @examples
#' model <- PCMBaseTestObjects$model_MixedGaussian_ab
#' MatchListMembers(model, "Sigma_x", "diag(model?[,,1L])")
#' MatchListMembers(model, "S.*_x", "diag(model?[,,1L])")
#' MatchListMembers(model, "Sigma_x", "model?[,,1L][upper.tri(model?[,,1L])]")
#' MatchListMembers(model, "a$Sigma_x", "model?[,,1L][upper.tri(model?[,,1L])]")
#'
#' @export
MatchListMembers <- function(object, member, enclos = "?", q = "'", ...) {
  if(member != "") {
    member <- gsub('[[', '$', member, fixed = TRUE)
    member <- gsub(']]', '', member, fixed = TRUE)
    member <- gsub("'", "", member, fixed = TRUE)
    member <- gsub('`', "", member, fixed = TRUE)
    member <- gsub('"', "", member, fixed = TRUE)
    ms <- lapply(
      strsplit(member, split = "$", fixed = TRUE)[[1L]],
      function(s) if(!s=="") paste0(q, s, q) else "")
    ms$sep <- "\\$"
    member <- do.call(paste, ms)
    ms <- grep(member, x = PCMListMembers(object, format = paste0("$", q)), value = TRUE, ...)
    sapply(ms, function(m) gsub('?', m, enclos, fixed = TRUE))
  } else {
    character(0L)
  }
}

#' Value of an attribute of an object or values for an attribute found in its members
#' @param name attribute name.
#' @param object a PCM model object or a PCMTree object.
#' @param member a member expression. Member expressions are character strings
#' denoting named elements in a list object (see examples). Default: "".
#' @param ... additional arguments passed to \code{\link{MatchListMembers}}.
#' @return if member is an empty string, \code{attr(object, name)}. Otherwise, a named list
#' containing the value for the attribute for each member in \code{object}
#' matched by \code{member}.
#' @examples
#' PCMGetAttribute("class", PCMBaseTestObjects$model_MixedGaussian_ab)
#' PCMGetAttribute(
#'   "dim", PCMBaseTestObjects$model_MixedGaussian_ab,
#'   member = "$Sigmae_x")
#' @export
PCMGetAttribute <- function(name, object, member = "", ...) {
  if(member == "") {
    attr(object, name)
  } else {
    members <- MatchListMembers(object, member, ...)
    if(length(members) == 0L) {
      stop("PCMGetAttribute::Could not match the member expression.")
    } else {
      res <- list()
      for(m in members) {
        res[[m]] <- eval(parse(text = paste0("attr(object", m, ", '", name, "')")))
      }
      res
    }
  }
}

#' Set an attribute of a named member in a PCM or other named list object
#' @param name a character string denoting the attribute name.
#' @param value the value for the attribute.
#' @param object a PCM or a list object.
#' @param member a member expression. Member expressions are character strings
#' denoting named elements in a list object (see examples). Default: "".
#' @param spec a logical (TRUE by default) indicating if the attribute should
#' also be set in the corresponding member of the spec attribute (this is for
#' PCM objects only).
#' @param inplace logical (TRUE by default) indicating if the attribute should
#'  be set to the object in the current environment, or a modified object should
#'  be returned.
#' @param ... additional arguments passed to \code{\link{MatchListMembers}}.
#' @return if inplace is TRUE (default) nothing is returned. Otherwise, a
#' modified version of object is returned.
#' @details Calling this function can affect the attributes of multiple members
#' matched by the \code{member} argument.
#' @examples
#' model <- PCMBaseTestObjects$model_MixedGaussian_ab
#' PCMSetAttribute("class", c("MatrixParameter", "_Fixed"), model, "H")
#' @export
PCMSetAttribute <- function(
  name, value, object, member = "", spec = TRUE, inplace = TRUE, ...) {

  if(member == "") {
    if(inplace) {
      eval(substitute(attr(object, name) <- value), parent.frame())
      if(is.PCM(object) && spec) {
        eval(substitute(
          attr(attr(object, "spec"), name) <- value),
          parent.frame())
      }
    } else {
      attr(object, name) <- value
      if(is.PCM(object) && spec) {
        attr(attr(object, "spec"), name) <- value
      }
      object
    }
  } else {
    members <- MatchListMembers(object, member, q = "", ...)
    if(length(members) == 0L) {
      warning("PCMSetAttribute::Could not match the member expression.")
    } else {
      if(inplace) {
          for(m in members) {
            ms <- strsplit(m, split = "$", fixed = TRUE)[[1L]][-1L]
            if(length(ms) == 1L) {
              eval(substitute(
                attr(object[[ms[1]]], name) <- value),
                parent.frame())
              if(is.PCM(object) && spec) {
                eval(substitute(
                  attr(attr(object, "spec")[[ms[1]]], name) <- value),
                  parent.frame())
              }
            } else if(length(ms) == 2L) {
              eval(substitute(
                attr(object[[ms[1]]][[ms[2]]], name) <- value),
                parent.frame())
              if(is.PCM(object) && spec) {
                eval(substitute(
                  attr(attr(object, "spec")[[ms[1]]][[ms[2]]], name) <- value),
                  parent.frame())
              }
            } else if(length(ms) == 3L) {
              eval(substitute(
                attr(object[[ms[1L]]][[ms[2L]]][[ms[3L]]], name) <- value),
                parent.frame())
              if(is.PCM(object) && spec) {
                eval(substitute(
                  attr(attr(object, "spec")[[ms[1]]][[ms[2]]][[ms[3L]]], name) <- value),
                  parent.frame())
              }
            } else {
              stop("PCMSetAttribute:: Nested levels beyond 3 are not supported")
            }
          }
      } else {
        for(m in members) {
          for(m in members) {
            ms <- strsplit(m, split = "$", fixed = TRUE)[[1L]][-1L]
            if(length(ms) == 1L) {
              attr(object[[ms[1]]], name) <- value
            } else if(length(ms) == 2L) {
              attr(object[[ms[1]]][[ms[2]]], name) <- value
            } else if(length(ms) == 3L) {
              attr(object[[ms[1L]]][[ms[2L]]][[ms[3L]]], name) <- value
            } else {
              stop("PCMSetAttribute:: Nested levels beyond 3 are not supported")
            }
          }
        }
        object
      }
    }
  }
}

#' Add a value to a list-valued attribute of a member or members matching a pattern
#' @inheritParams PCMSetAttribute
#' @param enclos a character string containing the special symbol '?'. This
#' symbol is to be replaced by matching expressions. The result of this
#' substitution can be anything but, usually would be a valid R expression.
#' Default: "?".
#' @return if \code{inplace} is \code{TRUE} no value is returned. Otherwise, a
#' modified version of \code{object} is returned.
#' @export
PCMAddToListAttribute <- function(
  name, value, object, member = "", enclos = "?", spec = TRUE, inplace = TRUE, ...) {

  value$enclos <- enclos
  if(is.null(member) || member == "") {
    values <- PCMGetAttribute(name, object)
    if(is.null(values)) {
      values <- list()
    } else {
      values <- list(values)
    }
    values[[length(values) + 1L]] <- value

    if(inplace) {
      eval(substitute(PCMSetAttribute(
        name = name, value = values, object = object, spec = spec)),
        parent.frame())
    } else {
      PCMSetAttribute(
        name = name, value = values, object = object, spec = spec)
      object
    }
  } else {
    membersEnclos <- MatchListMembers(object, member, enclos = enclos, ...)

    if(length(membersEnclos) == 0L) {
      warning(
        "PCMAddAttribute:: no members matched by member argument (",
        member, "), so nothing to do.")
    } else {
      for(i in seq_along(membersEnclos)) {
        m <- names(membersEnclos)[i]
        mE <- unname(membersEnclos[i])

        # PCMGetAttribute returns an empty list or a named list with 1 element
        # corresponding to the list attribute for member m.
        values <- PCMGetAttribute(name, object, m)
        if(length(values) == 0L) {
          # empty list
          values <- list(value)
        } else {
          # a list with 1 element
          values <- c(values[[1L]], list(value))
        }

        if(inplace) {
          eval(substitute(PCMSetAttribute(
            name = name, value = values, object = object, member = m, spec = spec)),
            parent.frame())
        } else {
          PCMSetAttribute(
            name = name, value = values, object = object, member = m, spec = spec)
          object
        }
      }
    }
  }
}

#' Combine all member attributes of a given name into a list
#' @param object a named list object.
#' @param name a character string denoting the name of the attribute.
#' @return a list of attribute values
#' @export
PCMCombineListAttribute <- function(object, name) {
  attrs <- list()
  if(is.list(attr(object, name, exact = TRUE))) {
    attrs <- attr(object, name, exact = TRUE)
    for(i in seq_along(attrs)) {
      if(!is.null(attrs[[i]]$enclos)) {
        attrs[[i]]$pos <- which(!is.na(
          PCMParamLocateInShortVector(object, "", attrs[[i]]$enclos)))
      }
      attrs[[i]]$member <- ""
    }
  }
  members <- PCMListMembers(object, recursive = TRUE, format = "$`")
  for(m in members) {
    memberAttrs <- eval(parse(text = paste0(
      "attr(object", m, ", '", name, "', exact = TRUE)")))

    if(is.list(memberAttrs)) {
      # update 'pos' fields in memberAttrs based on m and their 'enclos' fields.
      for(i in seq_along(memberAttrs)) {
        memberAttrs[[i]]$pos <- which(!is.na(
          PCMParamLocateInShortVector(object, m, memberAttrs[[i]]$enclos)))
        memberAttrs[[i]]$member <- m
      }
      attrs <- c(attrs, memberAttrs)
    }
  }
  attrs
}

#' Fix a parameter in a PCM model
#' @param model a PCM object
#' @param name a character string
#' @return a copy of the model with added class '_Fixed' to the class of the
#' parameter \code{name}
#' @export
PCMFixParameter <- function(model, name) {
  modelNew <- model
  if(!is.Fixed(modelNew[[name]])) {
    class(modelNew[[name]]) <- c(class(modelNew[[name]]), "_Fixed")
    class(attr(modelNew, "spec")[[name]]) <- c(class(attr(modelNew, "spec")[[name]]), "_Fixed")
  }
  modelNew
}

#' Unfix a parameter in a PCM model
#' @param model a PCM object
#' @param name a character string
#' @return a copy of the model with removed class '_Fixed' from the class of the
#' parameter \code{name}
#' @export
PCMUnfixParameter <- function(model, name) {
  modelNew <- model
  if(is.Fixed(modelNew[[name]])) {
    class(modelNew[[name]]) <- class(modelNew[[name]])[!class(modelNew[[name]])=="_Fixed"]
    class(attr(modelNew, "spec")[[name]]) <-
      class(attr(modelNew, "spec")[[name]])[!class(attr(modelNew, "spec")[[name]])=="_Fixed"]
  }
  modelNew
}



#' Number of traits modeled by a PCM
#' @param model a PCM object or an a parameter object (the name of this argument
#' could be misleading, because both, model and parameter objects are supported).
#' @return an integer
#' @export
PCMNumTraits <- function(model) {
  UseMethod("PCMNumTraits", model)
}

#' @export
PCMNumTraits.PCM <- function(model) {
  as.integer(attr(model, "k", exact = TRUE))
}

#' Get the regimes (aka colors) of a PCM or of a PCMTree object
#' @param obj a PCM or a PCMTree object
#' @return a character or an integer vector giving the regime names in the obj
#' @export
PCMRegimes <- function(obj) {
  UseMethod("PCMRegimes", obj)
}

#' @export
PCMRegimes.PCM <- function(obj) {
  attr(obj, "regimes", exact = TRUE)
}


#' Number of regimes in a obj
#' @param obj a PCM object
#' @return an integer
#' @export
PCMNumRegimes <- function(obj) {
  UseMethod("PCMNumRegimes", obj)
}

#' @export
PCMNumRegimes.PCM <- function(obj) {
  length(PCMRegimes(obj))
}

#' Get the model type(s) of a model
#'
#' @description For a regular PCM object, the model type is its S3 class. For a
#' MixedGaussian each regime is mapped to one of several possible model types.
#'
#' @param obj a PCM object
#' @return a character vector
#' @export
PCMModelTypes <- function(obj) {
  UseMethod("PCMModelTypes", obj)
}

#' @export
PCMModelTypes.PCM <- function(obj) {
  class(obj)[1]
}


#' Integer vector giving the model type index for each regime
#' @param model a PCM model
#' @param tree a phylo object with an edge.part member
#' @param ... additional parameters passed to methods
#' @return an integer vector with elements corresponding to the elements in
#' \code{PCMTreeGetPartNames(tree)}
#' @details This is a generic S3 method. The default implementation for the basic
#' class PCM returns a vector of 1's, because it assumes that a single model type
#' is associated with each regime. The implementation for mixed Gaussian models
#' returns the mapping attribute of the MixedGaussian object reordered to
#' correspond to \code{PCMTreeGetPartNames(tree)}.
#' @export
PCMMapModelTypesToRegimes <- function(model, tree, ...) {
  UseMethod("PCMMapModelTypesToRegimes", model)
}

#' @export
PCMMapModelTypesToRegimes.PCM <- function(model, tree, ...) {
  uniqueRegimes <- PCMRegimes(tree)
  res <- rep(1, length(uniqueRegimes))
  names(res) <- as.character(uniqueRegimes)
  res
}


#' Get a vector of all parameters (real and discrete) describing a model on a
#' tree including the numerical parameters of each model regime, the integer ids
#' of the splitting nodes defining the regimes on the tree and the integer ids of
#' the model types associated with each regime.
#'
#' @details This is an S3 generic.
#' In the default implementation, the last entry in the returned vector is the
#' number of numerical parameters. This is used to identify the starting positions
#' in the vector of the first splitting node.
#'
#' @param model a PCM model
#' @param tree a phylo object with an edge.part member.
#' @param ... additional parameters passed to methods.
#' @return a numeric vector concatenating the result
#' @export
PCMGetVecParamsRegimesAndModels <- function(model, tree, ...) {
  UseMethod("PCMGetVecParamsRegimesAndModels", model)
}

#' @export
PCMGetVecParamsRegimesAndModels.PCM <- function(model, tree, ...) {
  numericParams <- PCMParamGetShortVector(model)
  startingNodesRegimes <- PCMTreeGetPartition(tree)
  models <- PCMMapModelTypesToRegimes(model, tree, ...)
  c(numericParams, startingNodesRegimes, models)
}


#' Map a parametrization to its original form.
#' @description This is an S3 generic that transforms the passed argument by
#' applying the transformation rules for its S3 class.
#' @param o a PCM object or a parameter
#' @param ... additional arguments that can be used by implementing methods.
#'
#' @return a transformed version of o.
#'
#' @description This is an S3 generic. See `PCMApplyTransformation._CholeskyFactor`
#' for an example.
#'
#' @details This function returns the same object if it is not transformable.
#'
#' @seealso \code{\link{is.Transformable}}
#'
#' @export
PCMApplyTransformation <- function(o, ...) {
  if(is.Transformable(o)) {
    # this if-statement prevents a transformation when it is not needed, e.g.
    # in the case of a parameter, which is a _CholeskyFactor but should not be
    # converted into a positive definite matrix, either, because this is not needed,
    # i.e. the parameter is defined as an upper triangular matrix with a non-negative
    # diagonal, or because the conversion is done at a lower level.
    UseMethod("PCMApplyTransformation", o)
  } else {
    o
  }
}

#' @export
PCMApplyTransformation.default <- function(o, ...) {
  # do nothing (act as an identity function)
  o
}

#' @export
PCMApplyTransformation.PCM <- function(o, ...) {
  if(is.Transformable(o)) {

    # Previously, the following call was used but was too slow, particularly
    # in the case of MixedGaussian models, because of their deeper nested
    # structure:
    # PCMParamSetByName(o, lapply(o, PCMApplyTransformation, ...), replaceWholeParameters = TRUE)
    # The for loop below is faster, but does no checks on the transformed object:
    for(i in seq_along(o)) {
      o[[i]] <- PCMApplyTransformation(o[[i]], ...)
    }
    classes <- class(o)
    classes <- classes[classes != "_Transformable"]
    classes <- c(classes, "_Transformed")

    class(o) <- classes
    o
  } else {
    o
  }
}

#' Conditional distribution of a daughter node given its parent node
#' @description An S3 generic function that has to be implemented for every
#'  model class.
#' @inheritParams PCMLik
#' @param r an integer specifying a model regime
#' @return an object of type specific to the type of model
#' @export
PCMCond <- function(tree, model, r=1, metaI=PCMInfo(NULL, tree, model, verbose = verbose), verbose = FALSE) {
  UseMethod("PCMCond", model)
}

#' Expected mean vector at each tip conditioned on a trait-value vector at the root
#' @inheritParams PCMLik
#' @param X0 a k-vector denoting the root trait
#' @param internal a logical indicating ig the per-node mean vectors should be returned (see Value). Default FALSE.
#'
#' @return If internal is FALSE (default), then a k x N matrix Mu, such that \code{Mu[, i]} equals the expected mean k-vector
#' at tip i, conditioned on \code{X0} and the tree. Otherwise, a k x M matrix Mu containing the mean vector for each node.
#' @export
#' @examples
#' # a Brownian motion model with one regime
#' modelBM <- PCM(model = "BM", k = 2)
#' # print the model
#' modelBM
#' # assign the model parameters at random: this will use uniform distribution
#' # with boundaries specified by PCMParamLowerLimit and PCMParamUpperLimit
#' # We do this in two steps:
#' # 1. First we generate a random vector. Note the length of the vector equals PCMParamCount(modelBM)
#' randomParams <- PCMParamRandomVecParams(modelBM, PCMNumTraits(modelBM), PCMNumRegimes(modelBM))
#' randomParams
#' # 2. Then we load this random vector into the model.
#' PCMParamLoadOrStore(modelBM, randomParams, 0, PCMNumTraits(modelBM), PCMNumRegimes(modelBM), TRUE)
#'
#' # create a random tree of 10 tips
#' tree <- ape::rtree(10)
#' PCMMean(tree, modelBM)
PCMMean <- function(tree, model, X0 = model$X0, metaI=PCMInfo(NULL, tree, model, verbose = verbose), internal = FALSE, verbose = FALSE)  {
  UseMethod("PCMMean", model)
}

#' Calculate the mean at time t, given X0, under a PCM model
#' @param t positive numeric denoting time
#' @param model a PCM model object
#' @param X0 a numeric vector of length k, where k is the number of traits in the model (Defaults to model$X0).
#' @param regime an integer or a character denoting the regime in model for
#' which to do the calculation; Defaults to PCMRegimes(model)[1L], meaning the
#' first regime in the model.
#' @param verbose a logical indicating if (debug) messages should be written on the console (Defaults to FALSE).
#' @return A numeric vector of length k
#' @export
#' @examples
#' # a Brownian motion model with one regime
#' modelBM <- PCM(model = "BM", k = 2)
#' # print the model
#' modelBM
#' # assign the model parameters at random: this will use uniform distribution
#' # with boundaries specified by PCMParamLowerLimit and PCMParamUpperLimit
#' # We do this in two steps:
#' # 1. First we generate a random vector. Note the length of the vector equals PCMParamCount(modelBM)
#' randomParams <- PCMParamRandomVecParams(modelBM, PCMNumTraits(modelBM), PCMNumRegimes(modelBM))
#' randomParams
#' # 2. Then we load this random vector into the model.
#' PCMParamLoadOrStore(modelBM, randomParams, 0, PCMNumTraits(modelBM), PCMNumRegimes(modelBM), TRUE)
#'
#' # PCMMeanAtTime(1, modelBM)
#'
#' # note that the variance at time 0 is not the 0 matrix because the model has a non-zero
#' # environmental deviation
#' PCMMeanAtTime(0, modelBM)
PCMMeanAtTime <- function(
  t, model, X0 = model$X0, regime = PCMRegimes(model)[1L], verbose = FALSE) {
  if(! (regime %in% PCMRegimes(model)) ) {
    stop("PCMMeanAtTime:: regime should be among PCMRegimes(model).")
  }

  cherry <- list(tip.label = c("t1", "t2"),
                 edge = rbind(c(3L, 1L), c(3L, 2L)),
                 edge.length = rep(t, 2),
                 Nnode = 1L)
  class(cherry) <- "phylo"
  cherry <- PCMTree(cherry)
  PCMTreeSetPartRegimes(cherry, c(`3` = regime[1]))

  metaI <- PCMInfo(X = NULL, tree = cherry, model = model, verbose = verbose)

  MeanCherry <- PCMMean(
    tree = cherry,
    model = model,
    X0 = X0,
    metaI = metaI,
    verbose = verbose)

  MeanCherry[, 1L]
}


#' Expected variance-covariance matrix for each couple of tips (i,j)
#' @inheritParams PCMLik
#' @param W0 a numeric matrix denoting the initial k x k variance covariance matrix at the
#'  root (default is the k x k zero matrix).
#' @param internal a logical indicating if the per-node variance-covariances matrices for
#' the internal nodes should be returned (see Value). Default FALSE.
#' @param diagOnly a logical indicating if only the variance
#' blocks for the nodes should be calculated. By default this is set to FALSE,
#' meaning that the co-variances are calculated for all couples of nodes.
#' @return If internal is FALSE, a (k x N) x (k x N) matrix W, such that k x k block
#' \code{W[((i-1)*k)+(1:k), ((j-1)*k)+(1:k)]} equals the expected
#' covariance matrix between tips i and j. Otherwise, a list with an element 'W' as described above and
#' a k x M matrix element 'Wii' containing the per-node variance covariance matrix for each node:
#' The k x k block \code{Wii[, (i-1)*k + (1:k)]} represents the variance covariance matrix for node i.
#' @export
#' @examples
#' # a Brownian motion model with one regime
#' modelBM <- PCM(model = "BM", k = 2)
#' # print the model
#' modelBM
#' # assign the model parameters at random: this will use uniform distribution
#' # with boundaries specified by PCMParamLowerLimit and PCMParamUpperLimit
#' # We do this in two steps:
#' # 1. First we generate a random vector. Note the length of the vector equals PCMParamCount(modelBM)
#' randomParams <- PCMParamRandomVecParams(modelBM, PCMNumTraits(modelBM), PCMNumRegimes(modelBM))
#' randomParams
#' # 2. Then we load this random vector into the model.
#' PCMParamLoadOrStore(modelBM, randomParams, 0, PCMNumTraits(modelBM), PCMNumRegimes(modelBM), TRUE)
#'
#' # create a random tree of 10 tips
#' tree <- ape::rtree(10)
#' covMat <- PCMVar(tree, modelBM)
PCMVar <- function(
  tree, model,
  W0 = matrix(0.0, PCMNumTraits(model), PCMNumTraits(model)),
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  metaI=PCMInfo(NULL, tree, model, verbose = verbose),
  internal = FALSE, diagOnly = FALSE, verbose = FALSE)  {
  UseMethod("PCMVar", model)
}

#' Calculate the variance covariance k x k matrix at time t, under a PCM model
#' @param t positive numeric denoting time
#' @param model a PCM model object
#' @param W0 a numeric matrix denoting the initial k x k variance covariance matrix at the
#'  root (default is the k x k zero matrix).
#' @param SE a k x k matrix specifying the upper triangular factor of
#' the measurement error variance-covariance matrix. The product
#' SE %*% t(SE) is added to the variance calculated from the model.
#' Default: SE = matrix(0.0, PCMNumTraits(model), PCMNumTraits(model)).
#' @param regime an integer or a character denoting the regime in model for
#' which to do the calculation; Defaults to PCMRegimes(model)[1L], meaning the
#' first regime in the model.
#' @param verbose a logical indicating if (debug) messages should be written on the console (Defaults to FALSE).
#' @return A numeric k x k matrix
#' @export
#' @examples
#' # a Brownian motion model with one regime
#' modelBM <- PCM(model = "BM", k = 2)
#' # print the model
#' modelBM
#' # assign the model parameters at random: this will use uniform distribution
#' # with boundaries specified by PCMParamLowerLimit and PCMParamUpperLimit
#' # We do this in two steps:
#' # 1. First we generate a random vector. Note the length of the vector equals PCMParamCount(modelBM)
#' randomParams <- PCMParamRandomVecParams(modelBM, PCMNumTraits(modelBM), PCMNumRegimes(modelBM))
#' randomParams
#' # 2. Then we load this random vector into the model.
#' PCMParamLoadOrStore(modelBM, randomParams, 0, PCMNumTraits(modelBM), PCMNumRegimes(modelBM), TRUE)
#'
#' # PCMVarAtTime(1, modelBM)
#'
#' # note that the variance at time 0 is not the 0 matrix because the model has a non-zero
#' # environmental deviation
#' PCMVarAtTime(0, modelBM)
PCMVarAtTime <- function(
  t, model,
  W0 =  matrix(0.0, PCMNumTraits(model), PCMNumTraits(model)),
  SE = matrix(0.0, PCMNumTraits(model), PCMNumTraits(model)),
  regime = PCMRegimes(model)[1L], verbose = FALSE) {

  if(! (regime %in% PCMRegimes(model)) ) {
    stop("PCMVarAtTime:: regime should be among PCMRegimes(model).")
  }

  cherry <- list(tip.label = c("t1", "t2"),
                 edge = rbind(c(3L, 1L), c(3L, 2L)),
                 edge.length = rep(t, 2),
                 Nnode = 1L)
  class(cherry) <- "phylo"
  cherry <- PCMTree(cherry)
  PCMTreeSetPartRegimes(cherry, c(`3` = regime[1]))

  metaI <- PCMInfo(
    X = NULL, tree = cherry, model = model, verbose = verbose)

  VarCherry <- PCMVar(
    tree = cherry, model = model, W0 = W0,
    metaI = metaI, verbose = verbose)

  k <- PCMNumTraits(model)
  VarCherry[1:k, 1:k] + (SE %*% t(SE))
}


#' Generate a trajectory for the mean in one regime of a PCM
#'
#' @param model a PCM object.
#' @param regime a regime in `model`. Default is PCMRegimes(model)[1].
#' @param X0 a numeric vector specifying an initial point in the trait space.
#' Default is rep(0, PCMNumTraits(model))
#' @param W0 a numeric k x k symmetric positive definite matrix or 0 matrix,
#' specifying the initial variance covariance matrix at t0. By default, this is
#' a k x k 0 matrix.
#' @param tX,tVar numeric vectors of positive points in time sorted in
#' increasing order. tX specifies the points in time at which to calculate the
#' mean (conditional on X0). tVar specifies a subset of the points in tX at
#' which to generate random samples from the k-variate Gaussian distribution
#' with mean equal to the mean value at the corresponding time conditional on X0
#' and variance equal to the variance at this time, conditional on W0. Default
#' settings are `tX = seq(0, 100, by = 1)` and
#' `tVar = tX[seq(0, length(tX), length.out = 4)]`.
#' @param dims an integer vector specifying the traits for which samples at tVar
#' should be generated (see tX,tVar above).
#' Default: seq_len(PCMNumTraits(model)).
#' @param sizeSamp an integer specifying the number points in the random
#' samples (see tX and tVar above). Default 100.
#' @param doPlot2D Should a ggplot object be produced and returned. This is
#' possible only for two of the traits specified in dims.  Default: FALSE.
#' @param plot a ggplot object. This can be specified when doPlot2D is TRUE and
#' allows to add the plot of this trajectory as a layer in an existing ggplot.
#' Default: NULL
#'
#' @return if doPlot2D is TRUE, returns a ggplot. Otherwise a named list of two
#' elements:
#' \itemize{
#' \item{dt }{a data.table with columns 'regime', 't', 'X', 'V' and 'samp'. For
#' each row corresponding to time in tVar, the column samp represents a list of
#' sizeSamp k-vectors.}
#' \item{dtPlot }{a data.table with the same data as in dt, but with converted
#' columns X and samp into 2 x k columns denoted xi, i=1,...,k and xsi (i=1...k)
#' This is suitable for plotting with ggplot.}}
#' @importFrom ggplot2 ggplot scale_color_continuous geom_path aes stat_ellipse arrow
#' @importFrom data.table data.table rbindlist
#' @importFrom mvtnorm rmvnorm
#' @export
#' @examples
#' set.seed(1, kind = "Mersenne-Twister", normal.kind = "Inversion")
#'
#' # a Brownian motion model with one regime
#' modelOU <- PCM(model = PCMDefaultModelTypes()['F'], k = 2)
#'
#' # assign the model parameters at random: this will use uniform distribution
#' # with boundaries specified by PCMParamLowerLimit and PCMParamUpperLimit
#' # We do this in two steps:
#' # 1. First we generate a random vector. Note the length of the vector equals
#' # PCMParamCount(modelBM).
#'
#' randomParams <- PCMParamRandomVecParams(
#'   modelOU, PCMNumTraits(modelOU), PCMNumRegimes(modelOU))
#' # 2. Then we load this random vector into the model.
#' PCMParamLoadOrStore(
#'   modelOU,
#'   randomParams,
#'   0, PCMNumTraits(modelBM), PCMNumRegimes(modelBM), load = TRUE)
#'
#' # let's plot the trajectory of the model starting from X0 = c(0,0)
#' PCMTrajectory(
#'   model = modelOU,
#'   X0 = c(0, 0),
#'   doPlot2D = TRUE)
#'
#'
#' # A faceted grid of plots for the two regimes in a mixed model:
#' pla <- PCMTrajectory(
#'   model = PCMBaseTestObjects$model_MixedGaussian_ab, regime = "a",
#'   X0 = c(0, 0, 0),
#'   doPlot2D = TRUE) +
#'   ggplot2::scale_y_continuous(limits = c(0, 10)) +
#'   ggplot2::facet_grid(.~regime)
#'
#' plb <- PCMTrajectory(
#'   model = PCMBaseTestObjects$model_MixedGaussian_ab, regime = "b",
#'   X0 = c(0, 0, 0),
#'   doPlot2D = TRUE) +
#'   ggplot2::scale_y_continuous(limits = c(0, 10)) +
#'   ggplot2::facet_grid(.~regime) +
#'   ggplot2::theme(
#'     axis.title.y = ggplot2::element_blank(),
#'     axis.text.y = ggplot2::element_blank(),
#'     axis.ticks.y = ggplot2::element_blank())
#'
#' plot(pla)
#' plot(plb)
PCMTrajectory <- function(
  model,
  regime = PCMRegimes(model)[1],
  X0 = rep(0, PCMNumTraits(model)),
  W0 = matrix(0.0, nrow = PCMNumTraits(model), ncol = PCMNumTraits(model)),

  tX = seq(0, 100, by = 1),
  tVar = tX[seq(0, length(tX), length.out = 4)],
  dims = seq_len(PCMNumTraits(model)),
  sizeSamp = 100,
  doPlot2D = FALSE,
  plot = NULL) {

  dt <- data.table(
    regime = regime,
    t = tX,
    X = list(NULL),
    V = list(NULL),
    samp = list(NULL))

  for(i in seq_len(nrow(dt))) {
    dt[i, c("regime", "X", "V", "samp"):={
      X <- PCMMeanAtTime(
        t = t,
        model = model,
        X0 = X0,
        regime = regime)
      V = PCMVarAtTime(
        t = t,
        W0 = W0,
        model = model,
        regime = regime)
      if(t %in% tVar) {
        samp <- rmvnorm(sizeSamp, sigma = V)
      } else {
        samp <- NULL
      }
      list(regime = regime, X = list(X), V = list(V), samp = list(samp))
    }]
  }

  dtPlot <- rbindlist(lapply(seq_len(nrow(dt)), function(i) {
    dt[i,
       c(list(t = t, regime = regime),
         lapply(dims, function(d) {
           X[[1]][d]
         }),
         lapply(dims, function(d) {
           if(is.null(samp[[1]])) {
             NA_real_
           } else {
             samp[[1]][, d] + X[[1]][d]
           }
         }))]
  }))
  names(dtPlot) <- c("t", "regime", paste0("x", dims), paste0("xs", dims))

  # avoid warning from r check
  x1 <- x2 <- xs1 <- xs2 <- NULL

  if(doPlot2D) {
    if(is.null(plot)) {
      pl <- ggplot(NULL)
    } else {
      pl <- plot
    }
    pl <- pl + geom_path(
      data = dtPlot,
      mapping = aes(x = x1, y = x2))
    pl <- pl +
      stat_ellipse(
        data = dtPlot,
        aes(x = xs1, y = xs2, group = factor(t)),
        type="norm")
    pl
  } else {
    list(dt = dt, dtPlot = dtPlot)
  }
}

#' Calculate the likelihood of a model using the standard formula for multivariate pdf
#' @inheritParams PCMLik
#' @return a numerical value with named attributes as follows:
#' @importFrom mvtnorm dmvnorm
#' @export
PCMLikDmvNorm <- function(
  X, tree, model,
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  metaI = PCMInfo(X, tree, model, SE, verbose = verbose),
  log = TRUE,
  verbose = FALSE) {

  dmvnorm(as.vector(X[, 1:PCMTreeNumTips(tree)]),
          as.vector(PCMMean(tree, model, model$X0, metaI = metaI)),
          PCMVar(tree, model, metaI = metaI), log = log)
}


#' Simulation of a phylogenetic comparative model on a tree
#'
#' @description Generate trait data on a tree according to a multivariate stochastic
#' model with one or several regimes
#'
#' @param X0 a numeric vector of length k (the number of traits) specifying the
#' trait values at the root of the tree.
#' @param tree a phylo object specifying a rooted tree.
#' @param model an S3 object specifying the model (see Details).
#' @param SE a k x N matrix specifying the standard error for each measurement in
#' X. Alternatively, a k x k x N cube specifying an upper triangular k x k
#' factor of the variance covariance matrix for the measurement error
#' for each node i=1, ..., N.
#' Default: \code{matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree))}.
#' @param metaI a named list containing meta-information about the data and the
#' model.
#' @param verbose a logical indicating if informative messages should be written
#' during execution.
#'
#' @details Internally, this function uses the \code{\link{PCMCond}} implementation
#'  for the given model class.
#'
#' @return numeric M x k matrix of values at all nodes of the tree, i.e. root,
#' internal and tip, where M is the number of nodes: \code{M=dim(tree$edge)[1]+1},
#' with indices from 1 to N=length(tree$tip.label) corresponding to tips, N+1
#' corresponding to the root and bigger than N+1 corresponding to internal nodes.
#' The function will fail in case that the length of the argument vector X0 differs
#' from the number of traits specified in \code{metaI$k}. Error message:
#' "PCMSim:: X0 must be of length ...".
#'
#' @importFrom mvtnorm rmvnorm
#' @seealso \code{\link{PCMLik}} \code{\link{PCMInfo}} \code{\link{PCMCond}}
#' @examples
#' N <- 10
#' L <- 100.0
#' tr <- ape::stree(N)
#' tr$edge.length <- rep(L, N)
#' for(epoch in seq(1, L, by = 1.0)) {
#'   tr <- PCMTreeInsertSingletonsAtEpoch(tr, epoch)
#' }
#'
#' model <- PCMBaseTestObjects$model_MixedGaussian_ab
#'
#' PCMTreeSetPartRegimes(tr, c(`11` = 'a'), setPartition = TRUE)
#'
#' set.seed(1, kind = "Mersenne-Twister", normal.kind = "Inversion")
#' X <- PCMSim(tr, model, X0 = rep(0, 3))
#'
#'
#' @export
PCMSim <- function(
  tree, model, X0,
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  metaI = PCMInfo(
    X = NULL, tree = tree, model = model, SE = SE, verbose = verbose),
  verbose = FALSE) {

  UseMethod("PCMSim", model)
}

#' Likelihood of a multivariate Gaussian phylogenetic comparative model with non-interacting lineages
#'
#' @description The likelihood of a PCM represents the probability density function
#'   of observed trait values (data) at the tips of a tree given the tree and
#'   the model parameters. Seen as a function of the model parameters, the
#'   likelihood is used to fit the model to the observed trait data and the
#'   phylogenetic tree (which is typically inferred from another sort of data, such
#'   as an alignment of genetic sequences for the species at the tips of the tree).
#'   The \code{\link{PCMLik}} function
#'   provides a common interface for calculating the (log-)likelihood of different
#'   PCMs.
#'   Below we denote by N the number of tips, by M the total number of nodes in the
#'   tree including tips, internal and root node, and by k - the number of traits.
#'
#' @param X a \code{k x N} numerical matrix with possible \code{NA} and
#' \code{NaN} entries. For \code{i=1,..., N}, the column \code{i} of X contains
#' the measured trait values for species \code{i} (the tip with integer
#' identifier equal to \code{i} in \code{tree}). Missing values can be either
#' not-available (\code{NA}) or not existing (\code{NaN}). These two values are
#' treated differently when calculating likelihoods (see
#' \code{\link{PCMPresentCoordinates}}).
#' @param tree a phylo object with N tips.
#' @param model an S3 object specifying both, the model type (class, e.g. "OU") as
#'   well as the concrete model parameter values at which the likelihood is to be
#'   calculated (see also Details).
#' @param SE a k x N matrix specifying the standard error for each measurement in
#' X. Alternatively, a k x k x N cube specifying an upper triangular k x k
#' factor of the variance covariance matrix for the measurement error
#' for each tip \code{i=1, ..., N}. When \code{SE} is a matrix, the k x k
#' measurement error variance matrix for a tip \code{i} is calculated as
#' \code{VE[, , i] <- diag(SE[, i] * SE[, i], nrow = k)}. When \code{SE} is a
#' cube, the way how the measurement variance matrix for a tip \code{i} is
#' calculated depends on the runtime option \code{PCMBase.Transpose.Sigma_x}
#' as follows:
#' \describe{
#' \item{if \code{getOption("PCMBase.Transpose.Sigma_x", FALSE) == FALSE} (default): }{VE[, , i] <- SE[, , i] \%*\% t(SE[, , i])}
#' \item{if \code{getOption("PCMBase.Transpose.Sigma_x", FALSE) == TRUE}: }{VE[, , i] <- t(SE[, , i]) \%*\% SE[, , i]}
#' }
#' Note that the above behavior is consistent with the treatment of the model
#' parameters \code{Sigma_x}, \code{Sigmae_x} and \code{Sigmaj_x}, which are
#' also specified as upper triangular factors.
#' Default: \code{matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree))}.
#' @param metaI a list returned from a call to \code{PCMInfo(X, tree, model, SE)},
#'   containing meta-data such as N, M and k. Alternatively, this can be a
#'   character string naming a function or a function object that returns such
#'   a list, e.g. the function\code{PCMInfo} or the function \code{PCMInfoCpp}
#'   from the \code{PCMBaseCpp} package.
#' @param log logical indicating whether a log-likelehood should be calculated. Default
#'  is TRUE.
#' @param verbose logical indicating if some debug-messages should printed.
#'
#' @return a numerical value with named attributes as follows:
#' \describe{
#' \item{X0}{A numerical vector of length k specifying the value at the root for which
#' the likelihood value was calculated. If the model contains a member called X0, this
#' vector is used; otherwise the value of X0 maximizing the likelihood for the given
#' model parameters is calculated by maximizing the quadratic polynomial
#' 'X0 * L_root * X0 + m_root * X0 + r_root';}
#' \item{error}{A character string with information if a numerical or other
#' logical error occurred during likelihood calculation.}
#' }
#' If an error occured during likelihood calculation, the default behavior is to
#'  return NA with a non-NULL error attribute. This behavior can be changed in
#'  using global options:
#'\describe{
#'  \item{"PCMBase.Value.NA"}{Allows to specify a different NA value such as \code{-Inf} or \code{-1e20} which can be used in combination with \code{log = TRUE} when
#'   using \code{optim} to maximize the log-likelihood;}
#'  \item{"PCMBase.Errors.As.Warnings"}{Setting this option to FALSE will cause any
#'  error to result in calling the \code{\link{stop}} R-base function. If not caught
#'  in a \code{\link{tryCatch}}, this will cause the inference procedure to abort at the occurence of a numerical error. By default, this option is set to TRUE, which
#'  means that \code{getOption("PCMBase.Value.NA", as.double(NA))} is returned with
#'  an error attribute and a warning is issued.}
#'}
#' @details For efficiency, the argument \code{metaI}
#'   can be provided explicitly, because this is not supposed to change during a
#'   model inference procedure such as likelihood maximization.
#'
#' @seealso \code{\link{PCMInfo}} \code{\link{PCMAbCdEf}} \code{\link{PCMLmr}} \code{\link{PCMSim}} \code{\link{PCMCond}}
#' @examples
#' N <- 10
#' tr <- PCMTree(ape::rtree(N))
#'
#' model <- PCMBaseTestObjects$model_MixedGaussian_ab
#'
#' PCMTreeSetPartRegimes(tr, c(`11` = 'a'), setPartition = TRUE)
#'
#' set.seed(1, kind = "Mersenne-Twister", normal.kind = "Inversion")
#' X <- PCMSim(tr, model, X0 = rep(0, 3))
#'
#' PCMLik(X, tr, model)
#' @export
PCMLik <- function(
  X, tree, model,
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  metaI = PCMInfo(
    X = X, tree = tree, model = model, SE = SE, verbose = verbose),
  log = TRUE,
  verbose = FALSE) {

  UseMethod("PCMLik", model)
}

#' Tracing the log-likelihood calculation of a model over each node of the tree
#'
#' @description This is an S3 generic function providing tracing information
#' for the likelihood calculation for a given tree, data and model parameters.
#' Useful for illustration or for debugging purpose.
#'
#' @inheritParams PCMLik
#'
#' @return The returned object will, in general, depend on the type of model and
#' the algorithm used for likelihood calculation. For a G_LInv model and
#' pruning-wise likelihood calculation, the returned object will be a data.table
#' with columns corresponding to the node-state variables, e.g. the quadratic
#' polynomial coefficients associated with each node in the tree.
#' @seealso \code{\link{PCMInfo}} \code{\link{PCMAbCdEf}} \code{\link{PCMLmr}} \code{\link{PCMSim}} \code{\link{PCMCond}} \code{\link{PCMParseErrorMessage}}
#' @export
PCMLikTrace <- function(
  X, tree, model,
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  metaI = PCMInfo(
    X = X, tree = tree, model = model, SE = SE, verbose = verbose),
  log = TRUE,
  verbose = FALSE) {

  UseMethod("PCMLikTrace", model)
}

#' @export
logLik.PCM <- function(object, ...) {
  if(!is.PCM(object)) {
    stop("logLik.PCM:: object must inherit from class PCM.")
  }

  X <- attr(object, "X", exact = TRUE)
  if( !is.matrix(X) ) {
    stop("logLik.PCM:: When calling logLik.PCM on a model object, it should have a k x N numeric matrix attribute called 'X'.")
  }
  SE <- attr(object, "SE", exact = TRUE)
  if( !(is.matrix(SE) || length(dim(SE)) == 3) ) {
    warning("logLik.PCM:: When calling logLik.PCM on a model object, it should have a (k x N) numeric matrix or a (k x k x N) numeric cube attribute called 'SE'. Setting SE to a (k x N) zero matrix.")
    SE <- X * 0.0
  }
  tree <- attr(object, "tree", exact = TRUE)
  if( !inherits(tree, "phylo") ) {
    stop("logLik.PCM:: When calling logLik.PCM on a model object should have an attribute called 'tree' of class phylo.")
  }

  if( is.character(attr(object, "PCMInfoFun", exact = TRUE)) ) {
    metaIFun <- try(
      eval(parse(text = attr(object, "PCMInfoFun", exact = TRUE))),
      silent = TRUE)
    if(!is.function(metaIFun)) {
      warning(paste0("logLik.PCM:: The expression ",
                     attr(object, "PCMInfoFun", exact = TRUE),
                     " does not evaluate to a function. Using PCMInfo"))
      metaIFun <- PCMInfo
    }
    value <- PCMLik(
      X, tree, object, SE, metaI = metaIFun(X, tree, object, SE), log = TRUE)
  } else if(is.function(attr(object, "PCMInfoFun", exact = TRUE)) ) {
    value <- PCMLik(
      X, tree, object, SE, metaI = attr(object, "PCMInfoFun", exact = TRUE)(
        X, tree, object, SE), log = TRUE)
  } else if(is.list(attr(object, "PCMInfoFun", exact = TRUE))) {
    # In this case, it is assumed that attr(object, "PCMInfoFun", exact = TRUE) is
    # the result from calling the PCMInfoFun on the model (object) X, tree and data.
    value <- PCMLik(
      X, tree, object, SE, metaI = attr(object, "PCMInfoFun", exact = TRUE), log = TRUE)
  } else {
    value <- PCMLik(X, tree, object, SE, log = TRUE)
  }

  attr(value, "df") <- PCMParamCount(object, countRegimeChanges = TRUE, countModelTypes = TRUE)
  attr(value, "nobs") <- PCMTreeNumTips(tree)

  value
}


#' Determine which traits are present (active) on each node of the tree
#'
#' @description For every node (root, internal or tip) in tree, build a logical
#' vector of length k with TRUE values for every present coordinate. Non-present
#' coordinates arize from NA-values in the trait data. These can occur in two cases:
#' \describe{
#' \item{Missing measurements for some traits at some tips:}{the present coordinates
#' are FALSE for the corresponding tip and trait, but are full for all traits
#' at all internal and root nodes.}
#' \item{non-existent traits for some species:}{the FALSE present coordinates
#' propagate towards the parent
#' nodes - an internal or root node will have a present coordinate set to FALSE
#' for a given trait, if all of its descendants have this coordinate set to FALSE.}
#' }
#' These two cases have different effect on the likelihood calculation: missing
#' measurements (NA) are integrated out at the parent nodes; while non-existent
#' traits (NaN) are treated as reduced dimensionality of the vector at the
#' parent node.
#'
#' @param X numeric k x N matrix of observed values, with possible NA entries. The
#' columns in X are in the order of tree$tip.label
#' @param tree a phylo object
#' @param metaI  The result of calling PCMInfo.
#' @return a k x M logical matrix. The function fails in case when all traits are NAs for some of the tips.
#' In that case an error message is issued
#' "PCMPresentCoordinates:: Some tips have 0 present coordinates. Consider
#' removing these tips.".
#' @seealso \code{\link{PCMLik}}
#' @export
PCMPresentCoordinates <- function(X, tree, metaI) {

  if(is.null(metaI)) {
    N <- PCMTreeNumTips(tree)
    M <- PCMTreeNumNodes(tree)
    k <- nrow(X)
    postorder <- PCMTreePostorder(tree)
  } else {
    N <- metaI$N
    M <- metaI$M
    k <- metaI$k
    postorder <- rev(metaI$preorder)
  }

  edge <- tree$edge

  if(is.null(X)) {
    pc <- matrix(TRUE, k, M)
  } else {
    pc <- matrix(FALSE, k, M)
    for(ei in postorder) {
      i <- edge[ei, 2]
      j <- edge[ei, 1]

      if(i <= N) {
        # all es pointing to tips
        # here we count NAs as present because they are integrated out at the
        # parent nodes.
        pc[, i] <- !is.nan(X[, i])
      } else {
        # edges pointing to internal nodes, for which all children nodes have
        # been visited here we do nothing.
      }

      #update parent pc
      pc[, j] <- pc[, j] | pc[, i]
    }
    # at the end correct the present coordinates for the traits at the tips which
    # are NA but not NaN: Both NAs and NaNs result in FALSE at a tip;
    # only coordinates for which all descending tips have NaN are FALSE at
    # internal nodes.
    pc[, seq_len(N)] <- !is.na(X[, seq_len(N)])

    if(any(rowSums(pc) == 0)) {
      stop("PCMPresentCoordinates:: Some tips have 0 present coordinates.
           Consider removing these tips.")
    }
  }
  pc
}

#' Meta-information about a tree and trait data associated with a PCM
#'
#' @description This function pre-processes the given tree and data in order to
#' create meta-information used during likelihood calculation.
#' @inheritParams PCMLik
#' @param preorder an integer vector of row-indices in tree$edge matrix as returned
#' by PCMTreePreorder. This can be given for performance speed-up when several
#' operations needing preorder are executed on the tree. Default : \code{NULL}.
#' @param ... additional arguments used by implementing methods.
#'
#' @return a named list with the following elements:
#' \item{X}{k x N matrix denoting the trait data;}
#' \item{VE}{k x k x N array denoting the measurement error variance covariance
#' matrix for each for each tip i = 1,...,N. See the parameter \code{SE} in
#' \code{\link{PCMLik}.}}
#' \item{M}{total number of nodes in the tree;}
#' \item{N}{number of tips;}
#' \item{k}{number of traits;}
#' \item{RTree}{number of parts on the tree (distinct elements of tree$edge.part);}
#' \item{RModel}{number of regimes in the model (elements of attr(model, regimes));}
#' \item{p}{number of free parameters describing the model;}
#' \item{r}{an integer vector corresponding to tree$edge with the regime for each
#' branch in tree;}
#' \item{xi}{an integer vector of 0's and 1's corresponding to the rows in tree$edge
#' indicating the presence of a jump at the corresponding branch;}
#' \item{pc}{a logical matrix of dimension k x M denoting the present coordinates
#' for each node; in special cases this matrix can be edited by hand after calling
#' PCMInfo and before passing the returned list to PCMLik. Otherwise, this matrix
#' can be calculated in a custom way by specifying the option PCMBase.PCMPresentCoordinatesFun.
#' See also \code{\link{PCMPresentCoordinates}} and \code{\link{PCMOptions}}. }
#' This list is passed to \code{\link{PCMLik}}.
#'
#' @export
PCMInfo <- function(
  X, tree, model,
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  verbose = FALSE, preorder = NULL, ...) {
  UseMethod("PCMInfo", model)
}

#' @export
PCMInfo.PCM <- function(
  X, tree, model,
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  verbose = FALSE, preorder = NULL, ...) {

  if(is.Transformable(model)) {
    model <- PCMApplyTransformation(model)
  }

  tree <- PCMTree(tree)

  if(is.null(preorder)) {
    preorder <- PCMTreePreorder(tree)
  }

  k <- PCMNumTraits(model)
  N <- PCMTreeNumTips(tree)
  M <- PCMTreeNumNodes(tree)

  VE <- array(0.0, dim = c(k, k, N))
  if(is.matrix(SE) && identical(unname(dim(SE)), c(k, N)) ) {
    # SE is k x N matrix
    for(i in seq_len(N)) {
      VE[, , i] <- diag(SE[, i]*SE[, i], nrow = k)
    }
  } else if(is.array(SE) && identical(unname(dim(SE)), c(k, k, N)) ) {
    # SE is k x k x N array
    if(getOption("PCMBase.Transpose.Sigma_x", FALSE)) {
      for(i in seq_len(N)) VE[, , i] <- t(SE[, , i]) %*% SE[, , i]
    } else {
      for(i in seq_len(N)) VE[, , i] <- SE[, , i] %*% t(SE[, , i])
    }

  } else {
    stop("PCMInfo:: SE should be a k x N matrix or a k x k x N array.")
  }

  res <- list(
    X = X,
    VE = VE,
    M = M,
    N = N,
    k = k,
    RTree = PCMTreeNumParts(tree),
    RModel = PCMNumRegimes(model),
    r = {
      regimeIndices <- match(PCMTreeGetRegimesForEdges(tree), PCMRegimes(model))
      if(any(is.na(regimeIndices))) {
        stop(paste0(
          "PCMInfo:: Some of the regimes for the edges in tree could not be",
          "matched with regimes in PCMRegimes(model)."))
      }
      regimeIndices
    },
    p = PCMParamCount(model),
    xi = PCMTreeJumps(tree),
    edge = tree$edge,
    edge.length = tree$edge.length,
    preorder = preorder
  )
  res <- c(res, PCMOptions())

  PCMPresentCoordinatesFun <- getOption(
    "PCMBase.PCMPresentCoordinatesFun", PCMPresentCoordinates)
  res$pc <- PCMPresentCoordinatesFun(X, tree, res)
  res$NA_double_ <- NA_real_

  res
}

#' Create a likelihood function of a numerical vector parameter
#' @inheritParams PCMLik
#' @param positiveValueGuard positive numerical value (default Inf), which
#' serves as a guard for numerical error. Values exceeding
#' this positiveGuard are most likely due to numerical error and
#' PCMOptions()$PCMBase.Value.NA is returned instead.
#' @return a function of a numerical vector parameter called p returning the
#' likelihood of X given the tree and the model with parameter values specified
#' by p.
#' @details It is possible to specify a function for the argument metaI. This
#' function should have three parameters (X, tree, model) and should return a
#' metaInfo object. (see \code{\link{PCMInfo}}).
#'
#' @export
PCMCreateLikelihood <- function(
  X, tree, model,
  SE = matrix(0.0, PCMNumTraits(model), PCMTreeNumTips(tree)),
  metaI = PCMInfo(X, tree, model, SE),
  positiveValueGuard = Inf) {

  metaI <- if(is.character(metaI)) {
    metaIFun <- try(eval(parse(text = metaI)), silent = TRUE)
    if(!is.function(metaIFun)) {
      warning(paste0("PCMCreateLikelihood:: The expression ", metaI,
                     " does not evaluate to a function. Using PCMInfo."))
      metaIFun <- PCMInfo
    }
    metaI <- metaIFun(X = X, tree = tree, model = model, SE = SE)
  } else if(is.function(metaI)) {
    metaI(X = X, tree = tree, model = model, SE = SE)
  } else {
    metaI
  }

  value.NA <- PCMOptions()$PCMBase.Value.NA

  function(p, log = TRUE) {
    PCMParamLoadOrStore(model, p, offset = 0L,
                        k = PCMNumTraits(model),
                        R = PCMNumRegimes(model),
                        load = TRUE)
    value <- PCMLik(X, tree, model, SE, metaI, log = log)
    if(is.na(value) || value > positiveValueGuard) {
      value <- value.NA
    }
    value
  }
}

#' Find the S3 method for a given PCM object or class-name and an S3 generic
#' @param x a character string denoting a PCM S3 class name (e.g. "OU"), or a
#' PCM object.
#' @param method a character string denoting the name of an S3 generic function.
#' Default: "PCMCond".
#' @return a function object corresponding to the S3 method found or an error is
#' raised if no such function is found for the specified object and method.
#'
#' @importFrom utils getS3method
#' @export
PCMFindMethod <- function(x, method = "PCMCond") {
  if(is.character(x)) {
    o <- try(PCM(x), silent = TRUE)
    if(inherits(o, "try-error")) {
      stop(
        paste0(
          "PCMFindMethod:: ", toString(o), " PCM constructor called on x",
          "='", x, "'")
      )
    }
  } else if(is.PCM(x)) {
    o <- x
  } else {
    stop(
      paste0(
        "PCMFindMethod:: x should be a character string denoting a",
        "PCM class name or a PCM object."))
  }

  cls <- c(class(o), 'default')
  results <- lapply(cls, function(y) try(getS3method(method, y), silent = TRUE))
  Find(function (x) class(x) != 'try-error', results)
}

#' Given a PCM or a parameter object, extract an analogical object for a subset
#' of the dimensions (traits) in the original object.
#'
#' @details This is an S3 generic
#' @param obj a PCM or a parameter object.
#' @param dims an integer vector; should be a subset or equal to
#' \code{seq_len(PCMNumTraits(obj))} (the default).
#' @param nRepBlocks a positive integer specifying if the specified dimensions
#' should be replicated to obtain a higher dimensional model, where the parameter
#' matrices are block-diagonal with blocks corresponding to dims. Default: 1L.
#' @return an object of the same class as obj with a subset of obj's dimensions
#' multiplied \code{nRepBlocks} times.
#' @export
PCMExtractDimensions <- function(
  obj,
  dims = seq_len(PCMNumTraits(obj)),
  nRepBlocks = 1L) {
  UseMethod("PCMExtractDimensions", obj)
}

#' @export
PCMExtractDimensions.PCM <- function(
  obj,
  dims = seq_len(PCMNumTraits(obj)),
  nRepBlocks = 1L) {
  dims <- unique(dims)
  if( !isTRUE(all(dims %in% seq_len(PCMNumTraits(obj)))) ) {
    stop("PCMExtractDimensions.PCM:: Some dims are outside 1:PCMNumTraits(obj).")
  }
  obj2 <- lapply(obj, PCMExtractDimensions, dims = dims, nRepBlocks = nRepBlocks)
  attributes(obj2) <- attributes(obj)
  attr(obj2, "k") <- as.integer(length(dims) * nRepBlocks)
  attr(attr(obj2, "spec"), "k") <- as.integer(length(dims) * nRepBlocks)
  for(name in names(attr(obj2, "spec"))) {
    if(is.PCM(attr(obj2, "spec")[[name]])) {
      attr(attr(obj2, "spec")[[name]], "k") <- as.integer(length(dims) * nRepBlocks)
    }
  }
  attr(obj2, "p") <- PCMParamCount(obj2)
  if(!is.null(attr(obj, "X"))) {
    attr(obj2, "X") <- attr(obj, "X")[rep(dims, nRepBlocks), , drop = FALSE]
  }
  if(!is.null(attr(obj, "SE"))) {
    attr(obj2, "SE") <- if(is.matrix(attr(obj, "SE"))) {
      attr(obj, "SE")[rep(dims, nRepBlocks), , drop = FALSE]
    } else {
      kronecker(diag(1, nRepBlocks), attr(obj, "SE"))
    }
  }
  if(!is.null(attr(obj, "tree"))) {
    attr(obj2, "tree") <- attr(obj, "tree")
  }
  obj2
}

#' Given a PCM or a parameter object, extract an analogical object for a subset
#' of the regimes in the original object.
#'
#' @details This is an S3 generic
#' @param obj a PCM or a parameter object.
#' @param regimes an integer vector; should be a subset or equal to
#' \code{seq_len(PCMNumRegimes(obj))} (the default).
#' @return an object of the same class as obj with a subset of obj's regimes
#' @export
PCMExtractRegimes <- function(obj, regimes = seq_len(PCMNumRegimes(obj))) {
  UseMethod("PCMExtractRegimes", obj)
}

#' @export
PCMExtractRegimes.PCM <- function(obj, regimes = seq_len(PCMNumRegimes(obj))) {
  obj2 <- lapply(obj, PCMExtractRegimes, regimes = regimes)
  attributes(obj2) <- attributes(obj)
  attr(obj2, "regimes") <- attr(obj2, "regimes")[regimes]
  attr(obj2, "p") <- PCMParamCount(obj2)
  attr(attr(obj2, "spec"), "regimes") <- attr(attr(obj2, "spec"), "regimes")[regimes]

  obj2
}
venelin/PCMBase documentation built on March 14, 2024, 8:24 p.m.