Nothing
#' Compare causal models in a phylogenetic context.
#'
#' Continuous variables are modeled using [phylolm::phylolm], while binary
#' traits are modeled using [phylolm::phyloglm].
#'
#' *Parallel processing*: From v1.2, `phylopath` uses the `future` framework
#' for parallel processing. This is compatible with the parallel computation
#' within the underlying `phylolm`, making it easy to enable parallel
#' processing of multiple models, and of bootstrap replicates. To enable,
#' simply set a parallel `plan()` using the `future` package. Typically, you'll
#' want to run `future::plan("multisession", workers = n)`, where `n` is the
#' number of cores. Now parallel processing is enabled. Return to sequential
#' processing using `future::plan("sequential")`
#'
#' @param model_set A list of directed acyclic graphs. These are matrices,
#' typically created with \code{define_model_set}.
#' @param data A \code{data.frame} with data. If you have binary variables, make
#' sure they are either character values or factors!
#' @param tree A phylogenetic tree of class \code{phylo}.
#' @param model The evolutionary model used for the regressions on continuous
#' variables. See [phylolm::phylolm] for options and details. Defaults to
#' Pagel's lambda model
#' @param method The estimation method for the binary models. See
#' [phylolm::phyloglm] for options and details. Defaults to logistic MPLE.
#' @param order Causal order of the included variable, given as a character
#' vector. This is used to determine which variable should be the dependent
#' in the dsep regression equations. If left unspecified, the order will be
#' automatically determined. If the combination of all included models is
#' itself a DAG, then the ordering of that full model is used. Otherwise,
#' the most common ordering between each pair of variables is used to create
#' a general ordering.
#' @param parallel *Superseded* From v1.2 `phylopath` uses the `future` package
#' for all parallel processing, see details.
#' @param na.rm Should rows that contain missing values be dropped from the data
#' as necessary (with a message)?
#' @param ...
#' Arguments passed on to `phylolm`:
#'
#' `lower.bound`: optional lower bound for the optimization of the phylogenetic model parameter.
#'
#' `upper.bound`: optional upper bound for the optimization of the phylogenetic model parameter.
#'
#' `starting.value`: optional starting value for the optimization of the phylogenetic model parameter.
#'
#' `measurement_error`: a logical value indicating whether there is measurement error sigma2_error (see Details).
#'
#' Arguments passed on to `phyloglm`:
#'
#' `btol`: bound on the linear predictor to bound the searching space.
#'
#' `log.alpha.bound`: bound for the log of the parameter alpha.
#'
#' `start.beta`: starting values for beta coefficients.
#'
#' `start.alpha`: starting values for alpha (phylogenetic correlation).
#'
#'
#' @return A phylopath object, with the following components:
#' \describe{
#' \item{d_sep}{for each model a table with separation statements and statistics.}
#' \item{model_set}{the DAGs}
#' \item{data}{the supplied data}
#' \item{tree}{the supplied tree}
#' \item{model}{the employed model of evolution in `phylolm`}
#' \item{method}{the employed method in `phyloglm`}
#' \item{dots}{any additional arguments given, these are passed on to downstream functions}
#' \item{warnings}{any warnings generated by the models}
#' }
#' @export
#' @examples
#' #see vignette('intro_to_phylopath') for more details
#' candidates <- define_model_set(
#' A = NL ~ BM,
#' B = NL ~ LS,
#' .common = c(LS ~ BM, DD ~ NL)
#' )
#' p <- phylo_path(candidates, rhino, rhino_tree)
#'
#' # Printing p gives some general information:
#' p
#' # And the summary gives statistics to compare the models:
#' summary(p)
#'
phylo_path <- function(model_set, data, tree, model = 'lambda', method = 'logistic_MPLE',
order = NULL, parallel = NULL, na.rm = TRUE, ...) {
# Always coerce to data.frame, as tibbles and data.tables do NOT play nice.
data <- as.data.frame(data)
dots <- list(...)
# check for users passing boot as an optional parameter, which has been deprecated
if ('boot' %in% names(dots)) {
warning('Using `boot` has been deprecated here, pass to choice(), best(), average() or est_DAG() instead.')
dots <- dots[names(dots) != 'boot']
}
tmp <- check_models_data_tree(model_set, data, tree, na.rm)
model_set <- tmp$model_set
data <- tmp$data
tree <- tmp$tree
if (!is.null(order)) {
# check for duplications in the order vector, when manually supplied
if (length(order) > length(unique(order))) {
stop('The supplied `order` argument must contain each variable exactly once.')
}
if (length(order) != ncol(data)) {
stop('Not all variables are included in `order`, missing variables: ',
setdiff(colnames(data), order))
}
} else {
order <- find_consensus_order(model_set)
}
formulas <- lapply(model_set, find_formulas, order)
formulas <- purrr::map(
formulas,
~purrr::map(.x, ~{attr(., ".Environment") <- NULL; .})
)
f_list <- unique(unlist(formulas))
if (!is.null(parallel)) {
warning('Use of the `parallel` argument has been superseded by the use of `future`. See documentation for details.')
}
dsep_models_runs <- future.apply::future_lapply(
f_list, phylo_g_lm,
data = data, tree = tree, model = model, method = method, dots = dots,
future.seed = TRUE
)
# Produce appropriate error if needed
errors <- purrr::map(dsep_models_runs, 'error')
purrr::map2(
errors, f_list,
~if(!is.null(.x))
stop(paste(
'Fitting the following model:\n ',
Reduce(paste, deparse(.y)),
'\nproduced this error:\n ',
.x
), call. = FALSE)
)
# Collect warnings as well, but save those for later.
warnings <- purrr::map(dsep_models_runs, 'warning')
warnings <- purrr::map2(
warnings, f_list,
~if(!is.null(.x))
paste(
'Fitting the following model:\n ',
Reduce(paste, deparse(.y)),
'\nproduced this/these warning(s):\n ',
.x, '\n'
)
)
warnings <- warnings[!sapply(warnings, is.null)]
if (length(warnings) > 1) {
warning('Some models produced warnings. Use `show_warnings()` to view them.\n')
}
# Collect models.
dsep_models <- purrr::map(dsep_models_runs, 'result')
dsep_models <- purrr::map(formulas, ~dsep_models[match(.x, f_list)])
d_sep <- purrr::map2(
formulas,
dsep_models,
~tibble::tibble(
d_sep = as.character(.x),
p = purrr::map_dbl(.y, get_p),
phylo_par = purrr::map_dbl(.y, get_phylo_param),
model = .y
)
)
out <- list(
d_sep = d_sep, model_set = model_set, data = data, tree = tree, model = model, method = method,
dots = dots, warnings = warnings
)
class(out) <- 'phylopath'
return(out)
}
#' @export
summary.phylopath <- function(object, ...) {
phylopath <- object
stopifnot(inherits(phylopath, 'phylopath'))
n <- nrow(phylopath$data)
k <- sapply(phylopath$d_sep, nrow)
q <- sapply(phylopath$model_set, function(m) nrow(m) + sum(m))
C <- sapply(phylopath$d_sep, function(x) C_stat(x$p))
p <- C_p(C, k)
IC <- CICc(C, q, n)
if (n <= max(q) + 1) {
IC[n <= q] <- NA
warning('CICc was not calculated for causal models where the number of parameters is equal to',
'or larger than the number of species.')
}
d <- data.frame(
model = names(phylopath$model_set), k = k, q = q, C = C, p = p,
CICc = IC, stringsAsFactors = FALSE
)
d <- d[order(d$CICc), ]
d$delta_CICc <- d$CICc - d$CICc[1]
d$l <- l(d$delta_CICc)
d$w <- w(d$l)
class(d) <- c('phylopath_summary', 'data.frame')
return(d)
}
#' Extract and estimate the best supported model from a phylogenetic path
#' analysis.
#'
#' @param phylopath An object of class \code{phylopath}.
#' @param ... Arguments to pass to [phylolm::phylolm] and [phylolm::phyloglm]. Provide `boot = K`
#' parameter to enable bootstrapping, where `K` is the number of bootstrap replicates. If you
#' specified other options in the original [phylo_path] call you don't need to specify them again.
#'
#' @return An object of class \code{fitted_DAG}.
#' @export
#'
#' @examples
#' candidates <- define_model_set(
#' A = NL ~ BM,
#' B = NL ~ LS,
#' .common = c(LS ~ BM, DD ~ NL)
#' )
#' p <- phylo_path(candidates, rhino, rhino_tree)
#' best_model <- best(p)
#' # Print the best model to see coefficients, se and ci:
#' best_model
#' # Plot to show the weighted graph:
#' plot(best_model)
#'
best <- function(phylopath, ...) {
stopifnot(inherits(phylopath, 'phylopath'))
dots <- combine_dots(phylopath$dots, ...)
b <- summary(phylopath)[1, 'model']
best_model <- phylopath$model_set[[b]]
do.call(
est_DAG,
c(list(best_model, phylopath$data, phylopath$tree, phylopath$model, phylopath$method), dots)
)
}
#' Extract and estimate an arbitrary model from a phylogenetic path analysis.
#'
#' @param phylopath An object of class \code{phylopath}.
#' @param choice A character string of the name of the model to be chosen, or
#' the index in \code{model_set}.
#' @param ... Arguments to pass to [phylolm::phylolm] and [phylolm::phyloglm]. Provide `boot = K`
#' parameter to enable bootstrapping, where `K` is the number of bootstrap replicates. If you
#' specified other options in the original [phylo_path] call you don't need to specify them again.
#'
#' @return An object of class \code{fitted_DAG}.
#' @export
#'
#' @examples
#' candidates <- define_model_set(
#' A = NL ~ BM,
#' B = NL ~ LS,
#' .common = c(LS ~ BM, DD ~ NL)
#' )
#' p <- phylo_path(candidates, rhino, rhino_tree)
#' my_model <- choice(p, "B")
#' # Print the best model to see coefficients, se and ci:
#' my_model
#' # Plot to show the weighted graph:
#' plot(my_model)
#'
choice <- function(phylopath, choice, ...) {
stopifnot(inherits(phylopath, 'phylopath'))
dots <- combine_dots(phylopath$dots, ...)
do.call(
est_DAG,
c(
list(
phylopath$model_set[[choice]], phylopath$data, phylopath$tree, phylopath$model,
phylopath$method),
dots
)
)
}
#' Extract and average the best supported models from a phylogenetic path
#' analysis.
#'
#' @param phylopath An object of class `phylopath`.
#' @param cut_off The CICc cut-off used to select the best models. Use
#' `Inf` to average over all models. Use the [best()] function to
#' only use the top model, or [choice()] to select any single model.
#' @param ... Arguments to pass to [phylolm::phylolm] and [phylolm::phyloglm]. Provide `boot = K`
#' parameter to enable bootstrapping, where `K` is the number of bootstrap replicates. If you
#' specified other options in the original [phylo_path] call you don't need to specify them again.
#'
#' @inheritParams average_DAGs
#'
#' @return An object of class `fitted_DAG`.
#' @export
#'
#' @examples
#' candidates <- define_model_set(
#' A = NL ~ RS,
#' B = RS ~ NL + BM,
#' .common = c(LS ~ BM, DD ~ NL, NL ~ BM)
#' )
#' p <- phylo_path(candidates, rhino, rhino_tree)
#' summary(p)
#'
#' # Models A and B have similar support, so we may decide to take
#' # their average.
#'
#' avg_model <- average(p)
#' # Print the average model to see coefficients, se and ci:
#' avg_model
#'
#' \dontrun{
#' # Plot to show the weighted graph:
#' plot(avg_model)
#'
#' # One can see that an averaged model is not necessarily a DAG itself.
#' # This model actually has a path in two directions.
#'
#' # Note that coefficients that only occur in one of the models become much
#' # smaller when we use full averaging:
#'
#' coef_plot(avg_model)
#' coef_plot(average(p, method = 'full'))
#' }
#'
average <- function(phylopath, cut_off = 2, avg_method = 'conditional', ...) {
stopifnot(inherits(phylopath, 'phylopath'))
dots <- combine_dots(phylopath$dots, ...)
d <- summary(phylopath)
b <- d[d$delta_CICc < cut_off, ]
best_models <- lapply(
phylopath$model_set[b$model],
function(x) {
do.call(
est_DAG,
c(
list(
DAG = x, data = phylopath$data, tree = phylopath$tree, model = phylopath$model,
method = phylopath$method),
dots
)
)
}
)
average <- average_DAGs(best_models, b$w, avg_method)
class(average$coef) <- c('matrix', 'DAG')
return(average)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.