globalVariables(c("lower.95", "upper.95", "lower.75", "upper.75", "Generation", "ACSF",
"Chain", "Clade", "title", "Generations", "par", "cor", "text", "sd",
"generation", "chain", "x", "y", "strwidth", "mtext", "Split.Frequency",
"hclust", "..density..", "recordPlot", "rgb", "panel.smooth", "ci.lower",
"ci.upper", "median", "quantile", "median.ess", "ASDSF" ,"Axis" ,"CSF" ,"Gen" ,
"as.dist" ,"box" ,"cmdscale" ,"dev.flush" ,"dev.hold" ,"dev.off" ,"ess" ,
"optim" ,"pdf" ,"plot" ,"points" ,"read.table" ,"reorder" ,"sampling.interval",
"split.frequency" ,"tail" ,"topo.distance" ,"topological.distance",
"dev.control"))
#' analyze.rwty, the main interface for rwty analyses and plots.
#'
#' This is the main user interface to rwty. It allows users to conduct simple
#' visualizations of MCMC chain performance with very few arguments.
#'
#' @import ape
#' @import ggplot2
#' @importFrom reshape2 melt
#' @importFrom phangorn RF.dist
#' @importFrom coda effectiveSize mcmc
#' @importFrom viridis scale_color_viridis scale_fill_viridis viridis plasma
#' @importFrom grid unit
#' @importFrom plyr ddply summarize .
#' @importFrom ggdendro ggdendrogram
#' @importFrom GGally ggpairs
#' @importFrom parallel mclapply detectCores
#' @importFrom utils citation
#'
#' @param chains A list of rwty.chain objects.
#' @param burnin The number of trees to eliminate as burnin. Default value is zero.
#' @param window.size The number of trees to include in each windows of sliding window plots
#' @param treespace.points The number of trees to plot in the treespace plot. Default is 100
#' @param n.clades The number of clades to include in plots of split frequencies over the course of the MCMC
#' @param min.freq The minimum frequency for a node to be used for calculating ASDSF. Default is 0.1
#' @param fill.color The name of a column in your log file that you would like to use as the fill colour of points in the treespace plots
#' @param filename Name of an output file (e.g., "output.pdf"). If none is supplied, rwty will not save outputs to file.
#' @param overwrite Boolean variable saying whether output file should be overwritten, if it exists.
#' @param facet A Boolean expression indicating whether multiple chains should be plotted as facet plots (default TRUE).
#' @param free_y TRUE/FALSE to turn free y scales on the facetted plots on or off (default FALSE). Only works if facet = TRUE.
#' @param autocorr.intervals The maximum number of intervals to use for autocorrelation plots.
#' @param ess.reps The number of replicate analyses to do when calculating the pseudo ESS.
#' @param treedist the type of tree distance metric to use, can be 'PD' for path distance or 'RF' for Robinson Foulds distance.
#' @param params A vector of parameters to use when making the parameter correlation plots. Defaults to the first two columns in the log table.
#' @param max.sampling.interval The maximum sampling interval to use for generating autocorrelation plots
#' @param ... Extra arguments to be passed to plotting and analysis functions.
#'
#' @return output The output is a list containing the following plots:
#'
#' Plots of likelihood, model parameters, and tree topologies as a function of chain length (the first two only when output from MCMC parameters has been loaded along with the tree topologies).
#'
#' Plot of autocorrelation of tree topolgies at different sampling intervals along a chain
#'
#' Plot of split frequencies calculated in sliding windows for the most variable clades
#'
#' Plot of change in split frequencies between sliding windows for all clades
#'
#' Plot of cumulative split frequencies as the MCMC progresses
#'
#' Plot of change in cumulative split frequencies as the MCMC progresses
#'
#' Heatmap and point depictions of chains in treespace.
#'
#' Plot of the Average Standard Deviation of Split Frequencies (ASDSF) between chains as the MCMC progresses
#'
#' Plot of pairwise correlations between split frequencies among chains
#'
#' Plot of chains clustered by their pairwise ASDSF values
#'
#' @keywords plots, rwty, MCMC, topology, ESS
#'
#' @export analyze.rwty
#' @examples
#' \dontrun{
#' data(fungus)
#' p <- analyze.rwty(fungus, burnin = 50, window.num = 50)
#' p
#' }
analyze.rwty <- function(chains, burnin=0, window.size=20, treespace.points = 100, n.clades = 20,
min.freq = 0.0, fill.color = NA, filename = NA,
overwrite=FALSE, facet=TRUE, free_y=FALSE, autocorr.intervals=100, ess.reps = 20,
treedist = 'PD', params = NA, max.sampling.interval = NA, ...){
chains <- check.chains(chains)
N <- length(chains[[1]]$trees)
rwty.params.check(chains, N, burnin, window.size, treespace.points, min.freq, filename, overwrite)
# check to see if ptables exist, make related plots
if(all(unlist(lapply(chains, function(x) length(x$ptable[,1])))) > 0){
# plot parameters for all chains
parameter.plots <- makeplot.all.params(chains, burnin = burnin, facet=facet, strip = 1)
parameter.correlations <- makeplot.pairs(chains, burnin = burnin, params = params, treedist = treedist, strip = 1)
names(parameter.correlations) <- paste0(names(parameter.correlations), ".correlations")
}
else{
parameter.plots <- makeplot.topology(chains, burnin = burnin, facet = facet)
parameter.correlations <- NA
}
# plot autocorrelation
if(N < 200){
autocorr.plot <- NULL
} else {
autocorr.plot <- makeplot.autocorr(chains, burnin = burnin, autocorr.intervals = autocorr.intervals, facet = facet, max.sampling.interval = max.sampling.interval)
}
# plot sliding window sf plots
splitfreq.sliding <- makeplot.splitfreqs.sliding(chains, burnin=burnin, n.clades = n.clades, window.size = window.size, facet = facet)
acsf.sliding <- makeplot.acsf.sliding(chains, burnin=burnin, window.size = window.size, facet = facet)
# plot cumulative sf plots
splitfreq.cumulative <- makeplot.splitfreqs.cumulative(chains, burnin=burnin, n.clades = n.clades, window.size = window.size, facet = facet)
acsf.cumulative <- makeplot.acsf.cumulative(chains, burnin=burnin, window.size = window.size, facet = facet)
# plot treespace for all chains
treespace.plots <- makeplot.treespace(chains, n.points = treespace.points, burnin = burnin, fill.color = fill.color)
# Add citations for all packages
citations <- list(
citation('rwty'),
citation('ape'),
citation('phangorn'),
citation('ggplot2'),
citation('coda'),
citation('viridis'),
citation('ggdendro'),
citation('GGally'),
citation('plyr'),
citation('reshape2'),
citation('stats')
)
plots <- c(parameter.plots,
parameter.correlations,
autocorr.plot,
splitfreq.sliding,
acsf.sliding,
splitfreq.cumulative,
acsf.cumulative,
treespace.plots)
# plot multichain plots when appropriate
if(length(chains) > 1){
asdsf.plot <- makeplot.asdsf(chains, burnin = burnin, window.size = window.size, min.freq = min.freq)
splitfreq.matrix.plots <- makeplot.splitfreq.matrix(chains, burnin = burnin)
plots <- c(plots, asdsf.plot, splitfreq.matrix.plots)
}
plots[["citations"]] <- citations
# Print all to pdf if filename provided
if(!is.na(filename)){
print(sprintf("Saving plots to file: %s", filename))
pdf(file=filename, width = 10, height = 7, ...)
print(plots)
dev.off()
}
return(plots)
}
rwty.params.check <- function(chains, N, burnin, window.size, treespace.points, min.freq, filename, overwrite){
# Checks for reasonable burnin
if(!is.numeric(burnin)){
stop("burnin must be numeric")
}
if(burnin < 0){
stop("burnin must be zero or greater")
}
if(burnin > N){
stop("burnin must be smaller than the number of trees in the chain")
}
# Checks for reasonable window.num
if(!is.numeric(window.size)){
stop("window.num must be numeric")
}
if(window.size < 1){
stop("window.size must be 1 or greater")
}
if(as.integer((N - burnin) / window.size < 2)) {
stop("window.size cannot be more than half the number of post-burnin trees")
}
# Checks for reasonable treespace.points
if(!is.numeric(treespace.points)){
stop("treespace.points must be numeric")
}
if(treespace.points < 2){
stop("treespace.points must be 2 or greater")
}
if((N - burnin) < treespace.points){
stop("treespace.points cannot be more than the number of post-burnin trees")
}
# Checks for reasonable min.freq
if(!is.numeric(min.freq)){
stop("min.freq must be numeric")
}
if(min.freq < 0 || min.freq > 1){
stop("min.freq must be between 0 and 1")
}
# Checks for output file
if(!is.na(filename)){
if(file.exists(filename) && overwrite==FALSE){
stop("You specified an output filename, but an output file already exists and overwrite is set to FALSE. Please check and try again.")
}
}
}
get.processors <- function(processors){
if(Sys.info()["sysname"] == 'Windows'){
# mclapply is not supported on windows
# so we give a single processor,
# in which case mclapply calls fall back
# on lapply
return(1)
}
# check for global user-defined variable
if(exists('rwty.processors')){
# should be an integer
if(!is.numeric(rwty.processors)){
stop("the global rwty.processors variable must be an integer")
}
if(rwty.processors%%1==0){
available_processors = detectCores(all.tests = FALSE, logical = FALSE)
if(rwty.processors > available_processors){
rwty.processors = available_processors - 1
}
if(rwty.processors < 1){
rwty.processors = 1
}
return(rwty.processors)
}else{
stop("the global rwty.processors variable must be an integer")
}
}
if(is.null(processors)){
available_processors = detectCores(all.tests = FALSE, logical = FALSE)
processors = max(c(1, c(available_processors - 1)))
}
return(processors)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.