knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) if(!requireNamespace("ggtree")) { message("Building the vignette requires ggtree R-package. Trying to install.") status.ggtree <- try({ if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("ggtree", version = "3.8") }, silent = TRUE) if(class(status.ggtree == "try-error")) { stop( "The ggtree installation did not succeed. The vignette cannot be built.") } } useCacheMGPM_A_F_BC2_RR <- TRUE
Note: The writing of this vignette is currently in progress. If needed, please, contact the author for assistance. Thanks for your understanding.
library(PCMBase) library(PCMBaseCpp) library(PCMFit) library(data.table) # other needed packages, e.g. ape, data.table etc... # A character string used in filenames for a model inference on a given data: prefixFiles = paste0("MGPM_A_F_BC2_RR")
# creating the cluster for this PCMFit run: if(!exists("cluster") || is.null(cluster)) { if(require(doMPI)) { # using MPI cluster as distributed node cluster (possibly running on a # cluster of multiple nodes) # Get the number of cores. Assume this is run in a batch job. p = strtoi(Sys.getenv('LSB_DJOB_NUMPROC')) cluster <- startMPIcluster(count = p-1, verbose = TRUE) doMPI::registerDoMPI(cluster) } else { warning("Could not create an MPI cluster") } }
if(!exists("cluster") || is.null(cluster)) { if(require(parallel)) { # possibly running on personal computer without mpi installation cluster <- parallel::makeCluster( parallel::detectCores(logical = TRUE), outfile = paste0("log_", prefixFiles, ".txt")) doParallel::registerDoParallel(cluster) } else { warning("Could not create a parallel cluster") } }
# This function is going to be executed on each worker node. generatePCMModelsFunction <- function() { # make results reproducible set.seed(4, kind = "Mersenne-Twister", normal.kind = "Inversion") PCMGenerateModelTypes() # An example DefineParameterLimits.R file can be found in # vignettes/DefineParameterLimits.R of the PCMFit package source-code. # Note that the path here is relative to the working directory of # the worker node R-process. source('DefineParameterLimits.R', local=FALSE) }
tree <- PCMTree(PCMFitDemoObjects$dtSimulated$tree[[1]]) X <- PCMFitDemoObjects$dtSimulated$X[[1]][, seq_len(PCMTreeNumTips(tree))]
currentResultFile <- paste0("Current_", prefixFiles, ".RData") if(file.exists(currentResultFile)) { load(currentResultFile) tableFitsPrev <- listResults$tableFits } else { tableFitsPrev <- NULL } if(is.null(PCMFitDemoObjects$fitMGPM_A_F_BC2_RR) || !useCacheMGPM_A_F_BC2_RR) { fitMGPM_A_F_BC2_RR <- PCMFitMixed( X = X, tree = tree, metaIFun = PCMInfoCpp, tableFitsPrev = tableFitsPrev, generatePCMModelsFun = generatePCMModelsFunction, maxNumRoundRobins = 2, maxNumPartitionsInRoundRobins = 2, printFitVectorsToConsole = TRUE, doParallel = TRUE) PCMFitDemoObjects$fitMGPM_A_F_BC2_RR <- fitMGPM_A_F_BC2_RR usethis::use_data(PCMFitDemoObjects, overwrite = TRUE) } fitMGPM_A_F_BC2_RR <- PCMFitDemoObjects$fitMGPM_A_F_BC2_RR
currentResultFile <- paste0("Current_", prefixFiles, ".RData") if(file.exists(currentResultFile)) { load(currentResultFile) tableFitsPrev <- listResults$tableFits } else { tableFitsPrev <- NULL } fitMGPM_A_F_BC2_RR <- PCMFitMixed( X = X, tree = tree, metaIFun = PCMInfoCpp, generatePCMModelsFun = generatePCMModelsFunction, maxNumRoundRobins = 2, maxNumPartitionsInRoundRobins = 2, tableFitsPrev = tableFitsPrev, prefixFiles = prefixFiles, doParallel = TRUE)
modelTrue <- PCMFitDemoObjects$dtSimulated$model[[1]] # We specify the tree and trait values for the true model in order to easily # calculate parameter count likelihood and AIC for it: attr(modelTrue, "tree") <- PCMFitDemoObjects$dtSimulated$treeWithRegimes[[1]] attr(modelTrue, "X") <- X attr(modelTrue, "SE") <- X * 0.0
bestFit <- RetrieveBestFitScore(fitMGPM_A_F_BC2_RR) modelTrue <- PCMFitDemoObjects$dtSimulated$model[[1]] # We specify the tree and trait values for the true model in order to easily # calculate parameter count likelihood and AIC for it: attr(modelTrue, "tree") <- PCMFitDemoObjects$dtSimulated$treeWithRegimes[[1]] attr(modelTrue, "X") <- X attr(modelTrue, "SE") <- X * 0.0 listModels <- list( RetrieveBestModel(PCMFitDemoObjects$fitBM), RetrieveBestModel(PCMFitDemoObjects$fitOU), RetrieveBestModel(PCMFitDemoObjects$fitMGPMTrueTypeMapping), RetrieveBestModel(PCMFitDemoObjects$fitMGPMTrueTypeMappingCheat), modelTrue, bestFit$inferredModel) dtSummary <- data.table( model = c( "Global BM", "Global OU", "True MGPM, known shifts, unknown parameters", "True MGPM, known shifts, starting from known true parameters (cheating)", "True MGPM, fixed true shifts and true parameters", "Inferred MGPM, unknown shifts and parameters"), p = sapply(listModels, PCMParamCount), logLik = sapply(listModels, logLik), AIC = sapply(listModels, AIC)) knitr::kable(dtSummary)
library(ggtree) library(ggplot2) TipLabelTable <- function(model, ...) { tree <- PCMTree(attr(model, "tree")) data.table( node = sapply(sapply(PCMRegimes(tree), PCMTreeGetTipsInRegime, tree = tree), sample, size = 1), part.model = paste0(" ", PCMRegimes(tree), ".", LETTERS[PCMMapModelTypesToRegimes(model, tree)], " "), ...) } plTree <- PCMTreePlot(attr(modelTrue, "tree"), layout="fan") %<+% TipLabelTable(modelTrue, offset = 5) + geom_tiplab2(size = 2) + geom_tiplab2(aes(label = part.model), offset = 16) + geom_nodelab(size = 2, color = "black") + geom_treescale( width = max(PCMTreeNodeTimes(attr(modelTrue, "tree"))), x = 0, linesize = .25, fontsize = 2, offset = 79) plX <- PCMPlotTraitData2D( X[, seq_len(PCMTreeNumTips(attr(modelTrue, "tree")))], attr(modelTrue, "tree"), scaleSizeWithTime = FALSE, numTimeFacets = 4, nrowTimeFacets = 2, ncolTimeFacets = 2) + geom_text( aes(x = x, y = y, label = id, color = regime), size=2, position = position_jitter(.4, .4)) + theme_bw() + theme(legend.position = "bottom") plTreeInferred <- PCMTreePlot(attr(bestFit$inferredModel, "tree"), layout="fan") %<+% TipLabelTable(bestFit$inferredModel, offset = 5) + geom_tiplab2(size = 2) + geom_tiplab2(aes(label = part.model), offset = 16) + geom_nodelab(size = 2, color = "black") + geom_treescale( width = max(PCMTreeNodeTimes(attr(bestFit$inferredModel, "tree"))), x = 0, linesize = .25, fontsize = 2, offset = 79) plXInferred <- PCMPlotTraitData2D( X[, seq_len(PCMTreeNumTips(attr(bestFit$inferredModel, "tree")))], attr(bestFit$inferredModel, "tree"), scaleSizeWithTime = FALSE, numTimeFacets = 4, nrowTimeFacets = 2, ncolTimeFacets = 2) + geom_text( aes(x = x, y = y, label = id, color = regime), size=2, position = position_jitter(.4, .4)) + theme_bw() + theme(legend.position = "bottom") cowplot::plot_grid( plTree, plX, plTreeInferred, plXInferred, labels = LETTERS[1:4], nrow = 2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.