library(data.table) library(PCMBase) library(PCMFit) library(ggplot2) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) if(!requireNamespace("ggtree")) { message("Building the vignette requires ggtree R-package. Trying to install.") status.ggtree <- try({ if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("ggtree", version = "3.8") }, silent = TRUE) if(class(status.ggtree == "try-error")) { stop( "The ggtree installation did not succeed. The vignette cannot be built.") } } generateParametricBootstrapDatasets <- FALSE
Note: The writing of this vignette is currently in progress. Please, contact the author for assistance, in case you need to use PCMFit immediately and cannot find your way through the coding examples. Thanks for your understanding.
# make results reproducible set.seed(12, kind = "Mersenne-Twister", normal.kind = "Inversion") numBootstraps <- 10 bestModel <- RetrieveBestFitScore(PCMFitDemoObjects$fitMGPM_A_F_BC2_RR)$inferredModel metaI <- PCMInfo( X = NULL, tree = attr(bestModel, "tree"), model = bestModel) # Simulate data with the best fit model from the mammal data with SEs valuesBootstrapMGPM_A_F_BC2_RR <- data.table( IdGlob = seq_len(numBootstraps), X = lapply(seq_len(numBootstraps), function(id) { PCMSim( tree = attr(bestModel, "tree"), model = bestModel, X0 = bestModel$X0, metaI = metaI) }) ) options(PCMBase.Value.NA = -1e20) options(PCMBase.Lmr.mode = 11) options(PCMBase.Threshold.EV = 1e-7) valuesBootstrapMGPM_A_F_BC2_RR[ , c("df", "logLik", "score") := { resLists <- lapply(.I, function(i) { attr(bestModel, "X") <- X[[i]] ll <- logLik(bestModel) aic <- AIC(bestModel) df <- attr(ll, "df") c(df, ll, aic) }) list(df = sapply(resLists, function(.) .[1]), logLik = sapply(resLists, function(.) .[2]), score = sapply(resLists, function(.) .[3])) }] PCMFitDemoObjects$valuesBootstrapMGPM_A_F_BC2_RR <- valuesBootstrapMGPM_A_F_BC2_RR
usethis::use_data(PCMFitDemoObjects, overwrite = TRUE)
To make the following example work, create a directory where several files and execution results should be stored. Here, we call this directory TEST.
# File: FitBootstrap_MGPM_A_F_BC2_RR.R # Usage: R --vanilla --slave -f ../../FitBootstrap_MGPM_A_F_BC2_RR.R --args 1 library(PCMBase) library(PCMBaseCpp) library(PCMFit) library(data.table) # extract dataset identifier and possibly other parameters from the command line: args <- commandArgs(trailingOnly = TRUE) if(length(args) > 0) { data_id <- as.integer(args[1]) } else { data_id <- 1L } # A character string used in filenames for a model inference on a given data: prefixFiles = paste0("MGPM_A_F_BC2_RR_BSID_", data_id) # Creating the cluster for this PCMFit run. # Uncomment the follwing code only if doMPI package is available and working. # if(!exists("cluster") || is.null(cluster)) { # if(require(doMPI)) { # # using MPI cluster as distributed node cluster (possibly running on a # # cluster of multiple nodes) # # Get the number of cores. Assume this is run in a batch job. # p = strtoi(Sys.getenv('LSB_DJOB_NUMPROC')) # cluster <- startMPIcluster(count = p-1, verbose = TRUE) # doMPI::registerDoMPI(cluster) # } else { # # possibly running on personal computer without mpi installation # cluster <- parallel::makeCluster( # parallel::detectCores(logical = TRUE), # outfile = paste0("log_", prefixFiles, ".txt")) # doParallel::registerDoParallel(cluster) # } #} # This function is going to be executed on each worker node. # Hence, it is the convenient place to define global settings, # in particular: # - set global option values, # - call the function PCMGenerateModelTypes to generate the default PCM model types; # - write custom S3 methods for PCMBase::PCMParentClasses and PCMBase::PCMSpecify # for custom PCM model types (see corresponding man pages in the PCMBase package), # - define S3 methods for the PCMBase::PCMParamUpperLimit and PCMParamLowerLimit in the # global environment (see R-code below). generatePCMModelsFunction <- function() { # make results reproducible but keep in mind that data_id is unique for this specific # worker node, and will be used to generate a unique parametric boostrap set of trait values. set.seed(data_id, kind = "Mersenne-Twister", normal.kind = "Inversion") PCMGenerateModelTypes() # An example DefineParameterLimits.R file can be found in # vignettes/DefineParameterLimits.R of the PCMFit package source-code. # Note that the path here is relative to the working directory of # the worker node R-process. source('../../DefineParameterLimits.R', local = FALSE) } # Inferred best model ( # replace this with the result from a previous call to RetrieveBestFitScore(fitObject)$inferredModel) bestModel <- RetrieveBestFitScore(PCMFitDemoObjects$fitMGPM_A_F_BC2_RR)$inferredModel # Inferred tree tree <- attr(bestModel, "tree") # Simulate parameteric bootstrap trait values X <- PCMSim(tree, bestModel, bestModel$X0)[, seq_len(PCMTreeNumTips(tree))] currentResultFile <- paste0("CurrentResults_fits_", prefixFiles, ".RData") if(file.exists(currentResultFile)) { load(currentResultFile) tableFitsPrev <- listResults$tableFits } else { tableFitsPrev <- NULL } fitMGPM_A_F_BC2_RR <- PCMFitMixed( X = X, tree = tree, metaIFun = PCMInfoCpp, generatePCMModelsFun = generatePCMModelsFunction, maxNumRoundRobins = 2, maxNumPartitionsInRoundRobins = 2, tableFitsPrev = tableFitsPrev, prefixFiles = prefixFiles, doParallel = TRUE) save(fitMGPM_A_F_BC2_RR, file = paste0("Result_", prefixFiles, ".RData"))
```{sh eval=FALSE}
for id in seq 1 1 10
do
mkdir -p ResultsBootstrap/MGPM_A_F_BC2_RR_BSID_$id
cd ResultsBootstrap/MGPM_A_F_BC2_RR_BSID_$id
if [ -f "Result_MGPM_A_F_BC2_RR_BSID_"$id".RData" ]
then
rm MPI.log
rm CurrentResults.RData
else
R --vanilla --slave -f ../../FitBootstrap_MGPM_A_F_BC2_RR.R --args $id fi cd ../.. done
* Run the script by calling the command `sh FitBootstrap_MGPM_A_F_BC2_RR_bsub.sh` from the command prompt in the TEST directory. # Collect bootstrap results ```r # model and tree used to generate the bootstrap datasets inferredModel <- RetrieveBestFitScore(PCMFitDemoObjects$fitMGPM_A_F_BC2_RR)$inferredModel inferredTree <- PCMTree(attr(inferredModel, "tree")) epochsHD <- seq(.2, max(PCMTreeNodeTimes(inferredTree)), by = 2) if(is.null(PCMFitDemoObjects$bs_MGPM_A_F_BC2_RR)) { bs_MGPM_A_F_BC2_RR <- CollectBootstrapResults( resultDir = "ResultsBootstrap", prefixFiles = "MGPM_A_F_BC2_RR_BSID_", nameFitObject = 'fitMGPM_A_F_BC2_RR', ids = seq_len(10), inferredTree = inferredTree, epochs = epochsHD, minLength = 0.2, verbose = FALSE) PCMFitDemoObjects$bs_MGPM_A_F_BC2_RR <- bs_MGPM_A_F_BC2_RR # The following line should be uncommented only if preparing the PCMFit R-package usethis::use_data(PCMFitDemoObjects, overwrite = TRUE) } else { bs_MGPM_A_F_BC2_RR <- PCMFitDemoObjects$bs_MGPM_A_F_BC2_RR }
inferredModel <- RetrieveBestFitScore(PCMFitDemoObjects$fitMGPM_A_F_BC2_RR)$inferredModel inferredTree <- PCMTree(attr(inferredModel, "tree")) epochsHD <- seq(.2, max(PCMTreeNodeTimes(inferredTree)), by = 2) bs_MGPM_A_F_BC2_RR <- CollectBootstrapResults( resultDir = "ResultsBootstrap", prefixFiles = "MGPM_A_F_BC2_RR_BSID_", nameFitObject = 'fitMGPM_A_F_BC2_RR', ids = seq_len(10), inferredTree = inferredTree, epochs = epochsHD, minLength = 0.2, verbose = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.