knitr::opts_chunk$set(echo = TRUE) library(ggplot2) library(PCMBase) library(PCMFit) library(MGPMSimulations) library(data.table) library(rmarkdown) library(knitr) library(scales) library(RColorBrewer) library(phytools) opts_chunk$set(dev='pdf', #results="hide", warning=FALSE, dev.args=list( family="ArialMT", pointsize=10, colormodel='rgb' ), dpi=600, bg='white') options(PCMBase.Value.NA = -1e20) options(PCMBase.Lmr.mode = 11) options(PCMBase.Threshold.EV = 1e-7) options(width = 120)
dtSimulationFitsNULL <- fits_MGPM_A_F_best_clade_2_RR_t5_NULL[ , cbind(.SD, data.table(fitType2 = "MGPM A-F RCP B.2 RR AIC q=20" ), data.table( deltaNumRegimes = sapply(clusterNodes, function(cn) if(is.null(cn)) NA_real_ else length(unique(cn))) - sapply(IdGlob, function(idGlob) { testData_t5_NULL[idGlob, length(unique(clusterNodes[[1]]))] }), deltaScore = score - sapply(IdGlob, function(idGlob) { testData_t5_NULL[IdGlob == idGlob, AIC[[1]]] })))] dtSimulationFitsNULL[, fitType:=factor( fitType2, levels = c("MGPM A-F RCP B.2 RR AIC q=20"))] for(i in seq_len(nrow(dtSimulationFitsNULL))) { dtSimulationFitsNULL[ i, c("TreeType", "TreeSize", "Mapping", "NumTraits") := { idx <- list(IdTree[[1]], IdClusteringForTree[[1]], IdMappingForClustering[[1]], numTraits[[1]], IdParamForMapping[[1]]) subTestData <- testData_t5_NULL[ idx, list(treeType, treeSize, mapping, numTraits)] list( subTestData$treeType[1], subTestData$treeSize[1], sapply( subTestData$mapping[1], function(m) { do.call(paste0, as.list(names(MGPMDefaultModelTypes())[m])) }), subTestData$numTraits[1]) }] } dtSimulationFitsNULL[ , TreeType2:=factor( TreeType, levels = c("ultrametric", "non-ultrametric"), labels = c("Ultr.", "Non-ultr."))] setnames(dtSimulationFitsNULL, c("TreeType", "TreeType2"), c("TreeType2", "TreeType")) dtSimulationFitsNULL[ , TreeSize2:=factor(TreeSize, levels = paste0("N=", c(80,159,318,638)))] setnames(dtSimulationFitsNULL, c("TreeSize", "TreeSize2"), c("TreeSize2", "TreeSize")) dtSimulationFitsNULL[ , NumRegimes:=stringr::str_length(Mapping)] usethis::use_data(dtSimulationFitsNULL, overwrite = TRUE)
dtFigNULL <- dtSimulationFitsNULL[ IdGlob %in% testData_t5_NULL_fittedIds, list( TreeType = unique(TreeType), TreeType2 = unique(TreeType2), TreeSize = unique(TreeSize), Mapping = unique(Mapping), NumTraits = unique(NumTraits), perf_Cluster_tpr = mean(perf_Cluster_tpr, na.rm = TRUE), perf_Cluster_fpr = mean(perf_Cluster_fpr, na.rm = TRUE), perf_BM_tpr = mean(perf_BM_tpr, na.rm = TRUE), perf_BM_fpr = mean(perf_BM_fpr, na.rm = TRUE), perf_OU_tpr = mean(perf_OU_tpr, na.rm = TRUE), perf_OU_fpr = mean(perf_OU_fpr, na.rm = TRUE), perf_Uncorrelated_tpr = mean(perf_Uncorrelated_tpr, na.rm = TRUE), perf_Uncorrelated_fpr = mean(perf_Uncorrelated_fpr, na.rm = TRUE), perf_Correlated_tpr = mean(perf_Correlated_tpr, na.rm = TRUE), perf_Correlated_fpr = mean(perf_Correlated_fpr, na.rm = TRUE), deltaNumRegimes = mean(deltaNumRegimes, na.rm = TRUE), deltaScore = mean(deltaScore, na.rm = TRUE) ), keyby = list( IdTree, IdClusteringForTree, IdMappingForClustering, numTraits, IdParamForMapping, IdSimulationForParam)] dtFigNULL[, IdParamSimul:=apply( cbind(IdParamForMapping, IdSimulationForParam), 1, function(r) { #paste0(match(r[1], c(1, 3)), ":", r[2]) paste0(r[1], ":", r[2]) })] usethis::use_data(dtFigNULL, overwrite = TRUE)
paletteName <- "RdBu" mypalette <- brewer.pal(7, paletteName) plScore <- ggplot( data = dtFigNULL, aes(factor(IdParamSimul), factor(NumTraits, levels = c(2, 4, 6, 8)), fill = deltaScore)) + geom_tile(color = "white") + scale_fill_gradient2( low = mypalette[7], high=mypalette[1], na.value = "grey98", space = "Lab", name="Score\ndifference\n") + theme_grey() + coord_fixed(ratio = 1) + xlab("Parameter:Simulation") + ylab("Number of traits (k)") + theme( axis.text = element_text(size = 8), legend.position = "right", strip.text = element_text(size = 7)) + facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) + theme(legend.position = "bottom") plRegimes <- ggplot( data = dtFigNULL, aes(factor(IdParamSimul), factor(NumTraits, levels = c(2, 4, 6, 8)), fill = factor( as.integer(deltaNumRegimes), levels = seq(0, 4, by =1))))+ geom_tile(color = "white") + scale_fill_brewer( palette = paletteName, direction = -1, limits = c("0", "1", "2", "3"), na.value = "grey98", name="Number of regimes\ndifference\n") + theme_grey() + coord_fixed(ratio = 1) + xlab("Parameter:Simulation") + ylab("Number of traits (k)") + theme( axis.text = element_text(size = 8), legend.position = "right", strip.text = element_text(size = 7)) + facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) + theme(legend.position = "bottom") plCluster <- ggplot( data = dtFigNULL, aes(factor(IdParamSimul), factor(NumTraits, levels = c(2, 4, 6, 8)), fill = perf_Cluster_tpr)) + geom_tile(color = "white") + scale_fill_distiller( palette = paletteName, direction = 1, na.value = "grey98", name="Cluster tpr\n") + theme_grey() + coord_fixed(ratio = 1) + xlab("Parameter:Simulation") + ylab("Number of traits (k)") + theme( axis.text = element_text(size = 8), legend.position = "right", strip.text = element_text(size = 7)) + facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) + theme(legend.position = "bottom") plBM <- ggplot( data = dtFigNULL, aes(factor(IdParamSimul), factor(NumTraits, levels = c(2, 4, 6, 8)), fill = perf_OU_fpr)) + geom_tile(color = "white") + scale_fill_distiller( palette = paletteName, direction = -1, na.value = "grey98", name="OU fpr\n") + theme_grey() + coord_fixed(ratio = 1) + xlab("Parameter:Simulation") + ylab("Number of traits (k)") + theme( axis.text = element_text(size = 8), legend.position = "right", strip.text = element_text(size = 7)) + facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) + theme(legend.position = "bottom") plCorrelatedTPR <- ggplot( data = dtFigNULL, aes(factor(IdParamSimul), factor(NumTraits, levels = c(2, 4, 6, 8)), fill = perf_Correlated_tpr)) + geom_tile(color = "white") + scale_fill_distiller( palette = paletteName, direction = 1, na.value = "grey98", name="Correlated tpr\n") + theme_grey() + coord_fixed(ratio = 1) + xlab("Parameter:Simulation") + ylab("Number of traits (k)") + theme( axis.text = element_text(size = 8), legend.position = "right", strip.text = element_text(size = 7)) + facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) + theme(legend.position = "bottom") plCorrelatedFPR <- ggplot( data = dtFigNULL, aes(factor(IdParamSimul), factor(NumTraits, levels = c(2, 4, 6, 8)), fill = perf_Correlated_fpr)) + geom_tile(color = "white") + scale_fill_distiller( palette = paletteName, direction = -1, na.value = "grey98", name="Correlated fpr\n") + theme_grey() + coord_fixed(ratio = 1) + xlab("Parameter:Simulation") + ylab("Number of traits (k)") + theme( legend.position = "right", strip.text = element_text(size = 7)) + facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) + theme(legend.position = "bottom") cowplot::plot_grid(plScore, plRegimes, plCluster, plBM, plCorrelatedTPR, plCorrelatedFPR, labels = LETTERS[1:6], nrow = 3)
testData_t5_NULL[testData_t5_fittedIds][treeType=="ultrametric" & IdMappingForClustering==2&numTraits==4 & nobs == 80, IdGlob] id <- 41 tree <- testData_t5_NULL$treeWithRegimes[[id]] values <- testData_t5_NULL$X[[id]][, seq_len(PCMTreeNumTips(tree))] rownames(values) <- paste0("V", seq_len(nrow(values))) trueModel <- testData_t5_NULL$model[[id]] #logLikFun <- PCMCreateLikelihood(X = values, tree = tree, model = trueModel) #guessParam <- GuessInitVecParams(o = trueModel, tree = tree, X = values) bestModel <- fits_MGPM_A_F_best_clade_2_RR_t5_NULL[IdGlob == id]$model[[1]] objPPRC <- phyl.pca(tree, t(values)) valuesPPRC <- t(objPPRC$S[seq_len(PCMTreeNumTips(tree)), ]) rotatedModel <- bestModel rotatedModel$`1`$Sigma_x[,,1] <- chol(t(objPPRC$Evec) %*% ((rotatedModel$`1`$Sigma_x[,,1]) %*% t(rotatedModel$`1`$Sigma_x[,,1])) %*% objPPRC$Evec) cat("Det objPRC$rotation:\n") print(det(objPPRC$Evec)) cat("objPPRC$Evec%*%t(objPPRC$Evec):\n") print(objPPRC$Evec%*%t(objPPRC$Evec)) cat("True model:\n") PCMLik(X = values, tree = tree, model = trueModel) cat("Best model (original data):\n") PCMLik(X = values, tree = tree, model = bestModel) cat("Rotated best model (rotated data):\n") rotatedModel$X0[] <- NA PCMLik(X = valuesPPRC, tree = tree, model = rotatedModel) bestModel$`1`$Sigma_x[,,1]%*%t(bestModel$`1`$Sigma_x[,,1]) rotatedModel$`1`$Sigma_x[,,1]%*%t(rotatedModel$`1`$Sigma_x[,,1]) modelAOnOriginalData <- MixedGaussian( k = PCMNumTraits(trueModel), modelTypes = MGPMDefaultModelTypes(), mapping = c(`1`=1), Sigmae_x = attr(trueModel, "spec")$Sigmae_x) PCMParamSetByName(modelAOnOriginalData, bestModel, deepCopySubPCMs = TRUE) modelAVec <- PCMParamGetShortVector(modelAOnOriginalData) modelAOnOriginalData$`1`$Sigma_x[] <- 0 PCMParamLoadOrStore(modelAOnOriginalData, modelAVec, 0, PCMNumTraits(modelAOnOriginalData), R = PCMNumRegimes(modelAOnOriginalData), load = TRUE) modelAOnRotatedData <- MixedGaussian( k = PCMNumTraits(trueModel), modelTypes = MGPMDefaultModelTypes(), mapping = c(`1`=1), Sigmae_x = attr(trueModel, "spec")$Sigmae_x) PCMParamSetByName(modelAOnRotatedData, rotatedModel, deepCopySubPCMs = TRUE) modelAVec <- PCMParamGetShortVector(modelAOnRotatedData) modelAOnRotatedData$`1`$Sigma_x[] <- 0 PCMParamLoadOrStore(modelAOnRotatedData, modelAVec, 0, PCMNumTraits(modelAOnRotatedData), R = PCMNumRegimes(modelAOnRotatedData), load = TRUE) cat("model A, original data:") PCMLik(X = values, tree = tree, model = modelAOnOriginalData) cat("model A, rotated data:") PCMLik(X = valuesPPRC, tree = tree, model = modelAOnRotatedData) fitModelAOnOriginalData <- PCMFit( X = values, model = modelAOnOriginalData, tree = tree, metaI = PCMInfoCpp, numGuessInitVecParams = 50000, numCallsOptim = 50) fitModelBOnOriginalData <- PCMFit( X = values, model = bestModel, tree = tree, metaI = PCMInfoCpp, numGuessInitVecParams = 50000, numCallsOptim = 50) fitModelAOnRotatedData <- PCMFit( X = valuesPPRC, model = modelAOnRotatedData, tree = tree, metaI = PCMInfoCpp, numGuessInitVecParams = 50000, numCallsOptim = 50) fitModelBOnRotatedData <- PCMFit( X = valuesPPRC, model = rotatedModel, tree = tree, metaI = PCMInfoCpp, numGuessInitVecParams = 50000, numCallsOptim = 50) logLik(fitModelAOnOriginalData) logLik(fitModelBOnOriginalData) logLik(fitModelAOnRotatedData) logLik(fitModelBOnRotatedData) AIC(fitModelAOnOriginalData) AIC(fitModelBOnOriginalData) AIC(fitModelAOnRotatedData) AIC(fitModelBOnRotatedData)
library(PCMBase) library(PCMFit) library(MGPMSimulations) library(ape) # id simulation for id <- 41 testData_t5_NULL[ id, list( N = nobs, k = numTraits, p = df, `model-type` = LETTERS[1:6][unlist(mapping)], `logLik(true model)` = unlist(logLik), `AIC(true model)` = unlist(AIC))] # phylogeny tree <- testData_t5_NULL$treeWithRegimes[[id]] # number of tips in tree N <- PCMTreeNumTips(tree) # X: design matrix (each column vector corresponds to one trait) X <- t(testData_t5_NULL$X[[id]][, seq_len(N)]) colnames(X) <- paste0("V", seq_len(ncol(X))) # True model B used to simulate the data trueModelOnOriginalData <- testData_t5_NULL$model[[id]] # number of traits k <- PCMNumTraits(trueModelOnOriginalData) # Best model found using the RCP algorithm bestModelOnOriginalData <- fits_MGPM_A_F_best_clade_2_RR_t5_NULL[IdGlob == id]$model[[1]] # matrix representation of the tree (using the vcv function from package ape) C <- vcv(tree) # column vector of N 1's one <- rep(1, N) # column vector b b <- as.vector( t(solve(t(one) %*% solve(C) %*% one) %*% t(one) %*% solve(C) %*% X)) # the R matrix R <- (N-1)^(-1) * t(X - one %*% t(b)) %*% solve(C) %*% (X - one %*% t(b)) # the V matrix V <- eigen(R)$vectors # The pPC scores matrix S <- X %*% V - one %*% (t(b) %*% V) rotatedBestModelOnOriginalData <- bestModelOnOriginalData rotatedBestModelOnOriginalData$X0[] <- rotatedBestModelOnOriginalData$X0[] %*% V - 1 %*% (t(b) %*% V) rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1] <- chol(t(V)%*% rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1] %*% t(rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1]) %*% V) modelAFromRotatedBestModelOnOriginalData <- MixedGaussian( k = k, modelTypes = MGPMDefaultModelTypes(), mapping = c(`1`=2), Sigmae_x = structure( 0, class = c("MatrixParameter", "_Omitted"), description = "Omitted upper triangular Choleski factor of the non-phylogenetic variance-covariance")) modelAFromRotatedBestModelOnOriginalData$X0[] <- rotatedBestModelOnOriginalData$X0 diag(modelAFromRotatedBestModelOnOriginalData$`1`$Sigma_x[,,1]) <- diag(rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1]) PCMLik(X = t(X), tree = tree, model = bestModelOnOriginalData) PCMLik(X = t(S), tree = tree, model = rotatedBestModelOnOriginalData) PCMLik(X = t(S), tree = tree, model = modelAFromRotatedBestModelOnOriginalData)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.