knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(PCMBase)
library(PCMFit)
library(MGPMSimulations)
library(data.table)
library(rmarkdown)
library(knitr)
library(scales)
library(RColorBrewer)
library(phytools)

opts_chunk$set(dev='pdf', 
               #results="hide",
               warning=FALSE,
               dev.args=list(
                 family="ArialMT", 
                 pointsize=10,
                 colormodel='rgb'
               ),
               dpi=600, bg='white')

options(PCMBase.Value.NA = -1e20)
options(PCMBase.Lmr.mode = 11)
options(PCMBase.Threshold.EV = 1e-7)

options(width = 120)
dtSimulationFitsNULL <- fits_MGPM_A_F_best_clade_2_RR_t5_NULL[
    , cbind(.SD, data.table(fitType2   =   "MGPM A-F RCP B.2 RR AIC q=20"      ),
            data.table(
              deltaNumRegimes = 
                sapply(clusterNodes, function(cn) if(is.null(cn)) NA_real_ else length(unique(cn))) -
                sapply(IdGlob, function(idGlob) {
                  testData_t5_NULL[idGlob, length(unique(clusterNodes[[1]]))]
                }),
              deltaScore = score - 
                sapply(IdGlob, function(idGlob) {
                  testData_t5_NULL[IdGlob == idGlob, AIC[[1]]]
                })))]

dtSimulationFitsNULL[, fitType:=factor(
  fitType2, 
  levels = c("MGPM A-F RCP B.2 RR AIC q=20"))]

for(i in seq_len(nrow(dtSimulationFitsNULL))) {
  dtSimulationFitsNULL[
    i,
    c("TreeType", "TreeSize", "Mapping", "NumTraits") := {

      idx <- list(IdTree[[1]], 
                  IdClusteringForTree[[1]], 
                  IdMappingForClustering[[1]], 
                  numTraits[[1]],
                  IdParamForMapping[[1]])

      subTestData <- testData_t5_NULL[
        idx, list(treeType, treeSize, mapping, numTraits)]

      list(
        subTestData$treeType[1], 
        subTestData$treeSize[1],
        sapply(
          subTestData$mapping[1], 
          function(m) {
            do.call(paste0, as.list(names(MGPMDefaultModelTypes())[m]))
          }),
        subTestData$numTraits[1])
    }]
}

dtSimulationFitsNULL[
  , TreeType2:=factor(
    TreeType, levels = c("ultrametric", "non-ultrametric"), 
    labels = c("Ultr.", "Non-ultr."))]

setnames(dtSimulationFitsNULL, c("TreeType", "TreeType2"), c("TreeType2", "TreeType"))

dtSimulationFitsNULL[
  , TreeSize2:=factor(TreeSize, levels = paste0("N=", c(80,159,318,638)))]

setnames(dtSimulationFitsNULL, c("TreeSize", "TreeSize2"), c("TreeSize2", "TreeSize"))

dtSimulationFitsNULL[
  , NumRegimes:=stringr::str_length(Mapping)]

usethis::use_data(dtSimulationFitsNULL, overwrite = TRUE)
dtFigNULL <- dtSimulationFitsNULL[
  IdGlob %in% testData_t5_NULL_fittedIds, 
  list(
    TreeType = unique(TreeType),
    TreeType2 = unique(TreeType2),
    TreeSize = unique(TreeSize),
    Mapping = unique(Mapping),
    NumTraits = unique(NumTraits),

    perf_Cluster_tpr = mean(perf_Cluster_tpr, na.rm = TRUE),
    perf_Cluster_fpr = mean(perf_Cluster_fpr, na.rm = TRUE),
    perf_BM_tpr = mean(perf_BM_tpr, na.rm = TRUE),
    perf_BM_fpr = mean(perf_BM_fpr, na.rm = TRUE),
    perf_OU_tpr = mean(perf_OU_tpr, na.rm = TRUE),
    perf_OU_fpr = mean(perf_OU_fpr, na.rm = TRUE),
    perf_Uncorrelated_tpr = mean(perf_Uncorrelated_tpr, na.rm = TRUE),
    perf_Uncorrelated_fpr = mean(perf_Uncorrelated_fpr, na.rm = TRUE),
    perf_Correlated_tpr = mean(perf_Correlated_tpr, na.rm = TRUE),
    perf_Correlated_fpr = mean(perf_Correlated_fpr, na.rm = TRUE),
    deltaNumRegimes = mean(deltaNumRegimes, na.rm = TRUE),
    deltaScore = mean(deltaScore, na.rm = TRUE)
    ),
  keyby = list(
    IdTree, 
    IdClusteringForTree, 
    IdMappingForClustering, 
    numTraits, 
    IdParamForMapping, 
    IdSimulationForParam)]

dtFigNULL[, IdParamSimul:=apply(
  cbind(IdParamForMapping, IdSimulationForParam), 1, function(r) {
    #paste0(match(r[1], c(1, 3)), ":", r[2])
    paste0(r[1], ":", r[2])
  })]

usethis::use_data(dtFigNULL, overwrite = TRUE)
paletteName <- "RdBu"
mypalette <- brewer.pal(7, paletteName)

plScore <- ggplot(
  data = dtFigNULL,
  aes(factor(IdParamSimul), 
      factor(NumTraits, levels = c(2, 4, 6, 8)), 
      fill = deltaScore)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = mypalette[7], high=mypalette[1],
    na.value = "grey98", 
    space = "Lab",
    name="Score\ndifference\n") +

  theme_grey() + 
  coord_fixed(ratio = 1) + 
  xlab("Parameter:Simulation") + 
  ylab("Number of traits (k)") +
  theme(
    axis.text = element_text(size = 8),
    legend.position = "right", 
    strip.text = element_text(size = 7)) +
  facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) +
  theme(legend.position = "bottom")

plRegimes <- ggplot(
  data = dtFigNULL,
  aes(factor(IdParamSimul), 
      factor(NumTraits, levels = c(2, 4, 6, 8)), 
      fill = factor(
        as.integer(deltaNumRegimes), levels = seq(0, 4, by =1))))+
  geom_tile(color = "white") +
  scale_fill_brewer(
    palette = paletteName,
    direction = -1,
    limits = c("0", "1", "2", "3"), 
    na.value = "grey98", 
    name="Number of regimes\ndifference\n") +

  theme_grey() + 
  coord_fixed(ratio = 1) + 
  xlab("Parameter:Simulation") + 
  ylab("Number of traits (k)") +
  theme(
    axis.text = element_text(size = 8),
    legend.position = "right", 
    strip.text = element_text(size = 7)) +
  facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) +
  theme(legend.position = "bottom")


plCluster <- ggplot(
  data = dtFigNULL,
  aes(factor(IdParamSimul), 
      factor(NumTraits, levels = c(2, 4, 6, 8)), 
      fill = perf_Cluster_tpr)) +
  geom_tile(color = "white") +
  scale_fill_distiller(
    palette = paletteName,
    direction = 1,
    na.value = "grey98",
    name="Cluster tpr\n") +

  theme_grey() + 
  coord_fixed(ratio = 1) + 
  xlab("Parameter:Simulation") + 
  ylab("Number of traits (k)") +
  theme(
    axis.text = element_text(size = 8),
    legend.position = "right", 
    strip.text = element_text(size = 7)) +
  facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) +
  theme(legend.position = "bottom")

plBM <- ggplot(
  data = dtFigNULL,
  aes(factor(IdParamSimul), 
      factor(NumTraits, levels = c(2, 4, 6, 8)), 
      fill = perf_OU_fpr)) +
  geom_tile(color = "white") +
  scale_fill_distiller(
    palette = paletteName,
    direction = -1,
    na.value = "grey98",
    name="OU fpr\n") +

  theme_grey() + 
  coord_fixed(ratio = 1) + 
  xlab("Parameter:Simulation") + 
  ylab("Number of traits (k)") +
  theme(
    axis.text = element_text(size = 8),
    legend.position = "right", 
    strip.text = element_text(size = 7)) +
  facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) +
  theme(legend.position = "bottom")

plCorrelatedTPR <- ggplot(
  data = dtFigNULL,
  aes(factor(IdParamSimul), 
      factor(NumTraits, levels = c(2, 4, 6, 8)), 
      fill = perf_Correlated_tpr)) +
  geom_tile(color = "white") +
  scale_fill_distiller(
    palette = paletteName,
    direction = 1,
    na.value = "grey98",
    name="Correlated tpr\n") +

  theme_grey() + 
  coord_fixed(ratio = 1) + 
  xlab("Parameter:Simulation") + 
  ylab("Number of traits (k)") +
  theme(
    axis.text = element_text(size = 8),
    legend.position = "right", 
    strip.text = element_text(size = 7)) +
  facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) +
  theme(legend.position = "bottom")

plCorrelatedFPR <- ggplot(
  data = dtFigNULL,
  aes(factor(IdParamSimul), 
      factor(NumTraits, levels = c(2, 4, 6, 8)), 
      fill = perf_Correlated_fpr)) +
  geom_tile(color = "white") +
  scale_fill_distiller(
    palette = paletteName,
    direction = -1,
    na.value = "grey98",
    name="Correlated fpr\n") +

  theme_grey() + 
  coord_fixed(ratio = 1) + 
  xlab("Parameter:Simulation") + 
  ylab("Number of traits (k)") +
  theme(
    legend.position = "right", 
    strip.text = element_text(size = 7)) +
  facet_grid(TreeSize ~ TreeType + factor(Mapping, levels = unique(Mapping))) +
  theme(legend.position = "bottom")

cowplot::plot_grid(plScore, plRegimes, plCluster, plBM, plCorrelatedTPR, plCorrelatedFPR, labels = LETTERS[1:6], nrow = 3)
testData_t5_NULL[testData_t5_fittedIds][treeType=="ultrametric" & IdMappingForClustering==2&numTraits==4 & nobs == 80, IdGlob]

id <- 41

tree <- testData_t5_NULL$treeWithRegimes[[id]]

values <- testData_t5_NULL$X[[id]][, seq_len(PCMTreeNumTips(tree))]
rownames(values) <- paste0("V", seq_len(nrow(values)))

trueModel <- testData_t5_NULL$model[[id]]

#logLikFun <- PCMCreateLikelihood(X = values, tree = tree, model = trueModel)

#guessParam <- GuessInitVecParams(o = trueModel, tree = tree, X = values)

bestModel <- fits_MGPM_A_F_best_clade_2_RR_t5_NULL[IdGlob == id]$model[[1]]

objPPRC <- phyl.pca(tree, t(values))

valuesPPRC <- t(objPPRC$S[seq_len(PCMTreeNumTips(tree)), ])

rotatedModel <- bestModel


rotatedModel$`1`$Sigma_x[,,1] <- 
  chol(t(objPPRC$Evec) %*% 
         ((rotatedModel$`1`$Sigma_x[,,1]) %*% 
            t(rotatedModel$`1`$Sigma_x[,,1])) %*% 
         objPPRC$Evec)


cat("Det objPRC$rotation:\n")
print(det(objPPRC$Evec))
cat("objPPRC$Evec%*%t(objPPRC$Evec):\n")
print(objPPRC$Evec%*%t(objPPRC$Evec))
cat("True model:\n")
PCMLik(X = values, tree = tree, model = trueModel)
cat("Best model (original data):\n")
PCMLik(X = values, tree = tree, model = bestModel)
cat("Rotated best model (rotated data):\n")
rotatedModel$X0[] <- NA
PCMLik(X = valuesPPRC, tree = tree, model = rotatedModel)

bestModel$`1`$Sigma_x[,,1]%*%t(bestModel$`1`$Sigma_x[,,1])
rotatedModel$`1`$Sigma_x[,,1]%*%t(rotatedModel$`1`$Sigma_x[,,1])

modelAOnOriginalData <- MixedGaussian(
  k = PCMNumTraits(trueModel), 
  modelTypes = MGPMDefaultModelTypes(), 
  mapping = c(`1`=1),
  Sigmae_x = attr(trueModel, "spec")$Sigmae_x)

PCMParamSetByName(modelAOnOriginalData, bestModel, deepCopySubPCMs = TRUE)
modelAVec <- PCMParamGetShortVector(modelAOnOriginalData)
modelAOnOriginalData$`1`$Sigma_x[] <- 0
PCMParamLoadOrStore(modelAOnOriginalData, modelAVec, 0, PCMNumTraits(modelAOnOriginalData), R = PCMNumRegimes(modelAOnOriginalData), load = TRUE)

modelAOnRotatedData <- MixedGaussian(
  k = PCMNumTraits(trueModel), 
  modelTypes = MGPMDefaultModelTypes(), 
  mapping = c(`1`=1),
  Sigmae_x = attr(trueModel, "spec")$Sigmae_x)

PCMParamSetByName(modelAOnRotatedData, rotatedModel, deepCopySubPCMs = TRUE)
modelAVec <- PCMParamGetShortVector(modelAOnRotatedData)
modelAOnRotatedData$`1`$Sigma_x[] <- 0
PCMParamLoadOrStore(modelAOnRotatedData, modelAVec, 0, PCMNumTraits(modelAOnRotatedData), R = PCMNumRegimes(modelAOnRotatedData), load = TRUE)

cat("model A, original data:")
PCMLik(X = values, tree = tree, model = modelAOnOriginalData)
cat("model A, rotated data:")
PCMLik(X = valuesPPRC, tree = tree, model = modelAOnRotatedData)

fitModelAOnOriginalData <- PCMFit(
   X = values, model = modelAOnOriginalData, tree = tree, metaI = PCMInfoCpp,
  numGuessInitVecParams = 50000, numCallsOptim = 50)

fitModelBOnOriginalData <- PCMFit(
  X = values, model = bestModel, tree = tree, metaI = PCMInfoCpp,
  numGuessInitVecParams = 50000, numCallsOptim = 50)

fitModelAOnRotatedData <- PCMFit(
  X = valuesPPRC, model = modelAOnRotatedData, tree = tree, metaI = PCMInfoCpp,
  numGuessInitVecParams = 50000, numCallsOptim = 50)

fitModelBOnRotatedData <- PCMFit(
  X = valuesPPRC, model = rotatedModel, tree = tree, metaI = PCMInfoCpp,
  numGuessInitVecParams = 50000, numCallsOptim = 50)

logLik(fitModelAOnOriginalData)
logLik(fitModelBOnOriginalData)
logLik(fitModelAOnRotatedData)
logLik(fitModelBOnRotatedData)

AIC(fitModelAOnOriginalData)
AIC(fitModelBOnOriginalData)
AIC(fitModelAOnRotatedData)
AIC(fitModelBOnRotatedData)
library(PCMBase)
library(PCMFit)
library(MGPMSimulations)
library(ape)

# id simulation for 
id <- 41
testData_t5_NULL[
  id, 
  list(
    N = nobs, 
    k = numTraits, p = df, 
    `model-type` = LETTERS[1:6][unlist(mapping)],
    `logLik(true model)` = unlist(logLik),
    `AIC(true model)` = unlist(AIC))]

# phylogeny
tree <- testData_t5_NULL$treeWithRegimes[[id]]
# number of tips in tree
N <- PCMTreeNumTips(tree)

# X: design matrix (each column vector corresponds to one trait)
X <- t(testData_t5_NULL$X[[id]][, seq_len(N)])
colnames(X) <- paste0("V", seq_len(ncol(X)))

# True model B used to simulate the data
trueModelOnOriginalData <- testData_t5_NULL$model[[id]]
# number of traits
k <- PCMNumTraits(trueModelOnOriginalData)

# Best model found using the RCP algorithm
bestModelOnOriginalData <- 
  fits_MGPM_A_F_best_clade_2_RR_t5_NULL[IdGlob == id]$model[[1]]

# matrix representation of the tree (using the vcv function from package ape)
C <- vcv(tree)
# column vector of N 1's
one <- rep(1, N)

# column vector b
b <- as.vector(
  t(solve(t(one) %*% solve(C) %*% one) %*% t(one) %*% solve(C) %*% X))
# the R matrix
R <- (N-1)^(-1) * t(X - one %*% t(b)) %*% solve(C) %*% (X - one %*% t(b))
# the V matrix
V <- eigen(R)$vectors

# The pPC scores matrix
S <- X %*% V - one %*% (t(b) %*% V)

rotatedBestModelOnOriginalData <- bestModelOnOriginalData
rotatedBestModelOnOriginalData$X0[] <-
  rotatedBestModelOnOriginalData$X0[] %*% V - 1 %*% (t(b) %*% V)

rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1] <- 
  chol(t(V)%*% rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1] %*%
         t(rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1]) %*% V)

modelAFromRotatedBestModelOnOriginalData <- MixedGaussian(
  k = k, 
  modelTypes = MGPMDefaultModelTypes(), 
  mapping = c(`1`=2),
  Sigmae_x = structure(
    0, class = c("MatrixParameter", "_Omitted"), 
    description = "Omitted upper triangular Choleski factor of the non-phylogenetic variance-covariance"))

modelAFromRotatedBestModelOnOriginalData$X0[] <- 
  rotatedBestModelOnOriginalData$X0

diag(modelAFromRotatedBestModelOnOriginalData$`1`$Sigma_x[,,1]) <- 
  diag(rotatedBestModelOnOriginalData$`1`$Sigma_x[,,1])

PCMLik(X = t(X), tree = tree, model = bestModelOnOriginalData)
PCMLik(X = t(S), tree = tree, model = rotatedBestModelOnOriginalData)
PCMLik(X = t(S), tree = tree, model = modelAFromRotatedBestModelOnOriginalData)



venelin/MGPMSimulations documentation built on July 20, 2019, 11:03 p.m.