library(PCMBase)
library(PCMBaseCpp)
library(PCMFit)
library(data.table)
library(MGPMSimulations)
library(fpc)

options(PCMBase.Value.NA = -1e20)
options(PCMBase.Lmr.mode = 11)
options(PCMBase.Threshold.EV = 1e-9)

For all simulated data in t5, the first step has been to fit the true model. This had two objectives:

For all true model fits, the MLE has been searched for through multiple gradient descent runs starting from different initial points in the model parameter space.

# Let id be the integer index of the row for a dataset in testData_t5.
id <- 1

options(PCMBase.Value.NA = -1e20)
options(PCMBase.Lmr.mode = 11)
options(PCMBase.Threshold.EV = 1e-7)

tree <- testData_t5$treeWithRegimes[[id]]
values <- testData_t5$X[[id]][, 1:PCMTreeNumTips(tree)]
model <- testData_t5$model[[id]]
vecParams <- PCMParamGetShortVector(model, k = PCMNumTraits(model), R = PCMNumRegimes(model))
# Generating initial parameter values. 
matParInit <- matrix(vecParams, nrow = 1L)

# Calling PCMFit
fit <- PCMFit(

  X = values, tree = tree, model = model, positiveValueGuard = 10000,

  metaI = PCMInfoCpp,

  matParInit = matParInit,

  numCallsOptim = 400L,
  numRunifInitVecParams = 100000L,
  numGuessInitVecParams = 50000L,

  verbose = FALSE)
# Generating initial parameter values. 
matParInit1 <- matrix(vecParams, nrow = 1L)
matParInitGuess <- GuessInitVecParams(model, n = 10000)
matParInitGuessVaryTheta <- GuessInitVecParams(model, n = 10000, varyTheta = TRUE)
matParInitJitter <- jitterModelParams(
  model,
  numJitterRootRegimeFit = 10000,  sdJitterAllRegimeFits = 0.05,
  numJitterAllRegimeFits = 10000, sdJitterRootRegimeFit = 0.05)

matParInit <- rbind(matParInit1,
                     matParInitGuess,
                     matParInitGuessVaryTheta,
                     matParInitJitter),

# Calling PCMFit
fit <- PCMFit(

  X = values, tree = tree, model = model, positiveValueGuard = 10000,

  metaI = PCMInfoCpp,

  matParInit = matParInit,

  numCallsOptim = 60L,
  numRunifInitVecParams = 0L,
  numGuessInitVecParams = 0L,

  verbose = FALSE)

Loading the fit-objects in memory

load("testData_t5_trueDistributions.RData")
resultData_t5_fitTrueModels <- rbindlist(
  lapply(
    seq_len(nrow(testData_t5)),
    #110:113,
    function(id) {
      if(id %% 10L == 0) {
        cat("id:", id, "\n")
      }

      prefixFiles = paste0("FitTrueModel_t5_id_", id) 
      file <- paste0("Results_t5_FitTrueModel_t1/", prefixFiles, 
                     "/FinalResult_", prefixFiles, ".RData")
      if(file.exists(file)) {
        load(file)
        fit_1 <- fit
      } else {
        fit_1 <- structure(0.0, class = "NAFIT")
      }

      prefixFiles = paste0("FitTrueModel_t5_2_id_", id) 
      file <- paste0("Results_t5_FitTrueModel_t2/", prefixFiles, 
                     "/FinalResult_", prefixFiles, ".RData")
      if(file.exists(file)) {
        load(file)
        fit_2 <- fit
      } else {
        fit_2 <- structure(0.0, class = "NAFIT")
      }

      logLik.NAFIT <- AIC.NAFIT <- function(object, ...) as.double(NA)

      fit <- if(!is.na(logLik(fit_1)) && logLik(fit_1) > logLik(fit_2)) fit_1 else fit_2

      model <- fit$modelOptim

      tree <- testData_t5$treeWithRegimes[[id]]
      X <- testData_t5$X[[id]]

      metaI <- PCMInfo(X, tree, model)

      mu <- as.vector(PCMMean(tree, model, metaI = metaI))
      Sigma <- PCMVar(tree, model, metaI = metaI)

      trueMu <- testData_t5_trueDistributions[
        testData_t5[
          id, 
          list( 
            IdTree, IdClusteringForTree, 
            IdMappingForClustering, IdParamForMapping)], mu[[1L]]]
      trueSigma <- testData_t5_trueDistributions[
        testData_t5[
          id, 
          list( 
            IdTree, IdClusteringForTree, 
            IdMappingForClustering, IdParamForMapping)], Sigma[[1L]]]

      dMahalanobis <- mahalanobis(mu, trueMu, trueSigma)
      dBhattacharyya <- bhattacharyya.dist(mu, trueMu, Sigma, trueSigma)

      testData_t5[id, 
                  list(
                    IdGlob, IdTree, IdClusteringForTree, 
                    IdMappingForClustering, IdParamForMapping, IdSimulationForParam,
                    clusterNodes, mapping,
                    model = model,
                    logLik = logLik(fit), AIC = AIC(fit), 
                    dMahalanobis = dMahalanobis, 
                    dBhattacharyya = dBhattacharyya,
                    logLik_fit1 = logLik(fit_1), AIC_fit1 = AIC(fit_1), 
                    logLik_fit2 = logLik(fit_2), AIC_fit2 = AIC(fit_2))]
    }))

usethis::use_data(resultData_t5_fitTrueModels, overwrite = TRUE)
dtLogLiks[, summary(logLik_1 - logLik)]
dtLogLiks[, summary(logLik_2 - logLik)]
dtLogLiks[, summary(logLik_2 - logLik_1)]

par(mfrow = c(2, 1))

pl <- dtLogLiks[, hist(logLik_1 - logLik, breaks = 100, xlim = c(0, 32))]
pl <- dtLogLiks[, hist(logLik_2 - logLik, breaks = 100, xlim = c(0, 32))]


venelin/MGPMSimulations documentation built on July 20, 2019, 11:03 p.m.