R/collect_esses.R

Defines functions collect_esses

Documented in collect_esses

#' @title Collect esses
#' @description Collect esses
#' @inheritParams default_params_doc
#' @return a dataframe with parameters and esses
#' @author Giovanni Laudanno, Richèl J.C. Bilderbeek
#' @export
collect_esses <- function(
  project_folder_name = getwd()
) {
  razzo::check_project_folder_name(project_folder_name) # nolint

  ##### Satisfy R CMD check #####
  tree <- NULL; rm(tree) # nolint, fixes warning: no visible binding for global variable
  seed <- NULL; rm(seed) # nolint, fixes warning: no visible binding for global variable
  best_or_gen <- NULL; rm(best_or_gen) # nolint, fixes warning: no visible binding for global variable

  # retrieve information from files
  folder <- razzo::get_data_paths(project_folder_name, full_names = FALSE) # nolint internal function
  paths <- file.path(project_folder_name, folder)

  # information needed
  traces_names <- c(
    "Sample",
    "posterior",
    "likelihood"
  )
  setting_string_names <- c(
    "tree",
    "best_or_gen"
  )
  esses <- data.frame(matrix(
    NA,
    ncol = 1 + length(setting_string_names) + 1
  ))
  colnames(esses) <- c(
    "folder",
    "ess_likelihood",
    setting_string_names
  )
  # Reading files and store data
  i <- 0
  for (p in seq_along(paths)) {
    parameters <- readRDS(file.path(paths[p], "parameters.RDa")) # nolint internal function
    burn_in_fraction <-
      parameters$pir_params$error_measure_params$burn_in_fraction
    # pirouette checks that all experiments' MCMCs are identical
    sample_interval <-
      parameters$pir_params$experiments[[1]]$inference_model$mcmc$store_every
    files_log <- list.files(paths[p], pattern = "*.log")
    # In a failed run, there may not file
    if (length(files_log) == 0) next
    files_log <- files_log[!grepl(pattern = "evidence", x = files_log)]
    if (length(files_log) == 0) next
    for (f in seq_along(files_log)) {
      is_twin <- grepl(files_log[f], pattern = "twin")
      is_best <- grepl(files_log[f], pattern = "best")
      is_generative <- grepl(files_log[f], pattern = "gen")
      x <- utils::read.table(
        file.path(paths[p], files_log[f]),
        row.names = NULL
      )
      x1 <- x[1, ]
      x2 <- x[-1, ]
      colnames(x2) <- levels(droplevels(
        unlist(
          x1,
          use.names = FALSE
        )
      ))
      x3 <- x2[, colnames(x2) %in% traces_names]
      data_table <- data.frame(x3, row.names = NULL)
      if (is_twin == TRUE) {
        data_table$tree <- "twin"
      } else {
        data_table$tree <- "true"
      }
      if (is_best == TRUE) {
        data_table$best_or_gen <- "best"
      }
      if (is_generative == TRUE) {
        data_table$best_or_gen <- "gen"
      }
      razzo::check_traces_in_df(traces_names = traces_names, df = data_table)
      testit::assert(all(traces_names %in% names(data_table)))
      traces <- data.frame(apply(
        data.frame(data_table[, traces_names]),
        MARGIN = 2,
        FUN = as.numeric
      ))
      # Remove burn-ins, burn_in_fraction obtained earlier
      clean_traces <- tracerer::remove_burn_ins(
        traces = traces,
        burn_in_fraction = burn_in_fraction
      )
      # Calculate the correct ESSes, sample_interval obtained earlier
      i <- i + 1
      esses[i, ]$ess_likelihood <- unlist(tracerer::calc_esses(
        clean_traces,
        sample_interval = sample_interval
      ))["likelihood"]
      for (par_name in setting_string_names) {
        par_value <- unique(unname(data_table[names(data_table) == par_name]))
        testit::assert(length(par_value) == 1)
        esses[i, ][names(esses) == par_name] <- par_value
      }
      esses[i, ]$folder <- folder[p]
    }
  }

  esses$tree <- as.factor(esses$tree)
  esses$best_or_gen <- as.factor(esses$best_or_gen)

  # Remove duplicates (no idea how they got in)
  esses <- unique(esses)

  # Order by true/twin then gen/best
  plyr::arrange(df = esses, folder, tree, plyr::desc(best_or_gen))
}
richelbilderbeek/razzo documentation built on March 10, 2021, 1:17 p.m.