# R/load_nm_run.R
#
# Defines functions: load_nm_run_directory
#
# Documented in load_nm_run_directory

#' Load NONMEM run data from a directory
#'
#' @param control_stream \code{nonmem_control_stream} object. Control stream data, obtained from a prior
#'   call to the \code{parse_nm_control_stream()} function.
#'
#' @inheritParams load_nm_run
#'
load_nm_run_directory <-
  function(path,
             dataset_separator = NA,
             dataset_na_string = ".",
             read_initial_values = TRUE,
             load_tables = TRUE,
             control_stream = NULL,
             update_progress = NULL,
             verbose = FALSE) {
    # Resolve the run directory to an absolute, symlink-free path.
    run_directory <- path
    run_directory <- normalizePath(run_directory)

    if (!dir.exists(run_directory)) {
      stop(simpleError("Run directory not found."))
    }

    # Look for the NONMEM XML result file, retrying a few times in case the
    # run output is still being written to disk.
    # NOTE(review): `loop` starts at 1 and is only ever incremented, so both
    # `while (loop != 0)` and the inner `if (loop != 0)` are always TRUE; the
    # loop exits only via `break` or `stop()` — confirm this is intended.
    loop <- 1
    while (loop != 0) {
      run_files <- list.files(run_directory, full.names = TRUE) %>%
        .[file.exists(.)] %>%
        normalizePath() %>%
        unique() # needed because of symbolic links

      # Index every run file by name, extension and modification time.
      run_files_df <- tibble(file = run_files,
                             name = basename(file),
                             name_sans_ext = tools::file_path_sans_ext(name),
                             ext = tools::file_ext(name),
                             datetime = file.info(run_files)$mtime)

      # Keep only the most recently modified XML file, if any.
      run_xml_file <- run_files_df %>%
        filter(tolower(ext) == "xml") %>%
        arrange(desc(datetime)) %>%
        slice(1)

      xml_file_found <- (nrow(run_xml_file) == 1)

      if (xml_file_found) {

        # All result files of a run share the XML file's base name.
        run_results_files <- run_files_df %>%
          filter(name_sans_ext == run_xml_file$name_sans_ext)

        control_stream_file <- run_results_files %>% filter(ext %in% c("con", "mod", "ctl", "nmctl")) %>% pull(file)
        estimation_file <- run_results_files %>% filter(ext == "ext") %>% pull(file)
        phi_file <- run_results_files %>% filter(ext == "phi") %>% pull(file)
        xml_file <- run_results_files %>% filter(ext == "xml") %>% pull(file)
        report_file <- run_results_files %>% filter(ext %in% c("rep", "lst", "out", "res")) %>% pull(file) %>% first()

        run_name <- run_xml_file$name_sans_ext

        break
      }

      # Without a pre-parsed control stream there is nothing to fall back on.
      if (!xml_file_found && is.null(control_stream)) {
        stop(simpleError("No XML document found in run files."))
      }

      loop <- loop + 1

      if (loop != 0) {
        print("No XML document found in run files, retrying.")
        Sys.sleep(3)
      }

      # Give up after a few retries.
      # NOTE(review): even when `control_stream` is supplied, this branch is
      # eventually reached and stops; the fallback only skips the first stop().
      if (loop > 3) {
        stop(simpleError("No XML document found in run files"))
      }
    }

    if (is.function(update_progress)) {
      update_progress(detail = "Reading XML data")
    }

    # Read the XML report; bare ampersands are replaced because NONMEM can
    # emit '&' characters that break the XML parser.
    xml_lines <- read_lines(xml_file) %>% na.omit() %>% str_replace_all("\\&", "and") %>% str_trim()

    # XML file parsing
    # safe_xml_parsing <- safely(xmlParse)
    quiet_parse <- quietly(xmlParse)

    # load_xml <- safe_xml_parsing(xml_lines,
    #   encoding = "UTF-8",
    #   options = 1
    # ) # recover on error

    load_xml_q <- quiet_parse(xml_lines,
                                 encoding = "UTF-8",
                                 options = 1
    ) # recover on error

    # quietly() captures printed parser diagnostics in $output.
    if (!is.null(load_xml_q$output) && load_xml_q$output != "") {

      # bug in NONMEM 7.4.1: monitor tag mismatch
      err_msg <- load_xml_q$output
      err_pattern <- "Opening and ending tag mismatch: monitor line (\\d+)"
      if (str_detect(err_msg, err_pattern)) {
        # Drop the offending line(s) reported by the parser and retry once.
        err_line <- str_match_all(err_msg, err_pattern)[[1]][, 2] %>% as.integer()

        # 2nd try
        load_xml_q <- quiet_parse(xml_lines[-err_line], encoding = "UTF-8")
      }

      if (!is.null(load_xml_q$error)) {
        stop(load_xml_q$error)
      }
    }

    xml_report <- load_xml_q$result

    root_node <- xmlRoot(xml_report)
    nonmem_node <- root_node[["nonmem"]]

    if (is.null(nonmem_node)) {
      stop(simpleError("NONMEM run not recognized: XML result file is invalid."))
    }

    problem_node <- nonmem_node[["problem"]]

    nm_version <- nonmem_node %>% xmlGetAttr(name = "version")

    # NOTE(review): `nm_version` is a character string; this comparison relies
    # on Ops.numeric_version coercing it — confirm the attribute is always a
    # parseable version string.
    if (nm_version < numeric_version("7.2")) {
      stop(simpleError(sprintf("NONMEM version %s is not supported, please use NONMEM 7.2 or higher.", nm_version)))
    }

    # report file
    report <- NULL

    if (!is.na(report_file)) {
      report <- read_file(report_file)
    }  # , locale = readr::locale(encoding = "ISO-8859-1"))

    # control stream
    # The control stream text embedded in the XML report is used instead of
    # the .mod/.ctl file found on disk.
    cs_text <- root_node[["control_stream"]] %>% xmlValue()
    # control_stream <- read_file(file = control_stream_file)

    cs_data <- control_stream

    # Parse the control stream only when the caller did not supply a
    # pre-parsed `control_stream` object.
    if (is.null(cs_data)) {
      safe_parse_cs <- safely(parse_nm_control_stream)

      safe_cs <- safe_parse_cs(content = cs_text, read_initial_values = read_initial_values, verbose = verbose)

      if (!is.null(safe_cs$error)) {
        stop(simpleError("Error reading control stream file."))
      }

      cs_data <- safe_cs$result
    }

    dv_name <- "DV"

    # if dependent column has another name than default 'DV'
    if (nrow(dv_alias_row <- cs_data$input %>% filter(synonym == "DV"))) {
      dv_name <- dv_alias_row$column
    }

    # Locate the dataset referenced by $DATA among the run files.
    # NOTE(review): regex-escaping the dots and then wrapping in fixed() means
    # the pattern contains literal backslashes (e.g. "data\.csv") and likely
    # never matches a real path, so the fallback below does the actual work —
    # confirm and simplify (fixed(basename(...)) alone would suffice).
    dataset_file <-
      run_files[str_detect(run_files, fixed(str_replace_all(basename(cs_data$dataset_file), "\\.", "\\\\.")))]

    if (length(dataset_file) == 0 || !file.exists(dataset_file)) {
      # if dataset not found in run folder, look up the file tree
      merged_path <- str_c(run_directory, cs_data$dataset_file, sep = "/")

      if (!file.exists(normalizePath(merged_path))) {
        stop(simpleError(sprintf("Dataset %s not found in run files.", cs_data$dataset_file)))
      }

      dataset_file <- merged_path
    }


    estimations <- NULL
    last_estimation <- NULL
    if (!is.null(problem_node)) {
      # estimations
      estimation_nodes <-
        xmlElementsByTagName(problem_node, "estimation")
      # estimation_nodes <- getNodeSet(problem_node, namespaces = "nm", "//nm:estimation")

      if (is.function(update_progress)) {
        update_progress(detail = "Reading estimation informations")
      }

      # remove CHAIN estimations before digging into results
      # NOTE(review): returning NULL from a Filter() predicate drops that
      # element from the unlisted logical index, which can misalign the
      # selection when an estimation node lacks <estimation_method> — the
      # predicate should probably return FALSE instead; confirm.
      useful_est_nodes <- Filter(function(x) {
        meth_node <- xmlElementsByTagName(x, "estimation_method")
        if (length(meth_node) == 0) {
          return(NULL)
        }
        return(xmlValue(meth_node[[1]]) != "chain")
      }, estimation_nodes)

      estimations <- map(seq_along(useful_est_nodes), function(i_node) {
        est_node <- useful_est_nodes[[i_node]]

        term_status <- NULL

        if (!is.null(est_node[["termination_status"]])) {
          term_status <- est_node[["termination_status"]] %>% xmlValue() %>% as.numeric()
        }

        estimation <-
          list(
            number = i_node,
            minimization = !is.null(term_status),
            termination_status = term_status,
            failed = ifelse(is.null(term_status),
              FALSE, # No estimation
              ifelse(is.na(term_status), TRUE, term_status == 8)
            ),
            final_ofv = est_node[["final_objective_function"]] %>% xmlValue() %>% as.numeric(),
            termination_information = est_node[["termination_information"]] %>% xmlValue() %>% str_trim(),
            method = est_node[["estimation_method"]] %>% xmlValue(),
            title = est_node[["estimation_title"]] %>% xmlValue(),
            nfuncevals = est_node[["termination_nfuncevals"]] %>% xmlValue() %>% as.numeric(),
            significant_digits = est_node[["termination_sigdigits"]] %>% xmlValue() %>% as.numeric(),
            eigenratio = NA,
            eigenvalues = NA,
            correlation = NA,
            duration = est_node[["estimation_elapsed_time"]] %>%
              xmlValue() %>%
              as.numeric()
          )

        if (verbose) {
          print(sprintf("Estimation %s (%s)", i_node, estimation$title))
        }

        # <nm:parallel_est nm:parafile='fpilinux8.pnm' nm:protocol='FPI' nm:nodes='8'/>
        parallel_node <- est_node[["parallel_est"]]

        if (!is.null(parallel_node)) {
          estimation$parallel <-
            list(
              protocol = parallel_node %>% xmlGetAttr("protocol"),
              nodes = parallel_node %>% xmlGetAttr("nodes") %>% as.numeric()
            )
        }

        msgs_node <- est_node[["termination_txtmsg"]]

        if (!is.null(msgs_node)) {
          term_msgs <- map_df(xmlChildren(msgs_node), function(x) {
            msg_id <- x %>% xmlValue()

            nonmem_txtmsgs[nonmem_txtmsgs$id == msg_id, ]
          }, .id = NULL)

          estimation$termination_messages <- term_msgs
        }

        # THETAs
        thetas_node <- est_node[["theta"]]
        if (!is.null(thetas_node)) {
          thetas_se_node <- est_node[["thetase"]]

          thetas_ids <-
            paste0("THETA", xmlSApply(thetas_node, function(x)
              xmlGetAttr(x, name = "name")))
          thetas_values <-
            xmlSApply(thetas_node, xmlValue) %>%
            as.numeric()

          if (!is.null(thetas_se_node)) {
            thetas_se <-
              xmlSApply(thetas_se_node, xmlValue) %>%
              as.numeric()

            # theta_se = 10000000000 <=> fixed theta
            thetas_se <- infinite_as_na(thetas_se)
          } else {
            thetas_se <- NA
          }

          # make a data frame...
          thetas_df <- tibble(
            id = thetas_ids,
            estimate = thetas_values,
            se = thetas_se,
            rse = abs(thetas_se / thetas_values),
            ci_low = thetas_values - 2 * thetas_se,
            ci_up = thetas_values + 2 * thetas_se
          )

          estimation$thetas <- thetas_df
        } else {
          estimation$failed <- TRUE
        }

        # SUBPOPs (if mixture model)
        subpop_names <- paste0("SUBPOP", seq_len(cs_data$subpopulations))

        # SHRINKAGEs
        eta_shrinkage <- ebv_shrinkage <- epsilon_shrinkage <- NULL

        # OMEGAs
        # eta bars
        eta_bar_node <- est_node[["etabar"]]
        eta_bar_se_node <- est_node[["etabarse"]]
        eta_bar_n_node <- est_node[["etabarn"]]
        eta_bar_pval_node <- est_node[["etabarpval"]]

        if (all(!is.null(c(eta_bar_node, eta_bar_se_node, eta_bar_n_node, eta_bar_pval_node)))) {
          eta_bar <- load_matrix(eta_bar_node)
          eta_bar_se <- load_matrix(eta_bar_se_node)
          eta_bar_n <- load_matrix(eta_bar_n_node)
          eta_bar_pval <- load_matrix(eta_bar_pval_node)

          etas_names <-
            xmlSApply(
              eta_bar_node %>% xmlChildren() %>% .[[1]],
              function(x)
                xmlGetAttr(x, name = "cname")
            ) %>%
            unname()

          etas_id_col <- rep(etas_names, cs_data$subpopulations)

          eta_bars_df <- tibble(
            id = etas_id_col,
            value = eta_bar[, 1],
            se = eta_bar_se[, 1]
          )

          if (!is.null(eta_bar_n)) {
            eta_bars_df$n <- eta_bar_n[, 1]
          }

          if (cs_data$subpopulations > 1) {
            eta_bars_df$subpop <- rep(subpop_names, each = length(etas_names))
          }

          if (!is.null(eta_bar_pval)) {
            eta_bars_df$pvalue <- eta_bar_pval[, 1]
          }

          estimation$eta_bars <- eta_bars_df
        }

        omega_node <- est_node[["omega"]]
        omegase_node <- est_node[["omegase"]]
        etas_ids <- NULL
        if (!is.null(omega_node)) {
          omega_matrix <- load_lower_triangular_matrix(omega_node)

          etas_ids <- setNames(seq_len(nrow(omega_matrix)), paste0("ETA", seq_len(nrow(omega_matrix))))
          colnames(omega_matrix) <- rownames(omega_matrix) <- names(etas_ids)

          omega_df <- load_matrix_estimate_table(
            estimate_matrix_node = est_node[["omega"]],
            se_matrix_node = omegase_node
          ) %>%
            rename(eta1 = row, eta2 = col) %>%
            mutate(
              eta1 = plyr::mapvalues(eta1, from = etas_ids, to = names(etas_ids), warn_missing = FALSE),
              eta2 = plyr::mapvalues(eta2, from = etas_ids, to = names(etas_ids), warn_missing = FALSE)
            )

          omegac_df <- load_matrix_estimate_table(
            estimate_matrix_node = est_node[["omegac"]],
            se_matrix_node = est_node[["omegacse"]]
          ) %>%
            rename(eta1 = row, eta2 = col) %>%
            mutate(
              eta1 = plyr::mapvalues(eta1, from = etas_ids, to = names(etas_ids), warn_missing = FALSE),
              eta2 = plyr::mapvalues(eta2, from = etas_ids, to = names(etas_ids), warn_missing = FALSE)
            )

          fixed_omegas_indices <- which(omega_df$estimate == 2.0000000000000000E-012)

          if (length(fixed_omegas_indices) > 0) {
            message(paste("Fixed omega indices", paste(fixed_omegas_indices, collapse = ", ")))
          }

          fixed_omegas <- setdiff(
            cs_data$parameters$definition %>% filter(type == "eta") %>% pull(id),
            unique(c(omega_df$eta1, omega_df$eta2))
          )

          estimation$omega <- omega_df %>%
            filter(!(row_number() %in% fixed_omegas_indices)) %>%
            mutate(cv = purrr::pmap_dbl(
              list(eta1 = eta1, eta2 = eta2, estimate = estimate),
              function(eta1, eta2, estimate) {
                ifelse(eta1 == eta2, sqrt(exp(estimate) - 1), NA_real_)
              }
            ))
          # mutate(cv = ifelse(eta1 == eta2, sqrt(exp(estimate) - 1), 0))

          estimation$omegac <- omegac_df %>% filter(!(row_number() %in% fixed_omegas_indices))
          estimation$omega_matrix <- omega_matrix

          if (!is.null(omegase_node)) {
            omegase_matrix <- load_lower_triangular_matrix(omegase_node)
            colnames(omegase_matrix) <- rownames(omegase_matrix) <- names(etas_ids)

            estimation$omegase_matrix <- omegase_matrix
          }

          # ETA EBV shrinkages
          eta_shrink_tag <- ifelse(nm_version < numeric_version("7.4"), "etashrink", "etashrinksd")
          ebv_shrink_tag <- ifelse(nm_version < numeric_version("7.4"), "ebvshrink", "ebvshrinksd")

          eta_shrinkage_matrix <- load_matrix(est_node[[eta_shrink_tag]])
          ebv_shrinkage_matrix <- load_matrix(est_node[[ebv_shrink_tag]])

          if (!is.null(eta_shrinkage_matrix)) {
            eta_shrinkage <- tibble(parameter = etas_id_col, shrinkage = eta_shrinkage_matrix[, 1] / 100)

            if (cs_data$subpopulations > 1) {
              eta_shrinkage$subpop <- rep(subpop_names, each = length(etas_names))
            }
          }
          if (!is.null(ebv_shrinkage_matrix)) {
            ebv_shrinkage <- tibble(parameter = etas_id_col, shrinkage = ebv_shrinkage_matrix[, 1] / 100)

            if (cs_data$subpopulations > 1) {
              ebv_shrinkage$subpop <- rep(subpop_names, each = length(etas_names))
            }
          }
        }

        # SIGMAs
        sigma_node <- est_node[["sigma"]]
        sigmase_node <- est_node[["sigmase"]]
        eps_ids <- NULL
        if (!is.null(sigma_node)) {
          sigma_matrix <- load_lower_triangular_matrix(sigma_node)

          eps_ids <- setNames(seq_len(nrow(sigma_matrix)), paste0("EPS", seq_len(nrow(sigma_matrix))))
          colnames(sigma_matrix) <- rownames(sigma_matrix) <- names(eps_ids)

          sigma_df <- load_matrix_estimate_table(
            estimate_matrix_node = est_node[["sigma"]],
            se_matrix_node = sigmase_node
          ) %>%
            rename(epsilon1 = row, epsilon2 = col) %>%
            mutate(
              epsilon1 = plyr::mapvalues(epsilon1, from = eps_ids, to = names(eps_ids), warn_missing = FALSE),
              epsilon2 = plyr::mapvalues(epsilon2, from = eps_ids, to = names(eps_ids), warn_missing = FALSE)
            )

          sigmac_df <- load_matrix_estimate_table(
            estimate_matrix_node = est_node[["sigmac"]],
            se_matrix_node = est_node[["sigmacse"]]
          ) %>%
            rename(epsilon1 = row, epsilon2 = col, sigmac_square = estimate) %>%
            mutate(
              epsilon1 = plyr::mapvalues(epsilon1, from = eps_ids, to = names(eps_ids), warn_missing = FALSE),
              epsilon2 = plyr::mapvalues(epsilon2, from = eps_ids, to = names(eps_ids), warn_missing = FALSE)
            )

          estimation$sigma <- sigma_df
          estimation$sigmac <- sigmac_df
          estimation$sigma_matrix <- sigma_matrix

          if (!is.null(sigmase_node)) {
            sigmase_matrix <- load_lower_triangular_matrix(sigmase_node)
            colnames(sigmase_matrix) <- rownames(sigmase_matrix) <- names(eps_ids)

            estimation$sigmase_matrix <- sigmase_matrix
          }

          # EPS shrinkage
          eps_shrink_tag <- ifelse(nm_version < numeric_version("7.4"), "epsshrink", "epsshrinksd")
          eps_shrinkage_matrix <- load_matrix(est_node[[eps_shrink_tag]])

          eps_id_col <- rep(names(eps_ids), cs_data$subpopulations)

          if (!is.null(eps_shrinkage_matrix)) {
            epsilon_shrinkage <- tibble(
              parameter = eps_id_col,
              shrinkage = eps_shrinkage_matrix[, 1] / 100
            )

            if (cs_data$subpopulations > 1) {
              epsilon_shrinkage$subpop <- rep(subpop_names, each = length(eps_ids))
            }
          }
        }

        # gathering SHRINKAGEs
        if (!all(map_lgl(list(eta_shrinkage, ebv_shrinkage, epsilon_shrinkage), is.null))) {
          estimation$shrinkage <- bind_rows(list(
            ETA = eta_shrinkage,
            EBV = ebv_shrinkage,
            EPS = epsilon_shrinkage
          ),
          .id = "type"
          )
        }

        # covariance
        covariance_matrix <- NULL
        covariance_node <- est_node[["covariance"]]

        if (!is.null(covariance_node)) {
          covariance_matrix <- load_lower_triangular_matrix(covariance_node)

          # item names
          cov_m_names <- map(
            covariance_node %>%
              xmlChildren() %>%
              .[[length(.)]] %>%
              xmlChildren(),
            function(x)
              xmlGetAttr(x, "cname")
          ) %>% unlist(use.names = FALSE)

          colnames(covariance_matrix) <- rownames(covariance_matrix) <- cov_m_names
        }

        estimation$covariance_matrix <- covariance_matrix

        # covariance
        correlation_matrix <- NULL
        correlation_node <- est_node[["correlation"]]

        if (!is.null(correlation_node)) {
          correlation_matrix <- load_lower_triangular_matrix(correlation_node)

          # item names
          cov_m_names <- map(
            correlation_node %>%
              xmlChildren() %>%
              .[[length(.)]] %>%
              xmlChildren(),
            function(x)
              xmlGetAttr(x, "cname")
          ) %>% unlist(use.names = FALSE)

          colnames(correlation_matrix) <- rownames(correlation_matrix) <- cov_m_names

          # look for correlation between parameters
          temp_correlation_matrix <- correlation_matrix
          diag(temp_correlation_matrix) <- NA

          estimation$correlation <- any(!is.na(temp_correlation_matrix) & abs(temp_correlation_matrix) >= 0.96)
        }

        estimation$correlation_matrix <- correlation_matrix

        # eigenvalues
        eigenvalues_node <- est_node[["eigenvalues"]]

        if (!is.null(eigenvalues_node)) {
          eigenvalues <-
            xmlSApply(eigenvalues_node, xmlValue) %>%
            as.numeric()

          estimation$eigenvalues <- eigenvalues
          estimation$eigenratio <- max(eigenvalues) / min(eigenvalues)
        }

        if (estimation$failed) {
          warning(simpleWarning(sprintf("Estimation %s (%s) failed.", estimation$number, estimation$method)))
        }

        estimation
      })

      not_failed_estimations <- Filter(function(x) {
        !x$failed
      }, estimations)

      last_estimation <- last(not_failed_estimations)
    }

    params_df <- cs_data$parameters$definition

    # if not explicitly written ETAXXX = ETA(1)... :
    params_df <- params_df %>%
      mutate(
        name = ifelse(is.na(name), id, name),
        n = ifelse(is.na(n), str_extract(id, "[0-9]+$"), n),
        column = NA
      )

    etas_df <- params_df %>% filter(type == "eta")

    consistent_omega_dim <- TRUE
    if (!is.null(last_estimation)) {
      omega_params_df <- sigma_params_df <- NULL

      if (!is.null(last_estimation$omega_matrix)) {
        if (nrow(etas_df) != ncol(last_estimation$omega_matrix)) {
          consistent_omega_dim <- FALSE
          warning(simpleWarning(sprintf(
            "The number of ETAs (%s) declared is not consistent with OMEGA matrix dimensions (%s).",
            nrow(etas_df), paste(rep(ncol(last_estimation$omega_matrix), 2), collapse = "x")
          )))
        }

        omega_n <- seq_len(ncol(last_estimation$omega_matrix))
        if (is_matrix_diagonal(last_estimation$omega_matrix)) {
          omega_x <- omega_n
          omega_y <- omega_n
        } else {
          omega_x <- rep(omega_n, omega_n)
          omega_y <- map(omega_n, function(x) 1:(omega_n)[x]) %>% unlist()
        }
        omega_x_names <- plyr::mapvalues(omega_x, from = etas_df$n, to = etas_df$name)
        omega_y_names <- plyr::mapvalues(omega_y, from = etas_df$n, to = etas_df$name)
        omega_ids <- sprintf("OMEGA(%s,%s)", omega_x, omega_y)

        omega_params_df <- tibble(
          id = sprintf("OMEGA(%s,%s)", omega_x, omega_y),
          type = "omega",
          name = sprintf("OMEGA(%s,%s)", omega_x_names, omega_y_names)
        )
      }

      if (!is.null(last_estimation$sigma_matrix)) {
        sigma_n <- seq_len(ncol(last_estimation$sigma_matrix))
        if (is_matrix_diagonal(last_estimation$sigma_matrix)) {
          sigma_x <- sigma_n
          sigma_y <- sigma_n
        } else {
          sigma_x <- rep(sigma_n, sigma_n)
          sigma_y <- map(sigma_n, function(x) 1:(sigma_n)[x]) %>% unlist()
        }

        sigma_params_df <- tibble(
          id = sprintf("SIGMA(%s,%s)", sigma_x, sigma_y),
          type = "sigma",
          name = sprintf("SIGMA(EPS%s,EPS%s)", sigma_x, sigma_y)
        )
      }

      params_df <- params_df %>% bind_rows(omega_params_df, sigma_params_df)
    }

    temp_p_df <- params_df %>% # look for duplicated names (e.g. IOV ETAs)
      group_by(name) %>%
      mutate(temp_name = pmap_chr(
        list(dplyr::n(), row_number(), name),
        ~ifelse(..1 > 1, str_c(name, "_", ..2), ..3)
      )) %>%
      ungroup()

    # update omega matrix and omega tables row/col names
    estimations <- map(estimations, function(x) {
      if (!is.null(x$thetas)) {
        x$thetas <- x$thetas %>%
          mutate(name = plyr::mapvalues(id, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE)) %>%
          select(id, name, everything())
      }

      if (!is.null(x$eta_bars)) {
        x$eta_bars <- x$eta_bars %>%
          mutate(name = plyr::mapvalues(id, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE)) %>%
          select(id, name, everything())
      }

      if (!is.null(x$omega)) {
        x$omega <- x$omega %>%
          mutate(
            eta1 = plyr::mapvalues(eta1, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE),
            eta2 = plyr::mapvalues(eta2, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE)
          )
      }

      if (!is.null(x$omegac)) {
        x$omegac <- x$omegac %>%
          mutate(
            eta1 = plyr::mapvalues(eta1, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE),
            eta2 = plyr::mapvalues(eta2, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE)
          )
      }

      if (!is.null(x$sigma)) {
        x$sigma <- x$sigma %>%
          mutate(
            epsilon1 = plyr::mapvalues(epsilon1, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE),
            epsilon2 = plyr::mapvalues(epsilon2, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE)
          )
      }

      if (!is.null(x$sigmac)) {
        x$sigmac <- x$sigmac %>%
          mutate(
            epsilon1 = plyr::mapvalues(epsilon1, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE),
            epsilon2 = plyr::mapvalues(epsilon2, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE)
          )
      }

      if (!is.null(x$shrinkage)) {
        x$shrinkage <- x$shrinkage %>%
          mutate(parameter = plyr::mapvalues(parameter, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE))
      }

      if (!is.null(x$omega_matrix) && consistent_omega_dim) {
        colnames(x$omega_matrix) <- temp_p_df %>% filter(type == "eta") %>% pull(temp_name)
        rownames(x$omega_matrix) <- temp_p_df %>% filter(type == "eta") %>% pull(temp_name)
      }

      x
    })

    if (length(estimation_file) == 1 && file.exists(estimation_file)) {
      ext_data <- parse_nm_ext(filepath = estimation_file)

      if (!is.null(ext_data) && nrow(ext_data) > 0) {
        ext_data <- ext_data %>% filter(problem == 1)

        merge_ext_data <- function(x) {
          iterations_data <- ext_data %>% filter(number == x$number) %>% pull(iterations) %>% .[[1]]

          if (nrow(iterations_data) == 0) return(NULL)

          iterations_data <- iterations_data %>%
            filter(ITERATION > -1e9) %>% # remove last lines
            mutate(row_index = row_number())

          it_params <- colnames(iterations_data)[colnames(iterations_data) %in% params_df$id]
          names(it_params) <- plyr::mapvalues(it_params, from = params_df$id, to = temp_p_df$temp_name, warn_missing = FALSE)

          iterations_data %>%
            select(-row_index) %>%
            rename(!!!it_params)
        }
        estimations <- estimations %>%
          modify_if(~.$number %in% ext_data$number, ~update_list(., iterations = ~merge_ext_data(.)))
      }
    }

    if (length(phi_file) == 1 && file.exists(phi_file)) {
      phi_data <- parse_nm_phi(filepath = phi_file)

      if (!is.null(phi_data) && nrow(phi_data) > 0) {
        phi_data <- phi_data %>% filter(problem == 1)

        if (!is.null(cs_data$covariance$sir)) { # do not read the SIR table
          phi_data <- phi_data %>% filter(method != "Importance Sampling of Variance-Covariance (SIR)")
        }

        # for(est_i in phi_data$number){
        #   estimations[[est_i]]$phi <- phi_data[est_i,]$individuals[[1]]
        # }

        estimations <- estimations %>%
          modify_if(~.$number %in% phi_data$number, ~update_list(., phi = ~phi_data[.$number, ]$individuals[[1]]))
      }
    }

    if (is.function(update_progress)) {
      update_progress(detail = basename(dataset_file))
    }


    # readr--------------
    # Read the dataset as raw lines first so the $DATA IGNORE directives can
    # be applied before the table is parsed.
    data_lines <- read_lines(dataset_file)
    data_lines <- data_lines %>% str_trim()
    first_line_index <- 1


    lines_to_ignore <- vector("integer")
    lines_to_keep <- vector("integer")
    ignored_lines <- vector("character")

    # IGNORE=C: drop lines starting with 'C'.
    if (cs_data$ignore$C) {
      lines_to_ignore <- which(!str_detect(data_lines, "^\\s*[^C]"))
    }

    # IGNORE=#: drop lines starting with '#'.
    if (cs_data$ignore$`#`) {
      lines_to_ignore <- c(lines_to_ignore, which(!str_detect(data_lines, "^\\s*[^#]")))
    }

    # IGNORE=@: drop lines whose first character is not a digit, comma or
    # semicolon (typically header lines).
    if (cs_data$ignore$`@`) {
      # id_lines_to_remove <- which(!str_detect(substr(data_lines, 1, 1), "[0-9\\.]"))
      # id_lines_to_remove <- str_which(data_lines, "^[a-zA-Z]")
      id_lines_to_remove <- str_which(data_lines, "^[^0-9,;]")

      lines_to_ignore <- c(lines_to_ignore, id_lines_to_remove)
    }

    # IGNORE=<char>: custom ignore marker, only used when none of the
    # standard markers above applied.
    if (!is.null(cs_data$ignore$txt) && !any(c(cs_data$ignore$C, cs_data$ignore$`#`, cs_data$ignore$`@`))) {
      lines_to_ignore <- which(!str_detect(data_lines, sprintf("^\\s*[^%s]", cs_data$ignore$txt)))
    }

    lines_to_keep <- setdiff(seq_along(data_lines), lines_to_ignore)

    if (length(lines_to_ignore) > 0) {
      ignored_lines <- data_lines[lines_to_ignore]
      data_lines <- data_lines[-lines_to_ignore]
    }

    first_data_line <- data_lines[first_line_index]

    # Guess the column separator from the first kept line when not supplied:
    # comma, then tab, then whitespace.
    if (is.na(dataset_separator)) {
      dataset_separator <- ifelse(str_detect(first_data_line, ","),
        ",",
        ifelse(str_detect(first_data_line, "\t"),
          "\t",
          " "
        )
      )
    }

    # Pick the matching readr reader; any other separator falls back to
    # read_delim with the detected separator.
    # NOTE(review): readr::read_table2() is deprecated in readr >= 2.0 —
    # consider read_table() when the readr dependency is bumped.
    ds_readr_func <- switch(dataset_separator,
      "," = readr::read_csv,
      "\t" = readr::read_tsv,
      " " = readr::read_table2, {
        function(...) readr::read_delim(delim = dataset_separator, ...)
      }
    )

    # A letter in the first kept line suggests a header row is still present.
    has_header <- str_detect(str_trim(first_data_line), "[A-Za-z]")

    input_selected_cols <- cs_data$input %>% filter(!dropped)
    dropped_cols <- cs_data$input %>% filter(dropped)

    # analyse header
    if ((first_kept_line <- lines_to_keep[1]) > 1) {
      top_lines <- ignored_lines[lines_to_ignore < first_kept_line]

      ds_file_colnames <- vector("character")
      potential_colnames <- na.omit(c(input_selected_cols$column, input_selected_cols$synonym))

      for (t_line in rev(top_lines)) {
        # if(cs_data$ignore$C)
        #   t_line <- t_line %>% str_replace("^\\s*C", "")

        # ds_file_colnames <- str_split(t_line, dataset_separator) %>% unlist() %>% toupper()
        ds_file_colnames <- str_split(t_line, boundary("word")) %>% unlist() %>% toupper()

        # if half of the potential colnames (from $INPUT) are found in the lines, consider that it is the header
        if (mean(map_lgl(potential_colnames, ~any(str_detect(ds_file_colnames, word(.))))) > 0.5) {
          break
        }
      }

      if (length(ds_file_colnames) < nrow(cs_data$input)) {
        cs_data$input <- cs_data$input %>% filter(column %in% ds_file_colnames |
          synonym %in% ds_file_colnames)
      }
    }

    keep_indices <- which(!cs_data$input$dropped)


    dataset <- ds_readr_func(paste(data_lines, collapse = "\n"),
      na = dataset_na_string,
      guess_max = length(data_lines),
      # col_types = cols(.default = col_number()),
      col_names = FALSE
    )

    keep_indices <- keep_indices[keep_indices <= ncol(dataset)]

    dataset <- dataset %>% select(keep_indices)
    colnames(dataset) <- cs_data$input$column[keep_indices]

    cols_check <- input_selected_cols %>% mutate(is_present = column %in% colnames(dataset))

    if (any(!cols_check$is_present)) {
      # get the column from the column order in the dataset
      temp_df <- cols_check %>%
        mutate(n = row_number())

      cols_not_in_dataset <- temp_df %>%
        filter(!is_present) %>%
        mutate(source_column = ifelse(colnames(dataset)[n] %in% cols_check$column,
          NA,
          colnames(dataset)[n]
        ))

      found_columns <- cols_not_in_dataset %>% filter(!is.na(source_column))
      missing_columns <- cols_not_in_dataset %>% filter(is.na(source_column))

      if (nrow(missing_columns) > 0) {
        warning(simpleWarning(str_c("$INPUT", str_c(missing_columns$column, collapse = ", "), "not found in dataset.", sep = " ")))
      }

      if (nrow(found_columns) > 0) {
        input_selected_cols[found_columns$n, ]$column <- found_columns$source_column
        input_selected_cols[found_columns$n, ]$synonym <- found_columns$column
      }

      input_selected_cols <- input_selected_cols %>% filter(!(column %in% missing_columns$column))
    }

    columns_with_synonym <- input_selected_cols %>% filter(!is.na(synonym))
    rename_col <- columns_with_synonym %>% filter(column %in% colnames(dataset) & !(column %in% nm_reserved_names))

    c_to_rename <- setNames(rename_col$column, nm = rename_col$synonym)

    # rename column with their synonym
    if (length(c_to_rename) > 0) {
      dataset <- dataset %>%
        mutate_at(.vars = vars(c_to_rename), as.numeric) %>%
        rename(!!!c_to_rename)
    }

    if (!is.null(cs_data$ignore$data)) {
      ignore_quos <- map(seq_len(nrow(cs_data$ignore$data)), function(i) {
        ignore_cond <- cs_data$ignore$data[i, ]
        rhs_val <- ifelse(is.character(dataset[[ignore_cond$column]]), sprintf("'%s'", ignore_cond$value), ignore_cond$value)
        formula_text <- sprintf("is.na(%s) | !(%s %s %s)", ignore_cond$column, ignore_cond$column, ignore_cond$operator, rhs_val)

        parse_quo(formula_text, env = caller_env())
      })

      dataset <- dataset %>% filter(!!!(ignore_quos))
    }

    # add EVID column if missing
    if (!("EVID" %in% colnames(dataset))) {
      dataset <- dataset %>% mutate(EVID = ifelse(is.na(DV), 1L, 0L))
    }

    # Fix EVID column if it is NA -> observation event
    dataset <- dataset %>% mutate(EVID = ifelse(is.na(EVID), 0L, EVID))

    # add MDV column if missing
    if (!("MDV" %in% colnames(dataset))) {
      dataset <- dataset %>% mutate(MDV = ifelse(EVID == 0L, 0L, 1L))
      # dataset$MDV <- 0L#factor(0, levels = c(0, 1))
    } else {
      dataset <- dataset %>%
        mutate(MDV = as.integer(ifelse(is.na(MDV),
          ifelse(is.na(DV), 1L, 0L),
          MDV
        ))) # %>%
      # mutate(MDV = as.factor(MDV))
    }


    nrow_ds <- nrow(dataset)

    run_tables <- list()
    covariates_df <- NULL
    cat_covs_levels <- NULL
    # loading tables
    if (load_tables && !is.null(cs_data$tables) && nrow(cs_data$tables) > 0) {
      run_tables <- cs_data$tables %>%
        split(.$file) %>%
        map(function(x) {
          tab <- x$file

          file_pattern <- str_replace_all(tab, "\\.", "\\\\.")

          file_detection <- str_detect(run_files, file_pattern)

          file_path <- NULL

          if (any(file_detection)) {
            file_path <- first(run_files[file_detection])
          }

          if (!is.null(file_path) && file.exists(file_path)) {
            if (is.function(update_progress)) {
              update_progress(detail = sprintf("%s table", tab))
            }

            n_skip <- as.integer(!x$notitle) + as.integer(!x$noheader)

            # readr way
            # tab_readr_func <- switch(str_sub(x$format, 1, 1),
            #                     "," = readr::read_csv,
            #                     "t" = readr::read_tsv,
            #                     readr::read_table2)
            # base way
            tab_readr_func <- switch(str_sub(x$format, 1, 1),
              "," = read.csv,
              "t" = read.delim,
              read.table
            )

            # remove the repetition of the headers every 900 rows
            # if(!x$noheader & !x$oneheader){
            tab_lines <- read_lines(file_path) %>%
              str_subset("^\\s*[0-9]")

            con <- textConnection(
              str_c(tab_lines, collapse = "\n")
            )

            safe_read <- safely(tab_readr_func)

            tab_data <- safe_read(con,
              header = FALSE,
              quote = "\"", comment.char = "", na.strings = ".",
              check.names = FALSE
            )

            if (!is.null(tab_data$error)) {
              warning(simpleWarning(sprintf("%s could not be read: %s", tab, tab_data$error$message)))
              return(NULL)
            }

            tab_data <- tab_data$result

            tab_col_names <- unlist(x$columns)

            if (!x$noappend) {
              # tab_col_names <- tab_col_names[tab_col_names != "PRED"] # NONMEM omits PRED column in the table if it appends it also at the end...
              append_cols <- c(dv_name, "PRED", "RES", "WRES")

              id_append <- which(tab_col_names %in% c("PRED", "RES", "WRES"))

              if (last(tab_col_names) == dv_name) {
                # NONMEM omits DV column if it is the last requested column
                tab_col_names <- tab_col_names[-length(tab_col_names)]
              }

              if (length(id_append) > 0) {
                tab_col_names <- tab_col_names[-id_append]
              }

              tab_col_names <- c(tab_col_names, append_cols)
            }

            if (length(tab_col_names) != ncol(tab_data)) {
              warning(simpleWarning(sprintf("%s could not be read.", tab)))
              return(NULL)
            }

            colnames(tab_data) <- tab_col_names

            # remove potential duplicated columns
            col_names <- colnames(tab_data)

            if (dv_name != "DV" && all(c(dv_name, "DV") %in% col_names)) {
              tab_data <- tab_data %>% select(-DV)
            }

            dup_columns <- which(duplicated(col_names))

            if (length(dup_columns) > 0) {
              tab_data <- tab_data[, -dup_columns]
            }

            if (dv_name != "DV" && !x$noappend) {
              tab_data <- tab_data %>% rename(!!!setNames(dv_name, "DV"))
            }

            nm_tab <- tab_data %>% convert_nm_table()

            nm_tab
          } else {
            warning(simpleWarning(sprintf("%s is missing.", tab)))

            NULL
          }
        })

      run_tables <- compact(run_tables)

      # column checks
      required_columns <- c("ID", "TIME", "DV")
      map(required_columns, function(x) {
        if (!(x %in% colnames(dataset))) {
          stop(simpleError(paste(x, "column is required.")))
        }
      })
    }

    compartments <- cs_data$model_compartments

    if (is.null(compartments)) { # if $PRED subroutine
      n_cmt <- 1L

      if ("CMT" %in% colnames(dataset)) {
        n_cmt <- max(dataset$CMT)
      }

      compartments <- tibble(cmt = seq_len(n_cmt), name = str_c("CMT_", cmt))
    }

    if ("CMT" %in% colnames(dataset)) {
      dataset <- dataset %>%
        mutate(CMT = abs(CMT)) # because of NONMEM "disabling CMT"
    } else {
      subrout <- ifelse(!is.null(cs_data$subroutine$ADVAN), cs_data$subroutine$ADVAN, "PRED")
      default_dv_cmt <- switch(subrout,
        "ADVAN1" = 1,
        "ADVAN10" = 1,
        "ADVAN2" = 2,
        "ADVAN3" = 1,
        "ADVAN4" = 2,
        "ADVAN11" = 1,
        "ADVAN12" = 2,
        1
      )

      dataset$CMT <- default_dv_cmt
    }

    # update compartments info
    dv_cmts <- dataset %>%
      filter(!is.na(DV) & MDV == 0 & CMT != 0) %>%
      select(CMT) %>%
      unique() %>%
      unlist(use.names = FALSE)

    # no CMT found for DV, but CMT=0 exists
    if(length(dv_cmts) == 0 & any(dataset$CMT == 0)){
      ds_cmts <- unique(dataset$CMT)

      dv_cmts <- compartments$cmt[!compartments$cmt %in% ds_cmts]

      compartments$cmt[compartments$cmt == dv_cmts] <- 0
      dv_cmts <- 0
    }

    compartments$dv_target <- compartments$cmt %in% dv_cmts

    # NONMEM output compartment used ? -> CMT with number = number of compartments + 1
    output_cmt <- nrow(compartments) + 1
    if (output_cmt %in% as.numeric(as.character(dv_cmts))) {
      compartments <- bind_rows(
        compartments,
        tibble(
          cmt = output_cmt, name = "Output", dv_target = TRUE
        )
      )
    }


    dataset <- dataset %>% convert_nm_table()

    # merged table: will contain all columns
    pmxploitab <- dataset

    for (i in seq_along(run_tables)) {
      tab <- run_tables[[i]]

      # populate table set with FIRSTONLY option
      if (cs_data$tables[cs_data$tables$file == names(run_tables)[i], ]$firstonly) {
        tab <- tab %>% left_join(select(dataset, ID), by = "ID")
      }

      tab_col_names <- colnames(tab)
      new_col_names <-
        tab_col_names[!(tab_col_names %in% c(colnames(pmxploitab), columns_with_synonym$synonym))] # , columns_with_synonym$column))]

      if (length(new_col_names) > 0) {
        if (nrow(tab) == nrow(pmxploitab)) {
          pmxploitab <- pmxploitab %>%
            bind_cols(select(tab, one_of(new_col_names)))
        } else {
          if(length(estimations) > 0) # do not warn for simulations runs
            warning(simpleWarning(sprintf("%s's number of rows is different than the dataset's.", names(run_tables)[i])))
        }
      }
    }

    names(run_tables) <- tolower(names(run_tables))

    # look for individual parameters in patab table (Xpose convention)
    if (!is.null(run_tables$patab)) {
      all_cols <- colnames(run_tables$patab)

      params_df <- params_df %>%
        mutate(column = ifelse(type == "eta", ifelse(name %in% all_cols, name, id), NA))

      par_names <- setdiff(all_cols, c(
        params_df$id,
        params_df$name,
        na.omit(params_df$column),
        nm_reserved_names,
        columns_with_synonym$synonym
      ))

      if (length(par_names) > 0) {
        individual_params_df <- tibble(
          id = par_names, type = "individual", name = par_names,
          column = par_names
        )

        params_df <- bind_rows(params_df, individual_params_df)
      }

      params_df <- params_df %>%
        mutate(column = ifelse(!is.na(column),
          ifelse(id %in% colnames(run_tables$patab) |
            column %in% colnames(run_tables$patab),
          column, NA
          ), NA
        ))
    } else {
      # look for ETAs in another tab
      temp_etas_df <- params_df %>%
        mutate(n_row = row_number()) %>%
        filter(type == "eta" & is.na(column))

      if (nrow(temp_etas_df) > 0) {
        temp_etas_df <- temp_etas_df %>%
          mutate(short_name = paste0(substring("ETA", 1, 4 - nchar(n)), n)) %>%
          mutate(column = ifelse(name %in% colnames(pmxploitab), name,
            ifelse(short_name %in% colnames(pmxploitab), short_name, column)
          )) %>%
          filter(!is.na(column)) %>%
          select(-short_name)
      }

      if (any(!is.na(temp_etas_df$column))) {
        params_df[temp_etas_df$n_row, ]$column <- temp_etas_df$column
      }
    }

    params_df$id <- factor(params_df$id, levels = unique(params_df$id))
    params_df$type <- factor(params_df$type, levels = unique(params_df$type))

    params_df <- params_df %>%
      arrange(type, id)

    covariates_df <- tibble(
      column = character(),
      type = character(),
      name = character()
    )

    # look for categorical covariates in cotab table (Xpose convention)
    if (!is.null(run_tables$catab)) {
      all_cols <- setdiff(colnames(run_tables$catab), columns_with_synonym$synonym)

      cat_cov_names <- setdiff(all_cols, nm_reserved_names)

      if (length(cat_cov_names) > 0) {
        cat_covs_df <- tibble(column = cat_cov_names, type = "categorical", name = cat_cov_names)
        covariates_df <- bind_rows(covariates_df, cat_covs_df)
      }
    }
    # look for continuous covariates in cotab table (Xpose convention)
    if (!is.null(run_tables$cotab)) {
      all_cols <- setdiff(colnames(run_tables$cotab), columns_with_synonym$synonym)

      cont_cov_names <- setdiff(all_cols, nm_reserved_names)

      if (length(cont_cov_names) > 0) {
        cont_covs_df <- tibble(column = cont_cov_names, type = "continuous", name = cont_cov_names)
        covariates_df <- bind_rows(covariates_df, cont_covs_df)
      }
    }

    # if covariate columns are in patab, remove them from parameters
    if (any(covariates_df$column %in% params_df$column)) {
      params_df <- params_df %>%
        filter(!(type == "individual" & (column %in% covariates_df$column)))
    }

    covariates_df$type <- factor(covariates_df$type, levels = c("categorical", "continuous"))

    covariates_df <- covariates_df %>%
      arrange(type, name)


    independent_variables <- tibble(column = character(), name = character(), unit = character())
    prediction_types <- NULL
    residual_types <- NULL

    if (load_tables) {
      pmxploitab_cols <- colnames(pmxploitab)

      if (any(c("TIME", "TAD") %in% pmxploitab_cols)) {
        independent_variables <- independent_variables %>%
          add_row(
            column = intersect(c("TIME", "TAD"), pmxploitab_cols),
            name = column,
            unit = NA_character_
          )
      }

      prediction_types <- intersect(nm_predictions, pmxploitab_cols)

      residual_types <- intersect(nm_residuals, pmxploitab_cols)
    }

    n_obs <- NA

    if ("DV" %in% colnames(dataset)) {
      n_obs <- dataset %>% filter(!is.na(DV) & MDV != 1) %>% nrow()
    }

    run_tables$dataset <- dataset

    r_number <- str_extract(basename(run_name), "[0-9]+$")

    n_nodes <- NA

    if (length(estimations) > 0) {
      n_nodes <- estimations %>% map_dbl(~ifelse(!is.null(.$parallel), .$parallel$nodes, 1)) %>% max()
    }

    run_short_name <- basename(run_name)
    run_id <- ifelse(!is.na(r_number), r_number, run_short_name)

    info <- list(
      run_id = run_id,
      run_name = run_short_name,
      run_number = as.integer(r_number),
      path = run_directory,
      problem = cs_data$problem,
      control_stream_file = basename(control_stream_file),
      dataset_file = basename(dataset_file),
      report_file = basename(report_file),
      nodes = n_nodes,
      nm_version = nm_version,
      nm_tran_warnings = root_node[["nmtran"]] %>% xmlValue(),
      number_of_subjects = dataset$ID %>% unique() %>% length(),
      number_of_observations = n_obs,
      start_time = root_node[["start_datetime"]] %>%
        xmlValue() %>%
        ymd_hms(),
      stop_time = root_node[["stop_datetime"]] %>%
        xmlValue() %>%
        ymd_hms()
    )

    duration_interval <- interval(info$start_time, info$stop_time)

    info$duration <- duration_interval / seconds(1)

    compartments <- mutate(compartments, type = "continuous", unit = NA_character_)
    parameters <- mutate(params_df, unit = NA_character_)
    covariates <- mutate(covariates_df, unit = NA_character_)

    model <- list(
      compartments = compartments,
      parameters = params_df,
      covariates = covariates_df,
      independent_variables = independent_variables,
      predictions = prediction_types,
      residuals = residual_types
    )

    files <- list(
      control_stream = control_stream,
      report = report
    )

    # AIC & BIC
    # Set number of parameters as : n (estimated theta) + n (estimated eta) + n (estimated epsilon)
    estimations <- estimations %>%
      modify(~update_list(.,
        n_parameters = ~ifelse(!is.null(.$thetas), nrow(.$thetas), 0L) + # n (estimated theta)
          ifelse(!is.null(.$omega), nrow(filter(.$omega, eta1 == eta2)), 0L) + # + n (estimated eta)
          ifelse(!is.null(.$sigma), nrow(filter(.$sigma, epsilon1 == epsilon2)), 0L)
      )) # n (estimated epsilon)

    estimations <- estimations %>%
      modify(~update_list(.,
        aic = ~.$final_ofv + 2L * .$n_parameters,
        bic = ~.$final_ofv + log(n_obs) * .$n_parameters
      ))

    # set estimation steps names
    names(estimations) <- map_chr(estimations, ~str_c(.$number, .$method, sep = ": "))

    mdata_file <- paste0(run_directory, "/pmxploit_metadata.rds")

    attr(model, "pmxploit_version") <- packageDescription("pmxploit")$Version

    if (file.exists(mdata_file)) {
      model_copy <- model
      model <- readRDS(mdata_file)

      if (!identical(attr(model, "pmxploit_version"), packageDescription("pmxploit")$Version)) {
        warning(simpleWarning("Run metadata were saved with a different version of pmxploit package, you may need to reset it (delete `pmxploit_metadata.rds` file)"))
      }

      model$internal <- model_copy
      model$has_metadata <- TRUE
    } else {
      model$has_metadata <- FALSE
    }

    # look for duplicated covariates in catab and cotab
    if (any(dup_covs <- duplicated(model$covariates$column))) {
      dup_covs_names <- model$covariates[dup_covs, ]$column

      covs_summary <- pmxploitab %>%
        select(one_of(dup_covs_names)) %>%
        summarise_all(funs(length(unique(.)))) %>%
        gather(column, n_unique) %>%
        mutate(type = factor(ifelse(n_unique <= 20, "categorical", "continuous"),
          levels = c("categorical", "continuous")
        ), name = column) %>%
        select(-n_unique)

      message(simpleMessage(sprintf(
        "The following covariate(s) found both in catab and cotab: %s will be considered as: %s%s.\n",
        paste(covs_summary$column, collapse = "/"),
        paste(covs_summary$type, collapse = "/"),
        ifelse(length(dup_covs_names) > 1, " respectively", "")
      )))



      model$covariates <- model$covariates %>%
        filter(!(column %in% dup_covs_names)) %>%
        bind_rows(covs_summary)
    }


    cat_cols <- model$covariates %>% filter(type == "categorical")

    for (cc in cat_cols$column) {
      cat_values <- pmxploitab[[cc]]
      pmxploitab[[cc]] <- factor(cat_values, levels = sort(unique(cat_values)), ordered = TRUE)

      cov_levels <- unique(cat_values) %>% as.character() %>% sort()
      names(cov_levels) <- cov_levels
      levels_list <- list(cov_levels)
      names(levels_list) <- cc

      cat_covs_levels <- c(cat_covs_levels, levels_list)
    }

    if (!model$has_metadata) {
      model$categorical_covariates_levels <- cat_covs_levels
    } else {
      model$internal$categorical_covariates_levels <- cat_covs_levels
    }

    run_tables$pmxploitab <- pmxploitab

    run_data <- list(
      info = info,
      control_stream = cs_data,
      model = model,
      estimations = estimations,
      files = files,
      tables = run_tables,
      xml = xml_report
    )

    class(run_data) <- "nonmem_run"

    run_data
  }

#' Load a NONMEM run data
#'
#' Loads NONMEM run results data from either a folder or an archive file (tar.gz, tgz or zip).
#'
#' @param path character. Run folder or archive file path.
#' @param temp_directory (optional) character. When \code{path} is an archive file,
#' sets the path of the temporary directory where the archive files will be extracted.
#' @param load_tables logical. If \code{TRUE} (default), loads output tables.
#' @param read_initial_values logical. If \code{TRUE} (default), parses initial
#'  parameter values from the control stream.
#' @param keep_tempfiles logical. If \code{TRUE}, \code{temp_directory} will not be deleted
#' once run data is loaded.
#' @param extract_everything logical. If \code{TRUE}, when \code{path} is an archive file,
#' extracts all the content of the archive. Otherwise, extracts only the files required for
#' post-processing analysis (default).
#' @param dataset_separator (optional) character. Character used as column
#'   separator in the dataset. Default is \code{NA} for automatic detection.
#' @param dataset_na_string character. Character string corresponding to missing
#'   values in the dataset. Default is \code{"."}.
#' @param update_progress (optional) function of the form \code{function(detail, ...)\{\}}.
#'   Useful to follow loading progression.
#' @param verbose logical. If \code{TRUE}: prints output messages.
#'
#'
#' @return A NONMEM run object. See \code{\link{load_nm_run_directory}}.
#' @export
#'
#' @examples
#' \dontrun{
#' run <- load_nm_run("path/to/my/run_folder")
#'
#' run <- load_nm_run("path/to/my/run_archive.tar.gz")
#' }
load_nm_run <-
  function(path,
             temp_directory = str_c(tempdir(), "/pmxploit"),
             load_tables = TRUE,
             read_initial_values = TRUE,
             keep_tempfiles = FALSE,
             extract_everything = FALSE,
             dataset_separator = NA,
             dataset_na_string = ".",
             update_progress = NULL,
             verbose = FALSE) {
    # `path` may point either to a run directory or to a compressed run archive.
    if (!file.exists(path)) {
      stop(simpleError("Run not found."))
    }

    if (file.info(path)$isdir) {
      # Directory case: load the run contents in place, no extraction needed.
      run_directory <- path

      run <- load_nm_run_directory(
        path = run_directory,
        dataset_separator = dataset_separator,
        dataset_na_string = dataset_na_string,
        read_initial_values = read_initial_values,
        load_tables = load_tables,
        update_progress = update_progress,
        verbose = verbose
      )
    } else {
      # Archive case: extract (part of) the archive into a temporary
      # directory, then load the run from there.
      run_archive <- path

      run_archive <- normalizePath(run_archive)

      # Only *.tar.gz, *.tgz and *.zip archives are supported.
      if (!(str_detect(tolower(run_archive), "(\\.tar\\.gz|\\.zip|\\.tgz)$"))) {
        stop(simpleError("Run archive file has to be a *.tar.gz, *.tgz or *.zip file."))
      }

      is_zip <- (tolower(tools::file_ext(run_archive)) == "zip")

      is_windows <- (tolower(Sys.info()["sysname"]) == "windows")

      archive_dir <- dirname(run_archive)

      if (is.function(update_progress)) {
        update_progress(value = 0.10, detail = "Extracting files...")
      }

      # Pick the extraction function matching the archive type.
      uncompress_f <- (if(is_zip) unzip else  untar)

      # Wrapper discarding messages/warnings emitted during extraction,
      # keeping only the result.
      quiet_uncompress <- function(...) {
        q_uncompress <- quietly(uncompress_f)

        q_uncompress(...)$result
      }

      # skip TAR messages on Windows (many warnings)
      uncompress_function <- (if(is_windows) quiet_uncompress else uncompress_f)

      # Create tempdir
      # Loop until a non-existing extraction directory is obtained (a random
      # suffix is drawn on each pass when no `temp_directory` is provided).
      repeat {
        if (!is.null(temp_directory)) {
          # Reuse the user-supplied temporary directory, wiping any leftovers
          # from a previous load.
          if (dir.exists(temp_directory)) {
            unlink(temp_directory, recursive = TRUE)
          }
          run_directory <- temp_directory
        } else {
          # No directory provided: generate a randomly-named one next to the
          # archive file.
          run_directory <-
            paste(archive_dir, paste0("pmxploit", sample(1:10000, 1)), sep = "/")
        }

        if (!dir.exists(run_directory)) {
          # List the archive contents without extracting anything yet.
          archive_files <- uncompress_function(run_archive, list = TRUE)

          # unzip(list = TRUE) returns a data frame; keep only the file names.
          if(is_zip) archive_files <- archive_files$Name

          files_extensions <- tools::file_ext(archive_files) %>% unique()

          # Recognized control stream file extensions.
          cs_exts <- c("ctl", "con", "mod", "nmctl")

          # A run archive must contain both a control stream and an XML
          # result file to be considered a NONMEM run.
          if (!(any(cs_exts %in% files_extensions) && "xml" %in% files_extensions)) {
            stop(simpleError("NONMEM run not recognized: no XML result file found."))
          }

          # dur <- system.time({

          # Select a single control stream file located at the archive root,
          # preferring extensions in `cs_exts` order (via the ordered factor
          # and `arrange(ext)`).
          cs_file <- tibble(filename = archive_files) %>%
            mutate(
              ext = ordered(tools::file_ext(filename), cs_exts),
              file_sans_ext = tools::file_path_sans_ext(basename(filename))
            ) %>%
            filter(ext %in% cs_exts) %>%
            mutate(dir = dirname(filename)) %>%
            filter(dir == ".") %>%
            group_by(file_sans_ext) %>%
            mutate(group_len = dplyr::n()) %>%
            filter(group_len == max(group_len)) %>%
            ungroup() %>%
            arrange(ext) %>%
            slice(1)

          # Extract only the control stream first: parsing it tells which
          # other files need to be extracted.
          uncompress_function(
            run_archive, files = cs_file$filename,
            exdir = run_directory
          )

          cs_file_path <- cs_file$filename

          # parse control stream data
          cs_path <- str_c(run_directory, basename(cs_file_path), sep = "/")
          cs_content <- read_file(file = cs_path)

          # `safely()` captures parsing errors instead of letting them
          # propagate, so a clearer error message can be raised below.
          safe_parse_cs <- safely(parse_nm_control_stream)

          safe_cs <- safe_parse_cs(content = cs_content, read_initial_values = read_initial_values, verbose = verbose)

          if (!is.null(safe_cs$error)) {
            stop(simpleError(sprintf("Error reading control stream file, \"%s\".", cs_path)))
          }

          cs_data <- safe_cs$result

          # look for ID column
          # If no ID column is declared in $INPUT, assume the first data
          # column is the subject ID (the second one when the first column is
          # "C" and flagged as ignored).
          if (!("ID" %in% c(cs_data$input$column, cs_data$input$synonym))) {
            id_col_index <- ifelse(cs_data$ignore$C & cs_data$input$column[1] == "C", 2L, 1L)

            cs_data$input[id_col_index, ]$column <- "ID"
          }

          if (extract_everything) {
            # Extract the full archive content.
            uncompress_function(run_archive, exdir = run_directory)
          } else {
            # Selective extraction: build regex patterns matching only the
            # files required for post-processing analysis.
            run_name <- tools::file_path_sans_ext(basename(cs_file_path))

            archive_files_df <- tibble(file = archive_files) %>%
              mutate(
                file2 = ifelse(substring(file, 1, 2) == "./", substring(file, 3), file), # trick when tar.gz archives that contain subdirectories
                dir_name = dirname(file),
                base_name = basename(file),
                extension = tools::file_ext(file)
              ) %>%
              filter(dir_name == "." & base_name != ".")

            # Dataset, report and result files sharing the run name
            # (dots escaped for use in regular expressions).
            files_pattern <-
              sprintf(
                "^%s$", c(
                  cs_data$dataset_file %>% str_replace_all("\\.", "\\\\."),
                  sprintf("%s\\.rep", run_name),
                  sprintf("%s\\.lst", run_name),
                  sprintf("%s\\.ext", run_name),
                  sprintf("%s\\.phi", run_name),
                  sprintf("%s\\.xml", run_name)
                )
              )

            # Also extract output tables when requested and declared in the
            # control stream's $TABLE records.
            if (load_tables && !is.null(cs_data$tables) && nrow(cs_data$tables) > 0) {
              tabs_pattern <- paste0("^", cs_data$tables$file %>% str_replace_all("\\.", "\\\\."))

              pattern <- sprintf("(%s)", paste(c(files_pattern, tabs_pattern), collapse = "|"))
            } else {
              pattern <- sprintf("(%s)", paste(files_pattern, collapse = "|"))
            }

            archive_files_df <- archive_files_df %>%
              mutate(match = str_detect(file2, pattern)) %>%
              filter(match == TRUE)

            files_list <- archive_files_df$file

            # extra_files <- c(cs_data$extra_files, cs_data$subroutine$FILES)

            # Extra files referenced by the control stream (if any).
            if (length(cs_data$extra_files) > 0) {
              files_list <- c(files_list, str_c("./", cs_data$extra_files))
            }

            # Include the pmxploit metadata file when the archive holds
            # exactly one.
            if (length(mdata_file <- str_subset(archive_files, fixed("pmxploit_metadata.rds"))) == 1) {
              files_list <- c(mdata_file, files_list)
            }

            uncompress_function(
              run_archive, files = files_list,
              exdir = run_directory
            )
          }
          break
        }
      }

      # Load the run from the extracted files, reusing the control stream
      # already parsed above.
      run <- load_nm_run_directory(
        path = run_directory,
        load_tables = load_tables,
        update_progress = update_progress,
        dataset_separator = dataset_separator,
        dataset_na_string = dataset_na_string,
        control_stream = cs_data,
        verbose = verbose
      )

      # run$info$directory <- normalizePath(dirname(run_archive))
      # Report the original archive (not the temp directory) as the run path.
      run$info$path <- normalizePath(run_archive)

      # Cleanup ?
      if (keep_tempfiles) {
        # run$info$temp_directory <- run_directory
        print(sprintf(
          "Temp files have been extracted to '%s' directory.", run_directory
        ))
      } else {
        # Remove the extraction directory once the run data is in memory.
        unlink(run_directory, recursive = TRUE)
      }
    }

    invisible(run)
  }
pnolain/pmxploit documentation built on Jan. 31, 2024, 1:16 p.m.