R/vs_nonmem.R
In mapbayr: MAP-Bayesian Estimation of PK Parameters

Documented in bar_phi merge_phi plot_phi read_nmphi summarise_phi

#' Compare results to NONMEM .phi
#'
#' @name vs_nonmem
#' @param x full path to a .phi file generated by NONMEM
#' @param mapbayr_phi results of mapbayr estimations, in the form of a tibble data.frame, typically obtained from `get_phi()`
#' @param nonmem_phi results of NONMEM estimations, in the form of a tibble data.frame, typically obtained from `read_nmphi()`
#' @param merged_phi merged results of estimations, typically obtained from `merge_phi()`
#' @param summarised_phi summarized results of estimations, typically obtained from `summarise_phi()`
#' @param only_ETA filter the data with `type=="ETA"` (a logical, default is `TRUE`)
#' @param group one or multiple variables to `group_by()`
#' @param levels a named vector of length 3 in order to classify the absolute differences. Default cut-offs are 0.1% and 10% in the parameters space.
#' @param xaxis optional. A character value, that correspond to a variable in data, passed to the x-axis to plot multiple bars side-by-side.
#' @param facet a formula, that will be passed to `ggplot2::facet_wrap()`
#'
#' @return
#'  - read_nmphi: a tibble data.frame with a format close to the original .phi file
#'  - merge_phi: a long-form tibble data.frame with results of mapbayr and NONMEM
#'  - summarise_phi: a summarized tibble data.frame classifying the performance of mapbayr and NONMEM
#'  - plot_phi, bar_phi: a `ggplot2` object
#'
#' @details
#'
#' These functions were made to easily compare the results of mapbayr to NONMEM. For instance, it could be useful in the case of the transposition of a pre-existing NONMEM model into mapbayr. For this, you need to code your model in both mapbayr and NONMEM, and perform the MAP-Bayesian estimation on the **same dataset**. Ideally, the latter contains a substantial number of patients. NONMEM returns the estimations results into a .phi file.
#'
#' Use `read_nmphi()` to parse the NONMEM .phi file into a convenient tibble data.frame with the columns:
#'
#'  - `SUBJECT_NO`, `ID`: Subject identification.
#'  - `ETA1`, `ETA2`, ..., `ETAn`: Point estimates of eta.
#'  - `ETC1_1`, `ETC2_1`, `ETC2_2`, ...,  `ETCn_n`: Variance-covariance matrix of estimation.
#'  - `OBJ`: objective function value
#'
#' Use `get_phi()` to access to the estimations of mapbayr with the same "phi" format.
#'
#' Use `merge_phi()` to combine mapbayr and NONMEM "phi files" into a single long-form data.frame with the columns:
#'
#'  - `SUBJECT_NO`, `ID`: Subject identification.
#'  - `variable` name and its `type`: ETA (point estimate), VARIANCE (on-diagonal element of the matrix), COVARIANCE (off-diagonal), and OBJ.
#'  - `mapbayr` and `nonmem`: corresponding values
#'  - `adiff`: absolute difference between `mapbayr` and `nonmem` values.
#'
#' Use `plot_phi()` to graphically represent `adiff` *vs* `variable`. Alternatively, the table returned by `merge_phi()` is easy to play with in order to derive performance statistics or the graphical plot of your choice.
#'
#' Use `summarise_phi()` to classify the estimation as "Excellent", "Acceptable" or "Discordant", over the whole dataset or by `group`.
#'
#' Use `bar_phi()` to graphically represent the proportion of the aforementioned classification as bar plot.
#'
#' @examples
#' library(mapbayr)
#' nmphi <- read_nmphi(system.file("nm001", "run001.phi", package = "mapbayr"))
#' mapbayrphi <- get_phi(est001)
#'
#' merged <- merge_phi(mapbayrphi, nmphi)
#' plot_phi(merged)
#'
#' summarised <- summarise_phi(merged)
#' bar_phi(summarised)
#'
#'
#' # Analyse the results of multiple runs simultaneously
#'
#' #Example dataset that represents 3 runs
#' merge3 <- dplyr::bind_rows(merged, merged, merged, .id = "RUN")
#' merge3$adiff <- 10 ^ runif(nrow(merge3), -8, 0)
#'
#' summarised3 <- summarise_phi(merge3, group = RUN)
#' bar_phi(summarised3, xaxis = "RUN")
#'
#' @rdname vs_nonmem
#' @export
read_nmphi <- function(x){
  tab <- utils::read.table(x, skip = 1)
  names(tab) <- tab[1,]
  tab <- tab[-1,]
  tab <- tab %>%
    as_tibble() %>%
    rename_with(gsub, pattern = "[()]", replacement = "") %>%
    rename_with(gsub, pattern = ",", replacement = "_") %>%
    mutate(across(everything(), as.double))
  tab
}

is.variance <- function(x){
  str_extract_all(x, "\\d+") %>%
    sapply(function(x) x[1]==x[2])
}

#' @rdname vs_nonmem
#' @export
merge_phi <- function(mapbayr_phi, nonmem_phi){
  stopifnot(
    mapbayr_phi$SUBJECT_NO == nonmem_phi$SUBJECT_NO,
    mapbayr_phi$ID == nonmem_phi$ID,
    names(mapbayr_phi) == names(nonmem_phi)
  )

  full_join(
    pivot_longer(mapbayr_phi, cols = -c("SUBJECT_NO", "ID"), names_to = "variable", values_to = "mapbayr"),
    pivot_longer(nonmem_phi, cols = -c("SUBJECT_NO", "ID"), names_to = "variable", values_to = "nonmem"),
    by = c("SUBJECT_NO", "ID", "variable")
  ) %>%
    mutate(type = case_when(
      str_detect(variable, "ETA") ~ "ETA",
      str_detect(variable, "OBJ") ~ "OBJ",
      is.variance(variable) ~ "VARIANCE",
      TRUE ~ "COVARIANCE"
    ), .after = "variable") %>%
    mutate(adiff = abs(.data$mapbayr-.data$nonmem))

}

#' @rdname vs_nonmem
#' @export
plot_phi <- function(merged_phi, only_ETA = TRUE){
  dat <- merged_phi
  if(only_ETA) dat <- filter(dat, .data$type == "ETA")

  dat$variable <- factor(dat$variable, levels = sort_etanames(unique(dat$variable)))

  dat %>%
    ggplot(aes(.data$variable, .data$adiff, group = .data$ID)) +
    geom_line() +
    scale_y_log10(name = "absolute difference")
}

classify <- function(adiff, levels = c(Excellent = 0, Acceptable = 0.001, Discordant = 0.1)){
  val <- log(1 + levels)
  nam <- names(levels)
  ans <- case_when(
    adiff > val[3] ~ nam[3],
    adiff > val[2] ~ nam[2],
    adiff >= val[1] ~ nam[1]
  )
  factor(ans, levels = nam, ordered = TRUE)
}

#' @rdname vs_nonmem
#' @export
summarise_phi <- function(merged_phi, group, only_ETA = TRUE, levels = c(Excellent = 0, Acceptable = 0.001, Discordant = 0.1)){
  dat <- merged_phi
  if(only_ETA) dat <- filter(dat, .data$type == "ETA")

  dat %>%
    mutate(classif = classify(.data$adiff, levels = levels)) %>%
    group_by(across({{group}})) %>%
    group_by(.data$ID, .add = TRUE) %>%
    summarise(Performance = max(.data$classif), .groups = "drop_last") %>%
    group_by(.data$Performance, .add = TRUE) %>%
    summarise(count = dplyr::n(), .groups = "drop_last") %>%
    mutate(prop = .data$count/sum(.data$count)) %>%
    mutate(perc = my_percent(.data$prop))
}

#' @rdname vs_nonmem
#' @export
bar_phi <- function(summarised_phi, xaxis = NULL, facet = NULL){
  if(is.null(xaxis)){
    p <- ggplot(summarised_phi, aes(x = "", fill = .data$Performance))
  } else {
    p <- ggplot(summarised_phi, aes(x = .data[[xaxis]], fill = .data$Performance))
  }

  fillscale <- c("forestgreen", "orange", "firebrick")
  names(fillscale) <- levels(summarised_phi$Performance)

  p <- p +
    ggplot2::geom_col(aes(y = .data$prop), col = 1, width = 1, position = ggplot2::position_stack(reverse = TRUE)) +
    scale_y_continuous(NULL, labels = my_percent) +
    scale_fill_manual(values = fillscale) +
    theme_bw() +
    theme(legend.position = "bottom") +
    ggplot2::coord_flip()

  if(!is.null(facet)){
    p <- p +
      facet_wrap(facet)
  }
  return(p)
}