knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)
library(broom)
devtools::load_all()

Introduction

Here is a first draft implementation of broom-like methods for `crmPack, as suggested in issue 323.

The original version of this specification did not account for slots whose values were lists. This update corrects that omission.

Whilst variants of "How do I convert an S4 object to a data.frame/tibble?" are commonly asked questions on StackOverflow [see, for example, here and here], there appears to be no existing package to do this. However, the approach generally suggested in answers to these questions is essentially the same as suggested here:

Principles

The general principles I propose for tidying crmPack objects are as follows:

Issue 652 has introduced a common super class for all other crmPack classes. This can be leveraged to provide a single tidy() method that will tidy the majority of class-to-tibble conversions in crmPack. A helper function to manage interval conversions (as discussed under Exceptions below) would allow almost every exception to the general rule to be handled in a simple, two line sub class-specific method.

Exceptions

Helper functions

It is likely that tidying crmPack objects of different classes will share common steps. These common steps are candidates for being handled by helper functions. Functionality for various helper functions include:

Please also read

Please also read Issue 407 regarding inconsistent slot names.

Implementation

tidy() is a generic defined in broom. An end-user of crmPack may not have installed broom. Therefore, we have two options:

After discussion, we take the first approach and make crmPack Depend on broom.

Some helper functions

To handle the attributes:

h_handle_attributes <- function(x, .ignore = c("names", "class", "description", "row.names")) {
  a <- attributes(x)
  valid_names <- setdiff(names(a), .ignore)
  lapply(
    valid_names,
    function(n) {
      z <- attr(x, n)
      if (length(z) == 1) {
        rv <- tibble::tibble(X = z)
      } else {
        rv <- tibble::tibble(X = list(z))
      }
      names(rv) <- n
      rv
    }
  ) %>%
    dplyr::bind_cols()
}

Other helpers.

h_tidy_slot <- function(obj, slot_name, col = NULL, attributes = FALSE) {
  if (is.list(slot(obj, slot_name))) {
    return(lapply(slot(obj, slot_name), tidy))
  }

  a <- h_handle_attributes(slot(obj, slot_name))
  if (is(slot(obj, slot_name), "CrmPackClass")) {
    rv <- slot(obj, slot_name) %>%
      tidy() %>%
      dplyr::bind_cols()
  } else {
    if (is.null(col)) {
      col <- slot_name
    }
    rv <- tibble::tibble({{ col }} := slot(obj, slot_name))
  }
  if (nrow(a) > 0 & attributes) {
    # rv <- rv %>% dplyr::bind_cols(a)
    print(slot_name)
    print(a)
  }
  rv
}

h_tidy_all_slots <- function(obj) {
  slot_names <- slotNames(obj)
  rv <- list()
  for (nm in slot_names) {
    if (!is.function(slot(obj, nm))) {
      rv[[nm]] <- h_tidy_slot(obj, nm)
    }
  }
  # Column bind of all list elements have the same number of rows
  if (length(rv) > 1 & length(unique(sapply(rv, nrow))) == 1) {
    rv <- rv %>% dplyr::bind_cols()
  } 
      rv[[nm]] <- h_slot_to_tibble(obj, nm) %>% dplyr::bind_cols()
    }
  }
  rv
}

h_tidy_class <- function(d, obj) {
  cls <- class(obj)
  class(d) <- c(paste0("tbl_", cls[1]), class(d))
  d
}

h_range_to_minmax <- function(
    x,
    col,
    min_col = "min",
    max_col = "max",
    range_min = -Inf,
    range_max = Inf) {
  vals <- x %>% dplyr::pull({{ col }})
  tibble(
    {{ min_col }} := c(range_min, vals),
    {{ max_col }} := c(vals, range_max)
  )
}

The default tidy function

#' @param x (`CrmPackClass`)\cr The objecte to be tidied
#' @param ... \cr Not used at present
#' @rdname tidy
#' @aliases tidy-CrmCPackClass
#' @example examples/CrmPackClass-method-tidy.R
#' @export
setMethod(
  f = "tidy",
  signature = signature(x = "CrmPackClass"),
  definition = function(x, ...) {
    rv <- h_tidy_all_slots(x) %>% h_tidy_class(x)
    if (length(rv) == 1) {
      rv[[names(rv)[1]]]
    } else {
      rv
    }
  }
)

Simple examples

CohortSizeConst is a trivial example and illustrates the default approach for all classes.

x <- CohortSizeConst(size = 3) %>% tidy()
x
class(x)

IncrementsRelative illustrate how ranges are handled. Each class that includes a range will require individual handling because there's no generic way to identify which slot defines the range, nor how it should be handled.

# tidy-IncrementsRelative ----

#' @rdname tidy
#' @aliases tidy-IncrementsRelative
#' @example examples/IncrementsRelative-method-tidy.R
#' @export
setMethod(
  f = "tidy",
  signature = signature(x = "IncrementsRelative"),
  definition = function(x, ...) {
    h_tidy_all_slots(x) %>%
      dplyr::bind_cols() %>%
      h_range_to_minmax(intervals) %>%
      dplyr::filter(max > 0) %>%
      tibble::add_column(increment = x@increments) %>%
      h_tidy_class(x)
  }
)

x <- IncrementsRelative(
  intervals = c(0, 20),
  increments = c(1, 0.33)
) %>%
  tidy()

x
class(x)

CohortSizeMax contains a slot whose value is a list.

cs_max <- maxSize(
  CohortSizeConst(3), 
  CohortSizeDLT(intervals = 0:1, cohort_size = c(1, 3))
)
cs_max %>% tidy()

CohortSizeParts is slightly more complex and requires custom handling because the part index is implicit rather than explicit.

# tidy-CohortSizeParts ----

#' @rdname tidy
#' @aliases tidy-CohortSizeParts
#' @example examples/CohortSizeParts-method-tidy.R
#' @export
setMethod(
  f = "tidy",
  signature = signature(x = "CohortSizeParts"),
  definition = function(x, ...) {
    tibble::tibble(
      part = seq_along(x@cohort_sizes),
      cohort_size = x@cohort_sizes
    ) %>%
      h_tidy_class(x)
  }
)

x <- CohortSizeParts(cohort_sizes = c(1, 3)) %>% tidy()
x
class(x)

NextBestNCRM also requires custom handling because the overdose_prob slot applies only to the "Overdose" element of the range.

# tidy-NextBestNCRM ----

#' @rdname tidy
#' @aliases tidy-NextBestNCRM
#' @example examples/NextBestNCRM-method-tidy.R
#' @export
setMethod(
  f = "tidy",
  signature = signature(x = "NextBestNCRM"),
  definition = function(x, ...) {
    rv <- h_tidy_all_slots(x) %>%
      dplyr::bind_cols() %>%
      h_range_to_minmax(target, range_min = 0, range_max = 1) %>%
      add_column(max_prob = c(NA, NA, x@max_overdose_prob)) %>%
      add_column(Range = c("Underdose", "Target", "Overdose"), .before = 1)
  }
)

x <- NextBestNCRM(
  target = c(0.2, 0.35),
  overdose = c(0.35, 1),
  max_overdose_prob = 0.25
) %>% tidy()
x
class(x)

In addition, consideration of how to handle the case when the lower end of overdose is above the upper end of target because crmPack does not follow the usual convention of classifying p(Tox) into four categories, namely, "Underdosing", "Target", "Overdosing" and "Toxicity". This implementation gives incorrect output:

NextBestNCRM(
  target = c(0.2, 0.35),
  overdose = c(0.6, 1),
  max_overdose_prob = 0.25
) %>% tidy()

Should the output be similar to

tibble(
  Range = c("Underdose", "Target", "Overdose"),
  min = c(0, 0.2, 0.6),
  max = c(0.2, 0.35, 1),
  max_prob = c(NA, NA, 0.25)
)

or similar to

tibble(
  Range = c("Underdose", "Target", " ", "Overdose"),
  min = c(0, 0.2, 0.35, 0.6),
  max = c(0.2, 0.35, 0.6, 1),
  max_prob = c(NA, NA, NA, 0.25)
)

?

More complex examples

Various sub-classes of GeneralModel demonstrate how tidy() handles both slots of different classes and nesting. Here is LogisticLogNormal as an example.

x <- LogisticLogNormal(
  mean = c(-0.85, 1),
  cov = matrix(c(1, -0.5, -0.5, 1), nrow = 2),
  ref_dose = 50
) 

x %>% tidy()

The cov and prec slots of the ModelParamsLogNormal class both have an attribute set. Unfortunately, it's called dim in both cases. As might be expected, the value of dim is the same for both slots. As dim is uninteresting, this can be easily dealt with: by ignoring it.

The McmcOptions used to create a Samples object are added as attributes to each member of the data slot. So this would also benefit from custom handling.

setMethod(
  f = "tidy",
  signature = signature(x = "Samples"),
  definition = function(x, ...) {
    rv <- lapply(
      slotNames(x),
      function(nm) {
        if (nm == "data") {
          lapply(
            names(x@data),
            function(nm) {
              as_tibble(get(x, nm))
            }
          ) %>%
            dplyr::bind_rows() %>%
            tidyr::pivot_wider(
              names_from = Parameter,
              values_from = value
            ) %>%
            dplyr::bind_cols(h_handle_attributes(get(x, names(x@data)[1])))
        } else {
          slot(x, nm) %>%
            tidy() %>%
            dplyr::bind_cols()
        }
      }
    )
    names(rv) <- c("data", "options")
    rv <- rv %>% h_tidy_class(x)
    rv
  }
)

options <- McmcOptions(
  burnin = 100,
  step = 1,
  samples = 2000
)

emptydata <- Data(doseGrid = c(1, 3, 5, 10, 15, 20, 25, 40, 50, 80, 100))

model <- LogisticLogNormal(
  mean = c(-0.85, 1),
  cov =
    matrix(c(1, -0.5, -0.5, 1),
      nrow = 2
    ),
  ref_dose = 56
)
x <- mcmc(emptydata, model, options)
y <- x %>% tidy()
class(y)

ModelParamsNormal and Samples are the only two class I've found that set attributes. Both need special handling, so I've not been able to demonstrate "default" handling of parameters. Are there any other classes that set attributes?

Since the options tibble is a one row copy of data contained in every row of the data tibble, it's value is questionable. There is a case for returning only the data element of the default list.

A useful side effect

By introducing the tidy method, we have an immediate benefit of greatly improving the presentation of crmPack objects in reports. For example:

y[["data"]] %>%
  head(20) %>%
  kable(
    digits = c(0, 0, 4, 4, 0, 0, 0, 0, 0, NA),
    table.attr = "style = 'width: 100%;'"
  )

and

NextBestNCRM(
  target = c(0.2, 0.35),
  overdose = c(0.6, 1),
  max_overdose_prob = 0.25
) %>%
  tidy() %>%
  kable(table.attr = "style = 'width: 40%;'")

Though whether we should introduce an additional dependency on knitr is moot.

Default tidying for all crmPack classes

Errors indicate classes that will require custom processing, or for which tidying is inappropriate.

crmPack_class_list <- getClasses(asNamespace("crmPack"))

for (cls in crmPack_class_list) {
  if (!isClassUnion(cls)) {
    constructor_name <- paste0(".Default", cls)
    if (exists(constructor_name, mode = "function")) {
      print(cls)
      tryCatch({
        x <- do.call(paste0(".Default", cls), list())
        print(x %>% tidy())
      },
      error = function(e) print(paste0("Unable to tidy ", cls, " objects."))
      )
    } else {
      print(paste0("No default constructor for ", cls))
    }
  }
}


Roche/crmPack documentation built on July 16, 2024, 2:15 a.m.