R/make_df.R
In quickcode: Quick and Essential 'R' Tricks for Better Scripts

Documented in make_dosing_df

#' Create Subject-by-Time Dosing Records (Base R) with Optional Subject-Level Covariates
#'
#' @description
#' Creates a dosing/event-style dataset by expanding a set of subject IDs
#' (`ID`) across a vector of dosing times (`time`). It assigns a dose value to each subject
#' and time combination, and optionally adds additional columns via `...` that can be
#' constant (length 1) or subject-specific (length equal to `length(ID)`).
#'
#' This utility is useful for quickly generating dosing records for simulation, exploratory
#' analyses, and creating NONMEM-/nlmixr-/Monolix-friendly input structures where each row
#' represents a dosing event (or event-like record) at a given time.
#'
#' @param ID
#' A vector of subject identifiers (numeric, integer, or character). Typically unique.
#'
#' @param time
#' A vector of dosing times. Can be numeric or integer (e.g., hours, days).
#'
#' @param dose
#' Dose value(s). May be:
#' \itemize{
#'   \item length 1: one common dose applied to all subjects and all times
#'   \item length equal to `length(ID)`: subject-specific doses mapped by the *position*
#'         of each subject in `ID` (i.e., `dose[i]` corresponds to `ID[i]`), then repeated
#'         across all times for that subject
#' }
#'
#' @param ...
#' Named additional columns to include in the output.
#' Each argument in `...` must be either:
#' \itemize{
#'   \item length 1: a constant value repeated for all rows (e.g., `STUDY = "CNTO1275"`)
#'   \item length equal to `length(ID)`: subject-level values mapped by the *position* of
#'         each subject in `ID` (e.g., `WT = c(30, 45)` with `ID = c(1, 2)` assigns `WT=30`
#'         to subject 1 and `WT=45` to subject 2 across all times)
#' }
#' All `...` arguments must be named. Names must not conflict with reserved output column
#' names (`ID`, `time`, `dose`).
#'
#' @details
#' ## Mapping rule (important)
#' For any vector argument of length `length(ID)` (including `dose` or any `...` column),
#' values are assigned by the *index/position* of `ID`, not by sorted ID or factor level.
#' For example:
#' \preformatted{
#' ID   = c(10, 20)
#' VAR5 = c(4,  6)
#' }
#' results in subject `10 -> 4` and subject `20 -> 6` for all time points.
#'
#' ## Duplicate IDs
#' If you supply any subject-level vector (i.e., `dose` length > 1, or any `...` argument
#' length > 1), `ID` must be unique. Duplicate IDs create ambiguous mappings and will
#' trigger an error.
#'
#' ## Output structure
#' The output includes:
#' \itemize{
#'   \item `ID`: subject identifier
#'   \item `time`: dosing time
#'   \item `dose`: assigned dose
#'   \item any additional columns provided via `...`
#' }
#' Rows are sorted by `ID` then `time`.
#'
#' ## When to use (common scenarios)
#' \enumerate{
#'   \item **Create dosing records for simulation** (e.g., one dose level across many subjects,
#'         or subject-specific doses).
#'   \item **Build NONMEM/nlmixr event datasets** where dosing rows (e.g., `EVID=1`) are
#'         generated programmatically; you can add `EVID`, `CMT`, `RATE`, `STUDY`, etc. via `...`.
#'   \item **Explore exposure metrics** by generating standardized schedules (e.g., QD times)
#'         and attaching covariates like weight, age group, cohort, regimen labels.
#'   \item **Scenario testing** for pediatric dosing cutoffs, titration strategies (subject-specific),
#'         or bridging comparisons where regimen differs by subgroup.
#' }
#'
#' ## When not to use
#' Use a different approach if you need:
#' \itemize{
#'   \item time-varying covariates with length equal to `length(time)` (shared across subjects),
#'   \item fully specified matrices of values length `length(ID) * length(time)`,
#'   \item multiple event types (dose + observations) in the same call (although you can merge
#'         the result with an observation dataset after creation).
#' }
#'
#' @return
#' A `data.frame` with `length(ID) * length(time)` rows and columns `ID`, `time`, `dose`,
#' plus any additional columns passed via `...`.
#'
#' @examples
#' ## Example 1: Simple dataset (10 subjects, dose 40 at times 1:10)
#' df1 <- make_dosing_df(ID = 1:10, time = 1:10, dose = 40)
#' head(df1)
#'
#' ## Example 2: Subject-specific dose (one dose per subject)
#' df2 <- make_dosing_df(ID = c(1, 2, 3), time = 1:5, dose = c(40, 60, 80))
#' head(df2)
#'
#' ## Example 3: Add constant columns via ...
#' df3 <- make_dosing_df(
#'   ID = 1:3, time = c(0, 7, 14), dose = 45,
#'   STUDY = "CNTO1275JPA3001", ROUTE = "SC", CMT = 1
#' )
#' df3
#'
#' ## Example 4: Add subject-level covariates via ...
#' ## VAR5 is mapped by position: ID=1 gets 4, ID=2 gets 6
#' df4 <- make_dosing_df(
#'   ID = c(1, 2), time = 1:3, dose = 40,
#'   VAR5 = c(4, 6), WT = c(35.2, 51.0), SEX = c("M", "F")
#' )
#' df4
#'
#' ## Example 5: NONMEM-style dosing rows (add EVID and AMT)
#' df_nm <- make_dosing_df(ID = 1:2, time = c(0, 28, 56), dose = c(45, 45),
#'                         EVID = 1, AMT = NA)  # AMT placeholder (optional)
#' df_nm$AMT <- df_nm$dose
#' df_nm
#'
#' @export

make_dosing_df <- function(ID, time, dose, ...) {
  #--------------------------
  # Input checks / coercion
  #--------------------------
  if (missing(ID) || missing(time) || missing(dose)) {
    stop("Please provide ID, time, and dose.")
  }

  ID   <- as.vector(ID)
  time <- as.vector(time)
  dose <- as.vector(dose)

  if (length(ID) == 0L) stop("ID must be non-empty.")
  if (length(time) == 0L) stop("time must be non-empty.")

  extra <- list(...)
  if (length(extra) > 0L && (is.null(names(extra)) || any(names(extra) == ""))) {
    stop("All additional columns passed via ... must be named, e.g., SEX=c('M','F') or STUDY='A'.")
  }

  # If any per-ID vectors are provided (dose length(ID) or any extra length(ID)),
  # require unique IDs to avoid ambiguous mapping.
  needs_id_map <- (length(dose) > 1L) ||
    any(vapply(extra, function(x) length(as.vector(x)) > 1L, logical(1)))

  if (needs_id_map && anyDuplicated(ID)) {
    stop("ID contains duplicates. Per-ID vectors (dose or ...) require unique IDs for unambiguous assignment.")
  }

  #--------------------------
  # Build ID x time dataset
  #--------------------------
  out <- expand.grid(
    ID = ID,
    time = time,
    KEEP.OUT.ATTRS = FALSE,
    stringsAsFactors = FALSE
  )

  # Sort nicely
  out <- out[order(out$ID, out$time), , drop = FALSE]
  rownames(out) <- NULL

  #--------------------------
  # Helper: assign scalar or per-ID vector to expanded rows
  #--------------------------
  assign_by_id <- function(x, ID, out_ID) {
    x <- as.vector(x)

    if (length(x) == 1L) {
      return(rep(x, length(out_ID)))
    }

    if (length(x) == length(ID)) {
      # map by POSITION of ID vector (ID[i] -> x[i])
      id2x <- setNames(x, as.character(ID))
      return(unname(id2x[as.character(out_ID)]))
    }

    stop("Additional column must be length 1 or length(ID).")
  }

  #--------------------------
  # Assign dose
  #--------------------------
  out$dose <- assign_by_id(dose, ID, out$ID)

  #--------------------------
  # Assign extra columns from ...
  #--------------------------
  if (length(extra) > 0L) {
    # Prevent overwriting existing columns
    reserved <- names(out)
    bad_names <- intersect(names(extra), reserved)
    if (length(bad_names) > 0L) {
      stop("These names in ... conflict with existing columns: ",
           paste(bad_names, collapse = ", "),
           ". Please rename them.")
    }

    for (nm in names(extra)) {
      out[[nm]] <- assign_by_id(extra[[nm]], ID, out$ID)
    }
  }

  out
}