R/empirical-measures.R
In beezdemand: Behavioral Economic Easy Demand

Documented in get_empirical_measures

#' Calculate Empirical Demand Measures
#'
#' @description
#' Calculates empirical (model-free) measures of demand from purchase task data.
#' These metrics characterize consumption patterns without fitting a demand curve model.
#'
#' This is the modern replacement for [GetEmpirical()], returning a structured
#' S3 object with dedicated methods for printing, summarizing, and visualizing.
#'
#' @param data A data frame in long format with columns for subject ID, price, and consumption
#' @param x_var Character string specifying the column name for price (default: "x")
#' @param y_var Character string specifying the column name for consumption (default: "y")
#' @param id_var Character string specifying the column name for subject ID (default: "id")
#'
#' @return An S3 object of class `beezdemand_empirical` containing:
#' \itemize{
#'   \item \strong{measures} - Data frame with one row per subject and columns:
#'     \itemize{
#'       \item \emph{id} - Subject identifier
#'       \item \emph{Intensity} - Consumption at lowest price (demand intensity)
#'       \item \emph{BP0} - Breakpoint 0: first price where consumption = 0
#'       \item \emph{BP1} - Breakpoint 1: last price with non-zero consumption
#'       \item \emph{Omaxe} - Empirical Omax: maximum total expenditure (price × consumption)
#'       \item \emph{Pmaxe} - Empirical Pmax: price at which maximum expenditure occurs
#'     }
#'   \item \strong{call} - The matched call
#'   \item \strong{data_summary} - List with n_subjects, has_zeros, and complete_cases
#' }
#'
#' @details
#' ## Empirical Measures
#'
#' \itemize{
#'   \item \strong{Intensity} - The consumption value at the lowest price point.
#'     Reflects unrestricted demand or preferred level of consumption.
#'
#'   \item \strong{BP0} (Breakpoint 0) - The first price at which consumption drops to zero.
#'     If consumption never reaches zero, BP0 = NA. Indicates the price threshold
#'     where the commodity becomes unaffordable or undesirable.
#'
#'   \item \strong{BP1} (Breakpoint 1) - The highest price at which consumption is still
#'     non-zero. If all consumption is zero, BP1 = NA. Represents the upper limit
#'     of the commodity's value to the consumer.
#'
#'   \item \strong{Omaxe} (Empirical Omax) - The maximum observed expenditure across all
#'     prices (max of price × consumption). Represents peak spending on the commodity.
#'
#'   \item \strong{Pmaxe} (Empirical Pmax) - The price at which maximum expenditure occurs.
#'     If maximum expenditure is zero, Pmaxe = 0. If multiple prices have the same
#'     maximum expenditure, the highest price is returned.
#' }
#'
#' @note
#' \itemize{
#'   \item Data must not contain duplicate prices within a subject (will error)
#'   \item Missing values (NA) in consumption are preserved in calculations where applicable
#'   \item Breakpoints require at least one zero consumption value to be meaningful
#' }
#'
#' @seealso
#' \itemize{
#'   \item [GetEmpirical()] - Legacy function (superseded)
#'   \item [plot.beezdemand_empirical()] - Visualization method
#'   \item [summary.beezdemand_empirical()] - Extended summary
#' }
#'
#' @examples
#' \donttest{
#' data(apt, package = "beezdemand")
#'
#' # Calculate empirical measures
#' emp <- get_empirical_measures(apt)
#' print(emp)
#'
#' # View measures table
#' emp$measures
#'
#' # Extended summary with distribution info
#' summary(emp)
#'
#' # Visualize distribution of measures
#' plot(emp)  # histogram by default
#' plot(emp, type = "matrix")  # scatterplot matrix
#' }
#'
#' @export
get_empirical_measures <- function(data,
                                   x_var = "x",
                                   y_var = "y",
                                   id_var = "id") {
  # Validate inputs
  if (!is.data.frame(data)) {
    stop("'data' must be a data frame", call. = FALSE)
  }

  # Use CheckCols for backward compatibility with column name handling
  dat <- CheckCols(data, xcol = x_var, ycol = y_var, idcol = id_var)

  # Get unique subjects
  ps <- unique(dat$id)
  ps <- as.character(ps)
  np <- length(ps)

  # Initialize results data frame
  cnames <- c("id", "Intensity", "BP0", "BP1", "Omaxe", "Pmaxe")
  dfres <- data.frame(
    matrix(vector(), np, length(cnames), dimnames = list(c(), cnames)),
    stringsAsFactors = FALSE
  )

  # Calculate empirical measures for each subject
  for (i in seq_len(np)) {
    dfres[i, "id"] <- ps[i]

    adf <- dat[dat$id == ps[i], ]

    # Check for duplicates
    if (any(duplicated(adf$x))) {
      stop(paste0("Duplicates found where id = ", ps[i], ". Check data and rerun."),
           call. = FALSE)
    }

    # Calculate expenditure
    adf[, "expend"] <- adf$x * adf$y

    # Intensity: consumption at minimum price
    dfres[i, "Intensity"] <- adf[which(adf$x == min(adf$x), arr.ind = TRUE), "y"]

    # BP0: first price where consumption = 0
    if (0 %in% adf$y) {
      for (j in nrow(adf):1) {
        if (adf$y[j] == 0) {
          next
        } else {
          dfres[i, "BP0"] <- adf$x[j + 1]
          break
        }
      }
    } else {
      dfres[i, "BP0"] <- NA
    }

    # BP1: last price with non-zero consumption
    dfres[i, "BP1"] <- if (sum(adf$y) > 0) max(adf[adf$y != 0, "x"]) else NA

    # Omaxe: maximum expenditure
    dfres[i, "Omaxe"] <- max(adf$expend)

    # Pmaxe: price at maximum expenditure
    dfres[i, "Pmaxe"] <- if (dfres[i, "Omaxe"] == 0) {
      0
    } else {
      adf[max(which(adf$expend == max(adf$expend))), "x"]
    }
  }

  # Build S3 object
  result <- structure(
    list(
      measures = dfres,
      call = match.call(),
      data_summary = list(
        n_subjects = np,
        has_zeros = any(dfres$BP0 > 0, na.rm = TRUE),  # any subjects reach zero consumption?
        complete_cases = sum(complete.cases(dfres))  # subjects with no NA measures
      )
    ),
    class = "beezdemand_empirical"
  )

  return(result)
}