R/core_stats.R

Defines functions unpack_obj.stats_dm_list unpack_obj.stats_dm copy_class_attributes.stats_dm copy_class_attributes internal_aggregate aggregate_stats.fit_stats aggregate_stats.delta_funs aggregate_stats.quantiles aggregate_stats.cafs aggregate_stats validate_stats_dm.stats_dm validate_stats_dm.fit_stats validate_stats_dm.sum_dist validate_stats_dm.delta_funs validate_stats_dm.quantiles validate_stats_dm.cafs validate_stats_dm new_stats_dm calc_stats.fits_ids_dm calc_stats.drift_dm calc_stats.data.frame calc_stats calc_stats_pred_obs calc_ic calc_delta_funs calc_quantiles calc_quantiles_pred calc_quantiles_obs calc_cafs calc_cafs_pred calc_cafs_obs

Documented in aggregate_stats aggregate_stats.cafs aggregate_stats.delta_funs aggregate_stats.fit_stats aggregate_stats.quantiles calc_cafs calc_cafs_obs calc_cafs_pred calc_delta_funs calc_ic calc_quantiles calc_quantiles_obs calc_quantiles_pred calc_stats calc_stats.data.frame calc_stats.drift_dm calc_stats.fits_ids_dm calc_stats_pred_obs copy_class_attributes copy_class_attributes.stats_dm internal_aggregate new_stats_dm unpack_obj.stats_dm unpack_obj.stats_dm_list validate_stats_dm validate_stats_dm.cafs validate_stats_dm.delta_funs validate_stats_dm.fit_stats validate_stats_dm.quantiles validate_stats_dm.stats_dm validate_stats_dm.sum_dist

# FUNCTIONS FOR CALCULATING THE CAF ---------------------------------------

#' Calculate CAFs
#'
#' Backend functions to calculate conditional accuracy functions for RT
#' vectors or pdfs
#'
#' @param rts_u,rts_l vectors of RTs for the upper and lower boundary
#' @param pdf_u,pdf_l density values for the upper and lower boundary
#' @param one_cond character label
#' @param n_bins number of bins to use for the CAFs
#'
#' @returns a data.frame with the "Cond" label, the "Bin"s and "P_U" for
#' the CAFs
#'
#' @details
#' for RTs:
#' first elements are attributed to a bin (with bins calculated across all RTs
#' using equally spaced quantiles),
#' then accuracy per bin is calculated.
#'
#' for Densities: Add density values, calculate a CDF and force it between 0 and
#' 1. Then determine the indices that cut the CDF into bins by considering
#' equally spaced quantiles. Then calculate the ratio of probability mass per
#' bin.
#'
#' @keywords internal
calc_cafs_obs <- function(rts_u, rts_l, one_cond, n_bins) {
  # cut (all) RTs into n bins
  rts <- c(rts_u, rts_l)
  probs <- seq(0, 1, length.out = n_bins + 1)
  borders <- stats::quantile(rts, probs = probs)
  bins <- cut(rts, breaks = borders, labels = FALSE, include.lowest = TRUE)
  stopifnot(sort(unique(bins)) == 1:n_bins)

  # create a vector to code accuracy
  corr <- rep(c(1, 0), times = c(length(rts_u), length(rts_l)))

  # separate accuracy vector by the calcualted bins, and get mean accuracy per
  # bin
  caf <- tapply(corr, bins, mean)
  caf <- data.frame(
    Cond = one_cond,
    Bin = as.numeric(names(caf)),
    P_U = as.numeric(caf)
  )
  return(caf)
}

#' @rdname calc_cafs_obs
calc_cafs_pred <- function(pdf_u, pdf_l, one_cond, n_bins) {
  stopifnot(length(pdf_u) == length(pdf_l))

  # make a cdf and scale it to a value between 0 and 1
  cdf <- pdf_u + pdf_l
  cdf <- cumsum(cdf)
  cdf <- cdf - min(cdf)
  cdf <- cdf / max(cdf)

  # get the quantiles that determine the bins
  probs <- seq(0, 1, length.out = n_bins + 1)
  probs <- probs[2:(length(probs) - 1)]

  # get the indices that determine the "cuts"
  x <- 1:length(cdf)
  x_borders <- stats::approx(x = cdf, y = x, xout = probs, ties = "mean")$y
  x_borders <- append(x_borders, min(x), after = 0) # ensure that x_borders
  x_borders <- append(x_borders, max(x)) # contains the lower and upper part

  # "label" the indices by the bins
  bins <- cut(x, breaks = x_borders, labels = FALSE, include.lowest = TRUE)
  stopifnot(unique(bins) == 1:n_bins)

  # determine the probability mass per bin and pdf
  sum_u <- tapply(pdf_u, bins, sum)
  sum_l <- tapply(pdf_l, bins, sum)

  # calculate the ratio and pass back
  caf <- sum_u / (sum_u + sum_l)
  caf <- data.frame(
    Cond = one_cond,
    Bin = as.numeric(names(caf)),
    P_U = as.numeric(caf)
  )
  return(caf)
}

# internal function for input checking, data wrangling, and default values
#' Calculate CAFs
#'
#' Function that calls the underlying CAF calculation functions
#' [dRiftDM::calc_cafs_obs] and [dRiftDM::calc_cafs_pred]. Does input checks
#' and the data wrangling
#'
#' @param pdf_u,pdf_l either NULL or density vectors
#' @param rts_u,rts_l either NULL or RT vectors
#' @param one_cond a label for the data.frame
#' @param n_bins the number of bins, default is 5
#' @param b_coding used for accessing the upper boundary label, determines
#' the corresponding column of the returned data.frame (e.g., P_`corr`).
#'
#' @returns a data.frame with "Source", "Cond", "Bin"s, "P_<u_label>" for
#' the CAFs of type c("cafs", "sum_dist", "stats_dm", "data.frame")
#'
#' @details
#' if pdf_u and pdf_l are not NULL, returns CAFs of the densities
#'
#' if rts_u and rts_l are not NULL, returns CAFs of the response times
#'
#' if all are not NULL, returns both.
#'
#' @seealso [dRiftDM::new_stats_dm()]
#'
#' @keywords internal
calc_cafs <- function(pdf_u = NULL, pdf_l = NULL, rts_u = NULL, rts_l = NULL,
                      one_cond, n_bins = NULL, b_coding) {
  # default settings and parameter extraction
  if (is.null(n_bins)) {
    n_bins <- 5
  }

  if (!is_numeric(n_bins) | length(n_bins) != 1) {
    stop("n_bins must a valid numeric")
  }
  if (n_bins <= 1) {
    stop("argument n_bins nust be larger than 1")
  }

  u_name <- names(b_coding$u_name_value)


  # check pdf_l and pdf_u
  if (xor(is.null(pdf_l), is.null(pdf_u))) {
    stop("pdf_l and pdf_u either have to be both NULL or not")
  }

  # check rts_u and rts_l
  if (xor(is.null(rts_u), is.null(rts_l))) {
    stop("rts_u and rts_l either have to be both NULL or not")
  }

  # calculate the respective statistics
  # if pdfs are supplied ...
  result_pred <- NULL
  if (!is.null(pdf_u)) {
    result_pred <- calc_cafs_pred(
      pdf_u = pdf_u, pdf_l = pdf_l,
      one_cond = one_cond, n_bins = n_bins
    )
    result_pred <- cbind(Source = "pred", result_pred)
    colnames(result_pred)[which(names(result_pred) == "P_U")] <-
      paste("P", u_name, sep = "_")
  }

  result_obs <- NULL
  if (!is.null(rts_u)) {
    result_obs <- calc_cafs_obs(
      rts_u = rts_u, rts_l = rts_l,
      one_cond = one_cond, n_bins = n_bins
    )
    result_obs <- cbind(Source = "obs", result_obs)
    colnames(result_obs)[which(names(result_obs) == "P_U")] <-
      paste("P", u_name, sep = "_")
  }



  # combine
  result <- rbind(result_obs, result_pred)

  # make it stats_dm class and pass back
  stats_dm_obj <- new_stats_dm(
    stat_df = result, type = "cafs",
    b_coding = b_coding
  )
  return(stats_dm_obj)
}






# FUNCTIONS FOR CALCULATING QUANTILES -------------------------------------

#' Calculate Quantiles
#'
#' Backend functions to calculate quantiles for RT vectors or pdfs
#'
#' @param rts_u,rts_l vectors of RTs for the upper and lower boundary
#' @param pdf_u,pdf_l density values for the upper and lower boundary
#' @param one_cond character label
#' @param probs numeric vector with values between 0 and 1 for the probability
#' levels
#' @param t_vec the time space (required for the pdfs)
#'
#' @param skip_if_contr_low numeric. If the relative contribution of the upper
#' or lower PDF to the overall PDF is too low (default 0.01%), return NAs for
#' this PDF.
#'
#' @returns a data.frame with the "Cond" label, the "Prob"s and "Quant_U" and
#' "Quant_L" for the quantiles
#'
#' @details
#' for RTs:
#' straightforward via [stats::quantile].
#'
#' for Densities: Calculate CDF (for each pdf separately here), and then map
#' the desired probability level via the CDF (y-axis) to the time space (x-axis)
#'
#' @keywords internal
calc_quantiles_obs <- function(rts_u, rts_l, one_cond, probs) {
  quants_rts_u <- stats::quantile(rts_u, probs = probs)
  quants_rts_l <- stats::quantile(rts_l, probs = probs)
  quants <- data.frame(
    Cond = one_cond,
    Prob = probs,
    Quant_U = unname(quants_rts_u),
    Quant_L = unname(quants_rts_l)
  )
  return(quants)
}

#' @rdname calc_quantiles_obs
calc_quantiles_pred <- function(pdf_u, pdf_l, t_vec, one_cond, probs,
                                skip_if_contr_low = 0.0001) {
  stopifnot(length(pdf_u) == length(pdf_l))
  stopifnot(length(pdf_u) == length(t_vec))

  quants <-
    apply(cbind(pdf_u, pdf_l), MARGIN = 2, function(a_pdf, t_vec, probs) {
      cdf <- cumsum(a_pdf)
      cdf <- cdf - min(cdf)
      cdf <- cdf / max(cdf)
      return(stats::approx(x = cdf, y = t_vec, xout = probs, ties = "mean")$y)
    }, t_vec = t_vec, probs = probs)

  colnames(quants) <- c("Quant_U", "Quant_L")

  quants <- as.data.frame(quants)
  quants <- cbind(Cond = one_cond, Prob = probs, quants)

  sum_pdf_l <- sum(pdf_l)
  sum_pdf_u <- sum(pdf_u)

  if (sum_pdf_u / (sum_pdf_u + sum_pdf_l) <= skip_if_contr_low) {
    quants["Quant_U"] <- NA
  }
  if (sum_pdf_l / (sum_pdf_u + sum_pdf_l) <= skip_if_contr_low) {
    quants["Quant_L"] <- NA
  }

  return(quants)
}


#' Calculate Quantiles
#'
#' Function that calls the underlying quantile calculation functions
#' [dRiftDM::calc_quantiles_obs] and [dRiftDM::calc_quantiles_pred]. Does
#' input checks and the data wrangling
#'
#' @param pdf_u,pdf_l either NULL or density vectors
#' @param t_vec the time space (required for the pdfs)
#' @param rts_u,rts_l either NULL or RT vectors
#' @param one_cond character label
#' @param probs numeric vector with values between 0 and 1 for the probability
#' levels. Default is [dRiftDM::drift_dm_default_probs()].
#' @param b_coding used for accessing the upper/lower boundary labels,
#' determines the corresponding columns of the returned data.frame
#' (e.g., Quant_`corr`).
#'
#' @returns a data.frame with "Source", "Cond", "Prob"s, "Quant_<u_label>",
#' "Quant_<l_label>" of type
#' c("quantiles", "sum_dist", "stats_dm", "data.frame")
#'
#' @details
#' if pdf_u and pdf_l are not NULL, returns quantiles for the densities
#'
#' if rts_u and rts_l are not NULL, returns quantiles for the response times
#'
#' if all are not NULL, returns both.
#'
#' @seealso [dRiftDM::new_stats_dm()]
#'
#' @keywords internal
calc_quantiles <- function(pdf_u = NULL, pdf_l = NULL, t_vec = NULL,
                           rts_u = NULL, rts_l = NULL, one_cond,
                           probs = NULL, b_coding) {
  # default settings and parameter extraction
  if (is.null(probs)) {
    probs <- drift_dm_default_probs()
  }

  if (!is.numeric(probs) | length(probs) < 2) {
    stop("probs must a valid numeric vector of length > 1")
  }

  if (min(probs) <= 0 | max(probs) >= 1) {
    stop("argument probs must be in the range ]0, 1[")
  }

  u_name <- names(b_coding$u_name_value)
  l_name <- names(b_coding$l_name_value)

  # check pdf_l and pdf_u
  if (xor(is.null(pdf_l), is.null(pdf_u))) {
    stop("pdf_l and pdf_u either have to be both NULL or not")
  }

  # check rts_u and rts_l
  if (xor(is.null(rts_u), is.null(rts_l))) {
    stop("rts_u and rts_l either have to be both NULL or not")
  }

  # calculate the respective statistics
  # if pdfs are supplied ...
  result_pred <- NULL
  if (!is.null(pdf_u)) {
    result_pred <- calc_quantiles_pred(
      pdf_u = pdf_u, pdf_l = pdf_l,
      t_vec = t_vec, one_cond = one_cond,
      probs = probs
    )
    result_pred <- cbind(Source = "pred", result_pred)

    colnames(result_pred)[which(names(result_pred) == "Quant_U")] <-
      paste("Quant", u_name, sep = "_")
    colnames(result_pred)[which(names(result_pred) == "Quant_L")] <-
      paste("Quant", l_name, sep = "_")
  }


  # if rts are supplied ...
  result_obs <- NULL
  if (!is.null(rts_u)) {
    result_obs <- calc_quantiles_obs(
      rts_u = rts_u, rts_l = rts_l,
      one_cond = one_cond, probs = probs
    )
    result_obs <- cbind(Source = "obs", result_obs)

    colnames(result_obs)[which(names(result_obs) == "Quant_U")] <-
      paste("Quant", u_name, sep = "_")
    colnames(result_obs)[which(names(result_obs) == "Quant_L")] <-
      paste("Quant", l_name, sep = "_")
  }



  # maybe combine
  result <- rbind(result_obs, result_pred)

  # make it dm_* class and pass back
  stats_dm_obj <- new_stats_dm(
    stat_df = result, type = "quantiles",
    b_coding = b_coding
  )
  return(stats_dm_obj)
}




#' Calculate delta function(s)
#'
#' Given a dataset providing the quantiles ([dRiftDM::calc_quantiles]),
#' calculates delta function(s) for the character vectors minuends and
#' subtrahends
#'
#' @param quantiles_dat a data.frame of quantiles ([dRiftDM::calc_quantiles])
#' @param minuends,subtrahends character vectors (with equal length), specifying
#' the conditions to use for the delta function: minuend - subtrahend
#' @param dvs character, indicating which quantile columns to use. Default
#' is "Quant_<u_label>". If multiple dvs are provided, then minuends and
#' subtrahends must have the same length, and matching occurs pairwise. In this
#' case, if only one minuend/subtrahend is specified, minuend and subtrahend are
#' recycled to the necessary length.
#' @param b_coding a [dRiftDM::b_coding] object, necessary to build default
#' dvs
#'
#' @details
#' Takes the quantile data_frame, [stats::reshape] it to wide, and then
#' access the relevant `dv` columns, together with minuends and subtrahends
#' to calculate the delta functions.
#'
#' @returns a data.frame with columns "Source", "Prob", the "Quant_<u_label>",
#' "Quant_<l_label". May have the following additional columns:
#'
#'  * if only one dv: as many Delta_<minuend_subtrahend> and
#'  Avg_<minuends_subtrahends> as minuends and subtrahends.
#'
#'  * if more than one dv: as many Delta_<u/l-label>_<minuend_subtrahend> and
#'  Avg_<u/l-label>_<minuends_subtrahends> as minuends and subtrahends.
#'
#'  The data.frame is of type
#'  c("delta_funs", "sum_dist", "stats_dm", "data.frame")
#'
#'
#' @seealso [dRiftDM::new_stats_dm()]
#'
#' @keywords internal
calc_delta_funs <- function(quantiles_dat, minuends = NULL, subtrahends = NULL,
                            dvs = NULL, b_coding) {
  # input checks on data frame
  if (!is.data.frame(quantiles_dat)) {
    stop("the provided quantiles_dat is not a data.frame")
  }

  u_name <- names(b_coding$u_name_value)
  l_name <- names(b_coding$l_name_value)
  quant_name_u <- paste("Quant", u_name, sep = "_")
  quant_name_l <- paste("Quant", l_name, sep = "_")

  nec_columns <- c("Source", "Cond", "Prob", quant_name_u, quant_name_l)
  if (any(colnames(quantiles_dat) != nec_columns)) {
    stop(
      "the provided quantiles_dat provides unexpected column names",
      "\n\tprovided: ", paste(colnames(quantiles_dat), collapse = " "),
      "\n\tnecessary: ", paste(nec_columns, collapse = " ")
    )
  }

  # input checks on minuends/subtrahends
  if (is.null(minuends)) {
    stop("calc_delta_funs was called but the argument minuends not provided")
  }
  if (is.null(subtrahends)) {
    stop("calc_delta_funs was called but the argument subtrahends not provided")
  }
  if (!is.character(minuends) | length(minuends) < 1) {
    stop("minuends must be a character vector of length >= 1")
  }
  if (!is.character(subtrahends) | length(subtrahends) < 1) {
    stop("subtrahends must be a character vector of length >= 1")
  }
  if (length(subtrahends) != length(minuends)) {
    stop("different length of minuends and subtrahends")
  }
  if (!all(minuends %in% unique(quantiles_dat$Cond))) {
    stop("Conds specified in minuends are not provided within quantiles_dat")
  }
  if (!all(subtrahends %in% unique(quantiles_dat$Cond))) {
    stop("Conds specified in subtrahends are not provided within quantiles_dat")
  }

  # input checks on dvs
  if (is.null(dvs)) {
    dvs <- quant_name_u
  }
  dvs <- sapply(dvs, function(x) {
    match.arg(x, c(quant_name_u, quant_name_l))
  })
  dvs <- unname(dvs)

  if (length(dvs) > 1 & length(dvs) != length(minuends)) {
    if (length(minuends) == 1) {
      minuends <- rep(minuends, length(dvs))
      subtrahends <- rep(subtrahends, length(dvs))
    } else {
      stop(
        "if several dvs are provided, the length must match",
        " minuends/subtrahends"
      )
    }
  }

  # reduce and make wide format
  quantiles_dat <- quantiles_dat[c("Source", "Cond", "Prob", dvs)]

  n_probs <- length(unique(quantiles_dat$Prob))
  n_source <- length(unique(quantiles_dat$Source))
  n_cond <- length(unique(quantiles_dat$Cond))
  if (nrow(quantiles_dat) != n_probs * n_source * n_cond) {
    stop(
      "quantiles_dat doesn't uniquely code rows solely by Probs, Source, ",
      "and Cond"
    )
  }
  quantiles_dat <- stats::reshape(quantiles_dat,
    idvar = c("Source", "Prob"),
    timevar = "Cond", direction = "wide", sep = "_"
  )

  # calculate delta functions
  if (length(dvs) == 1) {
    delta_names <- paste("Delta",
      paste(minuends, subtrahends, sep = "_"),
      sep = "_"
    )
    avg_names <- paste("Avg",
      paste(minuends, subtrahends, sep = "_"),
      sep = "_"
    )
  } else {
    delta_names <- paste("Delta", gsub("^Quant_", "", dvs), sep = "_")
    avg_names <- paste("Avg", gsub("^Quant_", "", dvs), sep = "_")
    delta_names <- paste(delta_names,
      paste(minuends, subtrahends, sep = "_"),
      sep = "_"
    )
    avg_names <- paste(avg_names,
      paste(minuends, subtrahends, sep = "_"),
      sep = "_"
    )
  }
  minuends_wide <- paste(dvs, minuends, sep = "_")
  subtrahends_wide <- paste(dvs, subtrahends, sep = "_")


  for (i in seq_along(minuends)) {
    vals_minuends <- quantiles_dat[[minuends_wide[i]]]
    vals_subtrahends <- quantiles_dat[[subtrahends_wide[i]]]

    quantiles_dat[[delta_names[i]]] <- vals_minuends - vals_subtrahends
    quantiles_dat[[avg_names[i]]] <- 0.5 * vals_minuends + 0.5 * vals_subtrahends
  }

  # make it stats_dm class and pass back
  # since delta_funs depends on quantiles, this will remove the quantiles class
  stats_dm_obj <- new_stats_dm(
    stat_df = quantiles_dat, type = "delta_funs",
    b_coding = b_coding
  )
  return(stats_dm_obj)
}



# FUNCTIONS FOR FIT STATISTICS --------------------------------------------



#' Calculate Information Criteria (AIC and BIC)
#'
#' Computes/Summarizes the Log-Likelihood, Akaike Information Criterion (AIC),
#' and the Bayesian Information Criterion (BIC)
#'
#' @param drift_dm_obj an object of type [dRiftDM::drift_dm]
#' @param ... further arguments (only relevant: k, for the penality of
#' [stats::AIC])
#'
#' @details The functions calls [dRiftDM::logLik.drift_dm], and subsequently
#' [stats::AIC] and [stats::BIC]
#'
#' @return A custom object of class `stats_dm`
#' (c("fit_stats", "stats_dm", "data.frame")), containing a data frame with
#' columns:
#' * `Log_Like`: the input log-likelihood value
#' * `AIC`: the calculated AIC value
#' * `BIC`: the calculated BIC value
#'
#'
#' @seealso [dRiftDM::new_stats_dm()], [dRiftDM::logLik.drift_dm]
#'
#' @keywords internal
calc_ic <- function(drift_dm_obj, ...) {
  dots <- list(...)
  if (is.null(dots$k)) {
    k <- 2
  } else {
    k <- dots$k
  }

  ll <- logLik(drift_dm_obj)
  aic <- stats::AIC(ll, k = k)
  bic <- stats::BIC(ll)

  result <- data.frame(Log_Like = ll, AIC = aic, BIC = bic)
  stats_obj <- new_stats_dm(stat_df = result, type = "fit_stats")
  return(stats_obj)
}





# ACCESS FUNCTION FOR EACH STATISTIC --------------------------------------

#' Calculate Statistics for Model Prediction and/or Observed Data
#'
#' This function derives statistics that can be calculated for both model
#' predictions and observed data. However, it does not calculate it, but
#' rather calls the respective backend functions.
#' Supported statistics currently include:
#' - Conditional Accuracy Functions (CAFs; [dRiftDM::calc_cafs()])
#' - Quantiles ([dRiftDM::calc_quantiles()])
#' - Delta Functions ([dRiftDM::calc_delta_funs()])
#'
#' @param type character string, specifying the type of statistic to calculate.
#'   Available options are `"cafs"`, `"quantiles"`, and `"delta_funs"`.
#' @param b_coding list for boundary coding (see [dRiftDM::b_coding]).
#' @param conds character vector, specifying the conditions to include in
#' calculations (used for labeling and subsetting the model PDFs and the
#' observed data).
#' @param ... Additional parameters passed on to the specific statistic
#' calculation function (see Details).
#'
#' @details
#'
#' When calling this function the arguments `all_rts_u`/`all_rts_l` and/or
#' `all_pdfs` must be specified (see
#' [dRiftDM::re_evaluate_model], [dRiftDM::obs_data]). Otherwise, the backend
#' functions won't work properly. Further arguments are:
#'
#' - for CAFS: `n_bins` controls the number of bins, with a default of 5.
#' - for Quantiles and Delta Functions: `probs` c ontrols the quantiles to
#' calculate. Default is `seq(0.1, 0.9, 0.1)`
#' (see [dRiftDM::drift_dm_default_probs()]).
#'
#' This function gets called by [dRiftDM::calc_stats]
#'
#'
#' @return A data frame with the calculated statistic across `conds`
#'  (ordered according to `Source`).
#'
#' @keywords internal
calc_stats_pred_obs <- function(type, b_coding, conds, ...) {
  dotdot <- list(...)

  if (!is.character(type) | length(type) != 1) {
    stop("type must be a single character vector of length 1")
  }

  # only these types are available for both observed and predicted data
  type <- match.arg(type, c("cafs", "quantiles", "delta_funs"))

  # iterate through the requested types and calculate the stats
  if (type == "quantiles") {
    result <-
      lapply(conds, function(one_cond) {
        pdf_u <- dotdot$all_pdfs[[one_cond]]$pdf_u
        pdf_l <- dotdot$all_pdfs[[one_cond]]$pdf_l
        rts_u <- dotdot$all_rts_u[[one_cond]]
        rts_l <- dotdot$all_rts_l[[one_cond]]

        calc_quantiles(
          pdf_u = pdf_u, pdf_l = pdf_l, t_vec = dotdot$t_vec,
          rts_u = rts_u, rts_l = rts_l,
          one_cond = one_cond, probs = dotdot$probs,
          b_coding = b_coding
        )
      })
    result <- do.call("rbind", result)
  }
  if (type == "cafs") {
    result <-
      lapply(conds, function(one_cond) {
        pdf_u <- dotdot$all_pdfs[[one_cond]]$pdf_u
        pdf_l <- dotdot$all_pdfs[[one_cond]]$pdf_l
        rts_u <- dotdot$all_rts_u[[one_cond]]
        rts_l <- dotdot$all_rts_l[[one_cond]]

        calc_cafs(
          pdf_u = pdf_u, pdf_l = pdf_l, rts_u = rts_u,
          rts_l = rts_l, one_cond = one_cond,
          n_bins = dotdot$n_bins, b_coding = b_coding
        )
      })
    result <- do.call("rbind", result)
  }
  if (type == "delta_funs") {
    interim <- calc_stats_pred_obs(
      type = "quantiles", b_coding = b_coding, conds = conds, ...
    )
    result <- calc_delta_funs(
      quantiles_dat = interim,
      minuends = dotdot$minuends,
      subtrahends = dotdot$subtrahends,
      dvs = dotdot$dvs, b_coding = b_coding
    )
  }

  result <- result[order(result$Source), ]
  rownames(result) <- NULL

  return(result)
}




# METHODS FOR drift_dm and data.frame OBJECTS TO CALCULATE STATS  ----------


#' Calculate Statistics
#'
#' @description
#'
#' `calc_stats` provides an interface for calculating statistics/metrics on
#' model predictions and/or observed data. Supported statistics include
#' Conditional Accuracy Functions (CAFs), Quantiles, Delta Functions, and fit
#' statistics. Results can be aggregated across individuals.
#'
#' @param object an object for which statistics are calculated. This can be a
#' [data.frame] of observed data, a [dRiftDM::drift_dm] object, or a
#' `fits_ids_dm` object (see [dRiftDM::estimate_model_ids]).
#' @param type a character vector, specifying the statistics to calculate.
#' Supported values include `"cafs"`, `"quantiles"`, `"delta_funs"`, and
#' `"fit_stats"`.
#' @param ... additional arguments passed to the respective method and the
#' underlying calculation functions (see Details for mandatory arguments).
#' @param conds optional character vector specifying conditions to include.
#' Conditions must match those found in the `object`.
#' @param split_by_ID logical. If `TRUE`, statistics are calculated separately
#' for each individual ID in `object` (when `object` is a [data.frame]). Default
#' is `TRUE`.
#' @param b_coding a list for boundary coding (see [dRiftDM::b_coding]). Only
#' relevant when `object` is a [data.frame]. For other `object` types, the
#' `b_coding` of the `Object` is used.
#' @param average logical. If `TRUE`, averages the statistics across individuals
#' where applicable. Default is `FALSE`.
#' @param verbose integer, indicating if information about the progress
#'  should be displayed. 0 -> no information, 1 -> a progress bar. Default is 0.
#' @param round_digits integer, controls the number of digits shown.
#'  Default is 3.
#' @param print_rows integer, controls the number of rows shown.
#' @param some logical. If `TRUE`, a subset of randomly sampled rows is shown.
#' @param show_header logical. If `TRUE`, a header specifying the type of
#'  statistic will be displayed.
#' @param show_note logical. If `TRUE`, a footnote  is displayed indicating
#' that the underlying [data.frame] can be accessed as usual.
#' @param x an object of type `stats_dm` or `stats_dm_list`, as returned by
#' the function `calc_stats()`.
#'
#' @details
#' `calc_stats` is a generic function to handle the calculation of different
#' statistics/metrics for the supported object types. Per default, it returns
#' the requested statistics/metrics.
#'
#'
#'
#' ## Conditional Accuracy Function (CAFs)
#'
#' CAFs are a way to quantify response accuracy against speed. To calculate
#' CAFs, RTs (whether correct or incorrect) are first binned and then the
#' percent correct responses per bin is calculated.
#'
#' When calculating model-based CAFs, a joint CDF combining both the pdf
#' of correct and incorrect responses is calculated. Afterwards, this CDF
#' is separated into even-spaced segments and the contribution of
#' the pdf associated with a correct response relative to the joint CDF is
#' calculated.
#'
#' The number of bins can be controlled by passing the argument `n_bins`.
#' The default is 5.
#'
#' ## Quantiles
#'
#'  For observed response times, the function [stats::quantile] is used with
#'  default settings.
#'
#'  Which quantiles are calcuated can be controlled by providing the
#'  probabilites, `probs`, with values in \eqn{[0, 1]}. Default is
#'  `seq(0.1, 0.9, 0.1)`.
#'
#' ## Delta Functions
#'
#'  Delta functions calculate the difference between quantiles
#'  of two conditions against their mean:
#'  - \eqn{Delta_i = Q_{i,j} - Q_{i,k}}
#'  - \eqn{Avg_i = 0.5 \cdot Q_{i,j} + 0.5 \cdot Q_{i,k}}
#'
#'  With i indicating a quantile, and j and k two conditions.
#'
#'  To calculate delta functions, users have to specify:
#'  - `minuends`: character vector, specifying condition(s) j. Must be in
#'    `conds(drift_dm_obj)`.
#'  - `subtrahends`: character vector, specifying condition(s) k. Must be in
#'    `conds(drift_dm_obj)`
#'  - `dvs`: character, indicating which quantile columns to use.
#'     Default is "Quant_<u_label>". If multiple dvs are provided,
#'     then minuends and subtrahends must have the same length,
#'     and matching occurs pairwise. In this case, if only one
#'     minuend/subtrahend is specified, minuend and subtrahend are recycled to
#'     the necessary length.
#'
#'
#' ## Fit Statistics
#'
#' Calculates the Akaike and Bayesian Information Criteria (AIC and BIC). Users
#' can provide a `k` argument to penalize the AIC statistic (see [stats::AIC]
#' and [dRiftDM::AIC.fits_ids_dm])
#'
#' @returns
#' If `type` is a single character string, then a subclass of [data.frame] is
#' returned, containing the respective statistic. Objects of type `sum_dist`
#' will have an additional attribute storing the boundary encoding (see also
#' [dRiftDM::b_coding]). The reason for returning subclasses of [data.frame] is
#' to provide custom `plot()` methods (e.g., [dRiftDM::plot.cafs]). To get rid
#' of the subclass label and additional attributes (i.e., to get just the plain
#' underlying [data.frame], users can use [dRiftDM::unpack_obj()]).
#'
#' If `type` contains multiple character strings (i.e., is a character vector) a
#' subclass of [list] with the calculated statistics is returned. The list will
#' be of type `stats_dm_list` (to easily create multiple panels using the
#' respective [dRiftDM::plot.stats_dm_list()] method).
#'
#' The print methods `print.stats_dm()` and `print.stats_dm_list()` each
#' invisibly return the supplied object `x`.
#'
#'
#' @examples
#' # Example 1: Calculate CAFs and Quantiles from a model ---------------------
#' # get a model for demonstration purpose
#' a_model <- ssp_dm(dx = .0025, dt = .0025, t_max = 2)
#' # and then calculate cafs and quantiles
#' some_stats <- calc_stats(a_model, type = c("cafs", "quantiles"))
#' print(some_stats)
#'
#' # Example 2: Calculate a Delta Function from a data.frame ------------------
#' # get a data set for demonstration purpose
#' some_data <- ulrich_simon_data
#' conds(some_data) # relevant for minuends and subtrahends
#' some_stats <- calc_stats(
#'   a_model,
#'   type = "delta_funs",
#'   minuends = "incomp",
#'   subtrahends = "comp"
#' )
#' print(some_stats, print_rows = 5)
#'
#'
#' # Example 3: Calculate Quantiles from a fits_ids_dm object -----------------
#' # get an auxiliary fits_ids_dm object
#' all_fits <- get_example_fits_ids()
#' some_stats <- calc_stats(all_fits, type = "quantiles")
#' print(some_stats, print_rows = 5) # note the ID column
#'
#' # one can also request that the statistics are averaged across individuals
#' print(
#'   calc_stats(all_fits, type = "quantiles", average = TRUE)
#' )
#'
#' @export
calc_stats <- function(object, type, ...) {
  if (length(type) > 1) {
    all_stats <- sapply(type, function(one_type) {
      calc_stats(object = object, type = one_type, ...)
    }, simplify = FALSE, USE.NAMES = TRUE)

    class(all_stats) <- c("stats_dm_list")
    return(all_stats)
  }

  UseMethod("calc_stats")
}


#' @rdname calc_stats
#' @export
calc_stats.data.frame <- function(object, type, ..., conds = NULL, verbose = 0,
                                  average = FALSE, split_by_ID = TRUE,
                                  b_coding = NULL) {
  obs_data <- object

  # verbose check
  if (!(verbose %in% c(0, 1))) {
    stop("verbose must be 0 or 1")
  }

  # get b_coding
  if (is.null(b_coding)) {
    b_coding <- drift_dm_default_b_coding()
  }

  #  split by ID, but check if it actually exists
  if (split_by_ID & ("ID" %in% colnames(obs_data))) {
    obs_data <- check_raw_data(
      obs_data = obs_data,
      b_coding_column = b_coding$column,
      u_value = b_coding$u_name_value,
      l_value = b_coding$l_name_value
    )
    list_obs_data <- split(x = obs_data, f = obs_data$ID)


    # create a progress bar
    if (verbose == 1) {
      n_iter <- length(list_obs_data)
      pb <- progress::progress_bar$new(
        format = "calculating [:bar] :percent; done in: :eta",
        total = n_iter, clear = FALSE, width = 60
      )
      pb$tick(0)
    }

    all_results <- lapply(names(list_obs_data), function(id) {
      stat <- calc_stats(
        object = list_obs_data[[id]], type = type, ...,
        conds = conds, verbose = 0, average = FALSE, split_by_ID = FALSE,
        b_coding = b_coding
      )
      stat_id <- cbind(ID = try_cast_integer(id), stat)
      stat_id <- copy_class_attributes(old = stat, new = stat_id)
      if (verbose == 1) pb$tick()
      return(stat_id)
    })
    results <- do.call("rbind", all_results) # preserves class and attributes
    results <- results[order(results$ID), ]
    rownames(results) <- NULL
  } else { # else "ignore" ID column (also the case when it does not exist)

    # turn to list of rts, as stored in any dm_object
    rts <- obs_data_to_rt_lists(obs_data = obs_data, b_coding = b_coding)

    all_rts_u <- rts$rts_u
    all_rts_l <- rts$rts_l

    stopifnot(all(names(all_rts_u) == names(all_rts_l)))


    # get the conds
    data_conds <- unique(obs_data$Cond)
    if (is.null(conds)) {
      conds <- data_conds
    } else {
      conds <- match.arg(arg = conds, choices = data_conds, several.ok = TRUE)
    }

    # call the internal calc_stats function
    results <- calc_stats_pred_obs(
      type = type, b_coding = b_coding,
      conds = conds,
      all_rts_u = all_rts_u,
      all_rts_l = all_rts_l, ...
    )
  }

  # average if necessary
  if (average) {
    results <- aggregate_stats(results)
  }

  return(results)
}


#' @rdname calc_stats
#' @export
calc_stats.drift_dm <- function(object, type, ..., conds = NULL) {
  drift_dm_obj <- object
  type <- match.arg(type, c("cafs", "quantiles", "delta_funs", "fit_stats"))


  # get b_coding
  b_coding <- attr(drift_dm_obj, "b_coding")

  # get all rts and pdfs for quick reference
  if (is.null(drift_dm_obj$pdfs)) {
    drift_dm_obj <- re_evaluate_model(
      drift_dm_obj = drift_dm_obj,
      eval_model = TRUE
    )
  }

  # if fit_stats requested
  if (type == "fit_stats") {
    result <- calc_ic(drift_dm_obj, ...)
    return(result)
  }


  # otherwise cafs, quantiles, delta_funs ....
  # extract pdfs and check if at least 1% of the PDFs is missing
  all_pdfs <- drift_dm_obj$pdfs
  dt <- drift_dm_obj$prms_solve[["dt"]]
  check_loss <- sapply(all_pdfs, function(one_set_pdfs) {
    sum_both <- sum(one_set_pdfs$pdf_u) * dt + sum(one_set_pdfs$pdf_l) * dt
    return(sum_both < .99)
  }, simplify = TRUE, USE.NAMES = TRUE)
  if (any(check_loss)) {
    warning(
      "calc_stats called with missing probability mass for some ",
      "conditions (likely occured after truncating pdfs to the ",
      "time space). Some statistics scale the pdfs! ",
      "Interprete 'quantiles' etc. accordingly (or increase t_max)."
    )
  }


  # get time space (needed for quantiles)
  t_max <- drift_dm_obj$prms_solve[["t_max"]]
  nt <- drift_dm_obj$prms_solve[["nt"]]
  t_vec <- seq(0, t_max, length.out = nt + 1)

  # get the conds
  model_conds <- conds(drift_dm_obj)
  if (is.null(conds)) {
    conds <- model_conds
  } else {
    conds <- match.arg(arg = conds, choices = model_conds, several.ok = TRUE)
  }

  # and call the underlying internal function calc_stats_pred_obs
  result <- calc_stats_pred_obs(
    type = type, b_coding = b_coding,
    conds = conds, all_pdfs = all_pdfs,
    all_rts_u = drift_dm_obj$obs_data$rts_u,
    all_rts_l = drift_dm_obj$obs_data$rts_l,
    t_vec = t_vec, ...
  )

  return(result)
}


#' @rdname calc_stats
#' @export
calc_stats.fits_ids_dm <- function(object, type, ..., verbose = 1,
                                   average = FALSE) {
  fits_ids <- object

  if (!(verbose %in% c(0, 1))) {
    stop("verbose must be 0 or 1")
  }

  # create a progress bar
  if (verbose == 1) {
    n_iter <- length(fits_ids$all_fits)
    pb <- progress::progress_bar$new(
      format = "calculating [:bar] :percent; done in: :eta",
      total = n_iter, clear = FALSE, width = 60
    )
    pb$tick(0)
  }
  #


  # call statistic across individuals
  all_results <-
    lapply(names(fits_ids$all_fits), function(id) {
      stat <- calc_stats(object = fits_ids$all_fits[[id]], type = type, ...)
      stat_id <- cbind(ID = try_cast_integer(id), stat)
      stat_id <- copy_class_attributes(old = stat, new = stat_id)
      if (verbose == 1) pb$tick()
      return(stat_id)
    })
  results <- do.call("rbind", all_results) # preserves class and attributes
  results <- results[order(results$ID), ]
  rownames(results) <- NULL

  # average if desired
  if (average) {
    results <- aggregate_stats(results)
  }

  return(results)
}


# CREATE stats_dm objects -------------------------------------------------


#' Create a New stats_dm Object
#'
#' @description
#' `new_stats_dm` initializes a `stats_dm` object to label statistic types and
#' store necessary attributes for the custom methods (such as `plot_*`)
#'
#'
#' @param stat_df a `data.frame`, containing calculated statistics to be
#' encapsulated within the `stats_dm` class.
#' @param type a character string, specifying the type of statistic provided by
#' `stat_df`. Valid options include `"cafs"`, `"quantiles"`, `"delta_funs"`,
#' and `"fit_stats"`.
#' @param ... Additional arguments passed to set attributes. For `"cafs"`,
#' `"quantiles"`, and `"delta_funs"`, a `b_coding` attribute is required.
#'
#' @details
#' `new_stats_dm` sets up the `stat_df` object by assigning it the class
#' `stats_dm`, along with additional classes based on the specified `type`.
#' For"cafs", "quantiles", "delta_funs", this will be c(`<type>`, "sum_dist",
#' "stats_dm", "data.frame")". For fit statistics, this will be c("fit_stats",
#' "stats_dm", "data.frame")".
#'
#' For Conditional Accuracy Functions (CAFs), Quantiles, and Delta Functions,
#' the function requires a `b_coding` argument, which specifies boundary coding
#' details and is set as an attribute.
#'
#' The function performs validation through [dRiftDM::validate_stats_dm] to
#' ensure that the `stats_dm` object is well formatted.
#'
#' @return An object of class `stats_dm`, with additional classes and attributes
#' depending on `type`.
#'
#' @keywords internal
new_stats_dm <- function(stat_df, type, ...) {
  # input checks
  stopifnot(is.data.frame(stat_df))
  type <- match.arg(
    arg = type,
    choices = c("cafs", "quantiles", "delta_funs", "fit_stats")
  )

  # define the stat_df object as an object of class stats_dm
  class(stat_df) <- c("stats_dm", "data.frame")

  # if cafs, quantiles, delta_funs, add b_coding and more info about object
  dots <- list(...)
  if (type %in% c("cafs", "quantiles", "delta_funs")) {
    b_coding <- dots$b_coding
    stopifnot(!is.null(b_coding))
    attr(stat_df, "b_coding") <- b_coding
    class(stat_df) <- c(type, "sum_dist", class(stat_df))

    # if fit_stats, add the object info
  } else if (type == "fit_stats") {
    class(stat_df) <- c(type, class(stat_df))
  }

  # check if everything went well
  stat_df <- validate_stats_dm(stat_df)
  return(stat_df)
}




# VALIDATE stats_dm objects -----------------------------------------------


#' Validate a stats_dm Object
#'
#' @description
#' `validate_stats_dm` is an internal (i.e., not exported) generic function to
#' ensure that `stats_dm` objects, as well as their specific subclasses
#' (`cafs`, `quantiles`, `delta_funs`, `sum_dist`, and `fit_stats`), meet the
#' necessary structural and column requirements. Each method performs
#' class-specific validation checks.
#'
#' @param stat_df A `data.frame` of class `stats_dm`, `cafs`, `quantiles`,
#' `delta_funs`, `sum_dist`, or `fit_stats` containing the calculated statistics
#' to be validated.
#'
#' @details
#' The validation process checks for required columns and structure based on the
#' class of `stat_df`. Each class has specific requirements:
#'
#' - **`validate_stats_dm.stats_dm`:** Ensures `stat_df` is a `data.frame`.
#' - **`validate_stats_dm.cafs`:** Checks for the presence of `"Bin"`, `"Cond"`,
#'   and exactly one column prefixed with `"P_"`
#' - **`validate_stats_dm.quantiles`:** Requires `"Prob"`, `"Cond"`, and exactly
#'   two columns prefixed with `"Quant_"`
#' - **`validate_stats_dm.delta_funs`:** Ensures `"Prob"` exists, at least two
#'   columns prefixed with `"Quant_"`, and at least one column  each `Avg_`
#'   and `Delta_`
#' - **`validate_stats_dm.sum_dist`:** Checks for a `"Source"` column.
#' - **`validate_stats_dm.fit_stats`:** Checks for `"Log_Like"`, `"AIC"`, and
#' `"BIC"` columns.
#'
#'
#' @return Returns the unmodified `stat_df` for convenience.
#'
#' @keywords internal
validate_stats_dm <- function(stat_df) {
  UseMethod("validate_stats_dm")
}


#' @rdname validate_stats_dm
#' @export
validate_stats_dm.cafs <- function(stat_df) {
  NextMethod() # to validate sum_dist objects


  if (!("Bin" %in% colnames(stat_df))) {
    stop("no column 'Bin' in stats_dm")
  }

  if (!("Cond" %in% colnames(stat_df))) {
    stop("no column 'Cond' in stats_dm")
  }

  if (sum("P_" == substr(colnames(stat_df), 1, 2)) != 1) {
    stop("no unique column 'P_' in stats_dm")
  }
  return(stat_df)
}

#' @rdname validate_stats_dm
#' @export
validate_stats_dm.quantiles <- function(stat_df) {
  NextMethod() # to validate sum_dist objects

  if (!("Prob" %in% colnames(stat_df))) {
    stop("no column 'Prob' in stats_dm")
  }

  if (!("Cond" %in% colnames(stat_df))) {
    stop("no column 'Cond' in stats_dm")
  }

  if (sum("Quant_" == substr(colnames(stat_df), 1, 6)) != 2) {
    stop("couldn't find two Quant_ columns")
  }

  return(stat_df)
}

#' @rdname validate_stats_dm
#' @export
validate_stats_dm.delta_funs <- function(stat_df) {
  NextMethod() # to validate sum_dist objects

  if (!("Prob" %in% colnames(stat_df))) {
    stop("no column 'Prob' in stats_dm")
  }

  if (sum("Quant_" == substr(colnames(stat_df), 1, 6)) < 2) {
    stop("couldn't find at least two Quant_ columns")
  }

  if (sum("Avg_" == substr(colnames(stat_df), 1, 4)) < 1) {
    stop("couldn't find a Avg_ column")
  }
  if (sum("Delta_" == substr(colnames(stat_df), 1, 6)) < 1) {
    stop("couldn't find a Delta_ column")
  }
  return(stat_df)
}

#' @rdname validate_stats_dm
#' @export
validate_stats_dm.sum_dist <- function(stat_df) {
  NextMethod() # to validate stats_dm objects

  if (!("Source" %in% colnames(stat_df))) {
    stop("no column 'Source' in stats_dm")
  }
  if (is.null(attr(stat_df, "b_coding"))) {
    stop("no attribute b_coding in stats_dm")
  }
  return(stat_df)
}



#' @rdname validate_stats_dm
#' @export
validate_stats_dm.fit_stats <- function(stat_df) {
  NextMethod() # to validate stats_dm objects

  if (!("Log_Like" %in% colnames(stat_df))) {
    stop("no column 'Log_Like' in stats_dm")
  }

  if (!("AIC" %in% colnames(stat_df))) {
    stop("no column 'AIC' in stats_dm")
  }

  if (!("BIC" %in% colnames(stat_df))) {
    stop("no column 'BIC' in stats_dm")
  }
  return(stat_df)
}


#' @rdname validate_stats_dm
#' @export
validate_stats_dm.stats_dm <- function(stat_df) {
  if (!is.data.frame(stat_df)) {
    stop("stats_dm object to validate is not of type data.frame")
  }
  return(stat_df)
}





# AGGREGATE stats_dm OBJECTS ----------------------------------------------

#' Aggregate Statistics ACROSS ID
#'
#' @description
#' `aggregate_stats` is a (not exported) generic function to aggregate
#' `stats_dm` objects across `ID`s. Since the column names may vary by the
#' statistic type, the behavior of aggregate depends on the subclass of
#' `stats_dm` (`cafs`, `quantiles`, `delta_funs`, or `fit_stats`).
#'
#' @param stat_df A `data.frame` of class `stats_dm`
#'
#' @details
#' For each supported subclass, `aggregate_stats` calls
#' [dRiftDM::internal_aggregate()] with the relevant arguments
#'
#' @return If no `"ID"` column exists in `stat_df` returns `stat_df` as-is.
#' If an `"ID"` column exists, then statistics are aggregated across it.
#'
#' @seealso [dRiftDM::new_stats_dm], [dRiftDM::calc_stats],
#' [dRiftDM::internal_aggregate()]
#'
#' @keywords internal
aggregate_stats <- function(stat_df) {
  if (!("ID" %in% colnames(stat_df))) {
    return(stat_df)
  }

  UseMethod("aggregate_stats")
}

#' @rdname aggregate_stats
#' @export
aggregate_stats.cafs <- function(stat_df) {
  internal_aggregate(
    data = stat_df,
    group_cols = c("Source", "Cond", "Bin")
  )
}

#' @rdname aggregate_stats
#' @export
aggregate_stats.quantiles <- function(stat_df) {
  internal_aggregate(
    data = stat_df,
    group_cols = c("Source", "Cond", "Prob")
  )
}

#' @rdname aggregate_stats
#' @export
aggregate_stats.delta_funs <- function(stat_df) {
  internal_aggregate(
    data = stat_df,
    group_cols = c("Source", "Prob")
  )
}

#' @rdname aggregate_stats
#' @export
aggregate_stats.fit_stats <- function(stat_df) {
  internal_aggregate(
    data = stat_df,
    group_cols = NULL
  )
}



#' Aggregate Data Frame Columns by Group
#'
#' internal function to aggregate columns of a data frame across "ID"
#' while considering a set of grouping columns. It retains the class and
#' attriubtes of the input data.
#'
#' @param data A [data.frame] containing the data to be aggregated. It should
#'  include both the grouping columns, an "ID" column, and the columns for which
#'  aggregation shall take place.
#' @param group_cols A character vector specifying the names of the columns to
#'  group by during aggregation.
#'
#' @details
#' `internal_aggregate` identifies DV columns as those not in `group_cols` or
#' `"ID"`. It then calculates the mean of these DV columns, grouped by the
#' specified columns.
#'
#' @return A `data.frame` containing the aggregated data.
#'
#' @seealso [dRiftDM::aggregate_stats()], [dRiftDM::calc_stats()],
#' [dRiftDM::new_stats_dm()]
#'
#' @keywords internal
internal_aggregate <- function(data, group_cols) {
  all_cols <- colnames(data)

  # Select columns that don't start with the group_cols or ID
  dv_cols <- all_cols[!(colnames(data) %in% c("ID", group_cols))]

  # Aggregate by ID for those columns
  agg_df <- stats::aggregate(data[dv_cols],
    data[rev(group_cols)],
    FUN = mean,
    na.rm = TRUE
  )

  # reorder columns to have consistency with the supplied data.frame
  agg_df <- agg_df[c(group_cols, dv_cols)]

  # keep class and attributes and pass back
  agg_df <- copy_class_attributes(old = data, new = agg_df)

  agg_df <- validate_stats_dm(agg_df)

  return(agg_df)
}



# HELPER TO KEEP CLASS/ATTRIBUTS  --------------------------------------------

#' Copy Class Attributes from One Object to Another
#'
#' This function transfers class attributes from an `old` object to a `new`
#' object, ensuring that `new` inherits the class structure and missing
#' attributes of `old`. The primary purpose is to enforce class consistency and
#' restore any lost attributes when modifying or combining objects. It is
#' used in the internals of the package and it is not exported.
#'
#' @param old The source object from which class attributes will be copied.
#' @param new The target object to which class attributes will be assigned.
#' @return The modified `new` object with attributes and class from `old`.
#'
#' @details
#' The function assumes that all class attributes of `new` can be found in
#' `old`. Note also, that the order of attributes is not ensured.
#'
#' @keywords internal
copy_class_attributes <- function(old, new) {
  UseMethod("copy_class_attributes")
}


#' @rdname copy_class_attributes
#' @export
copy_class_attributes.stats_dm <- function(old, new) {
  stopifnot(class(new) %in% class(old))
  lost_attribtues <- setdiff(names(attributes(old)), names(attributes(new)))

  class(new) <- class(old) # ensures sorting
  for (one_attr in lost_attribtues) { # doesn't ensure sorting
    attr(new, one_attr) <- attr(old, one_attr)
  }

  return(new)
}




# UNPACK METHODS ----------------------------------------------------------

#' @rdname unpack_obj
#' @export
unpack_obj.stats_dm <- function(object, ..., unpack_elements = TRUE) {
  if (unpack_elements) {
    object <- as.data.frame(object)
    attr(object, "b_coding") <- NULL
  }

  return(object)
}


#' @rdname unpack_obj
#' @export
unpack_obj.stats_dm_list <- function(object, ..., unpack_elements = TRUE,
                                     type = NULL) {
  # default is all stored stat types
  if (is.null(type)) {
    type <- names(object)
  }
  type <- match.arg(type, names(object), several.ok = TRUE)

  # iterate across all
  stats <- sapply(type, function(x) {
    unpack_obj(object[[x]], unpack_elements = unpack_elements)
  }, simplify = FALSE, USE.NAMES = TRUE)

  if (length(type) == 1) {
    return(stats[[1]])
  } else {
    return(stats)
  }
}

Try the dRiftDM package in your browser

Any scripts or data that you put into this service are public.

dRiftDM documentation built on April 3, 2025, 7:48 p.m.