R/log_AUC.R

Defines functions prep_log_AUC

Documented in prep_log_AUC

#' Calculate log x_axis values for AUClog
#'
#' @section Correction types for handling zero x-axis values:
#' "Corr" adds a set correction value to each \code{x_axis}
#' value and then takes the log of those values. "Shift" and "adjust" implement increasing
#' the \code{x_axis} values by the average difference between the log values on the \code{x_axis}.
#' "IHS" calculates the inverse hyperbolic sine for the \code{x_axis},
#' which is different than the logarithm
#' but is highly correlated with log transformed values. The IHS transformation
#' does not require adjustments because \code{IHS(0) == 0}.
#'
#' @param dat Discounting data tibble. \code{AUC_zeroes} should be run first if
#' zero values on the \code{x_axis} will need to be included.
#' @param x_axis Delays/probabilities/social distance variable
#' @param log_base Base of the logarithm
#' @param type Type of correction to handle 0 values on x_axis. Acceptable values
#' are "corr"., "adjust", "shift" and "IHS". "Corr" adds a set value to each x_axis
#' value and then takes the log of those values. "Adjust" and "shift" are synonyms and implement increasing
#' the x_axis values by the average difference between the log values on the x_axis.
#' "IHS" calculates the inverse hyperbolic sine, which is different than the logarithm
#' but is highly correlated with log transformed values. The IHS transformation
#' does not require corrections
#' @param correction If \code{type == "corr"} this value is what is added to the
#' x_axis prior to taking the log values.
#' @param ihs_theta If \code{type == "IHS"} this value is used to adjust the
#' standard IHS distribution to a preferred distribution.
#' @param dec_offset If \code{TRUE}, offsets the log x_axis values if the lowest
#' non-zero x_axis value is a decimal. This calculation is preferred because if
#' x_axis values are negative then the log values will be negative. The negative
#' log values can cause inconsistencies in how AUC is calculated.
#'
#' @return Original data frame (a tibble) that includes an appended column with log scale
#' version of x_axis
#'
#' @import dplyr
#' @importFrom glue glue
#' @importFrom tibble is_tibble
#' @importFrom rlang :=
#' @export
#'
#' @examples
#'
#' prep_log_AUC(
#'   dat = examp_DD,
#'   x_axis = "delay_months",
#'   log_base = 10,
#'   dec_offset = TRUE,
#'   type = "adjust",
#'   correction = 1
#' )
prep_log_AUC <- function(dat,
                         x_axis,
                         log_base = 2,
                         type = "shift",
                         correction = 1,
                         ihs_theta = 1,
                         dec_offset = TRUE) {
  {    if (!tibble::is_tibble(dat)) {
    stop("dat must be a tibble")
  }
    
    if (!base::is.character(x_axis)) {
      base::stop("x_axis must be a string indicating the x-axis variable")
    }
    
    if (!base::is.numeric(log_base) | (log_base <= 0)) {
      base::stop("log_base must be a number greater than 0")
    }
    
    if (!(type %in% (c("corr", "adjust","shift", "IHS")))) {
      base::stop("type must be a string of either \"corr\", \"shift\", \"adjust\", or \"IHS\".")
    }
    
    if (!base::is.numeric(correction) | (correction <= 0)) {
      base::stop("correction must be a number greater than 0")
    }
    
    if (!base::is.logical(dec_offset)) {
      base::stop("dec_offset must be either TRUE or FALSE")
    }  } # Checks
  
  x_vals <- dat %>%
    dplyr::pull({{ x_axis }}) %>%
    base::unique()
  
  # For ease separated out of single tidy function
  log_vals <-
    dplyr::tibble(orig = x_vals) %>%
    dplyr::arrange("orig") %>%
    dplyr::mutate(
      log_val = base::log(.data[["orig"]], log_base),
      log_diff = .data[["log_val"]] - dplyr::lag(.data[["log_val"]])
    )
  
  # It is possible that other forms will be implemented in the future,
  # so keeping the type variable and just setting it
  
  if (type == "adjust" | type == "shift") {
    
    # Find mean differences between x_axis
    mean_diff <-
      log_vals %>%
      dplyr::filter(!base::is.na(.data[["log_diff"]]) &
                      !base::is.infinite(.data[["log_diff"]])
      ) %>%
      dplyr::pull(.data[["log_diff"]]) %>%
      base::mean()
  } else if (type == "corr") {
    mean_diff <- 1
  }
  
  inc_zero <- base::min(x_vals) == 0
  
  if (!inc_zero) {
    mean_diff <- 0
  }
  
  # The offset exists to eliminate negative x_axis values for clean log values
  if (dec_offset) {
    log_offset <-
      log_vals %>%
      dplyr::filter(!base::is.infinite(.data[["log_val"]])) %>%
      dplyr::pull(.data[["log_val"]]) %>%
      base::min()
    
    if (log_offset < 0) {
      log_offset <- base::abs(log_offset)
    } else {
      log_offset <- 0
    }
  } else {
    log_offset <- 0
  }
  
  if (type == "adjust" | type == "shift") {
    log_vals <-
      log_vals %>%
      dplyr::mutate(log_val_adj = .data[["log_val"]] +
                      log_offset + mean_diff)
  } else if (type == "corr") {
    log_vals <-
      log_vals %>%
      dplyr::mutate(log_val_adj = base::log(.data[["orig"]] + correction,
                                            base = log_base
      ))
  } else if (type == "IHS") {
    log_vals <-
      log_vals %>%
      dplyr::mutate(
        log_val_adj = log(ihs_theta * .data[["orig"]] +
                            sqrt((ihs_theta ^ 2) * (.data[["orig"]] ^ 2) + 1)),
        #v1.0.0_ihs = base::asinh(.data[["orig"]]) #Was IHS from version 1.0.0
        #If theta is set to 1, as is the default in the function then the new
        #IHS values in the update will equal the old
      )
  }
  
  if (inc_zero) {
    log_vals[1, "log_val_adj"] <- 0
  }
  
  new_col <- glue::glue("log_{x_axis}")
  
  log_vals <-
    log_vals %>%
    dplyr::select(
      dplyr::all_of(
        base::c("orig","log_val_adj"))) %>%
    dplyr::rename(
      {{ x_axis }} := "orig",
      {{ new_col }} := "log_val_adj"
    )
  
  dat <- dplyr::left_join(dat,
                          log_vals,
                          by = x_axis
  )
  
  base::return(dat)
}

Try the discAUC package in your browser

Any scripts or data that you put into this service are public.

discAUC documentation built on Feb. 24, 2026, 1:07 a.m.