R/best_cutoff.R

Defines functions best_cutoff

Documented in best_cutoff

#' Extract Best Cutoff and Add Binary Variable to Data Frame
#'
#' @description
#' Determines the optimal cutoff point for a continuous variable in survival
#' analysis using the maximally selected rank statistics method. Creates a
#' binary variable based on the identified cutoff and adds it to the input data
#' frame.
#'
#' @param pdata Data frame containing survival information and the continuous
#'   variable.
#' @param variable Character string specifying the name of the continuous
#'   variable for which the optimal cutoff should be determined.
#' @param time Character string specifying the column name containing
#'   time-to-event data. Default is `"time"`.
#' @param status Character string specifying the column name containing event
#'   status (censoring information). Default is `"status"`.
#' @param print_result Logical indicating whether to print detailed results
#'   including cutoff value and Cox model summaries. Default is `TRUE`.
#'
#' @return Data frame identical to `pdata` with an additional binary column
#'   named `<variable>_binary` containing "High" and "Low" categories based on
#'   the optimal cutoff.
#'
#' @export
#' @author Dongqiang Zeng
#'
#' @examples
#' set.seed(123)
#' pdata <- data.frame(
#'   time = rexp(100),
#'   status = rbinom(100, 1, 0.5),
#'   score = rnorm(100, mean = 50, sd = 10)
#' )
#' result <- best_cutoff(pdata, variable = "score", print_result = FALSE)
#' table(result$score_binary)
best_cutoff <- function(pdata, variable, time = "time",
                        status = "status", print_result = TRUE) {
  # Input validation
  if (!is.data.frame(pdata)) {
    cli::cli_abort("{.arg pdata} must be a data frame")
  }
  if (!variable %in% colnames(pdata)) {
    cli::cli_abort("Variable {.val {variable}} not found in pdata")
  }
  if (!time %in% colnames(pdata)) {
    cli::cli_abort("Time column {.val {time}} not found in pdata")
  }
  if (!status %in% colnames(pdata)) {
    cli::cli_abort("Status column {.val {status}} not found in pdata")
  }

  pdata <- as.data.frame(pdata)

  # Rename columns temporarily
  colnames(pdata)[colnames(pdata) == time] <- "time_iobr"
  colnames(pdata)[colnames(pdata) == status] <- "status_iobr"

  # Remove NA values
  pdata <- pdata[!is.na(pdata$time_iobr) & !is.na(pdata$status_iobr), ]

  if (nrow(pdata) == 0) {
    cli::cli_abort("No valid data after removing NA values")
  }

  # Ensure numeric
  pdata$time_iobr <- as.numeric(pdata$time_iobr)
  pdata$status_iobr <- as.numeric(pdata$status_iobr)

  # Find best cutoff
  surv_obj <- survival::Surv(pdata$time_iobr, pdata$status_iobr)

  iscutoff <- survminer::surv_cutpoint(
    pdata,
    time = "time_iobr",
    event = "status_iobr",
    variables = variable
  )

  cutoff_value <- iscutoff$cutpoint$cutpoint
  cli::cli_alert_success(paste(
    "Best cutoff for {.val {variable}}:",
    "{round(cutoff_value, 3)}"
  ))

  # Cox model for continuous variable
  cox_cont <- summary(survival::coxph(
    surv_obj ~ pdata[[variable]],
    data = pdata
  ))

  # Create binary variable
  variable2 <- paste0(variable, "_binary")
  pdata[[variable2]] <- ifelse(
    pdata[[variable]] <= cutoff_value,
    "Low",
    "High"
  )
  pdata[[variable2]] <- factor(pdata[[variable2]], levels = c("Low", "High"))

  # Summary statistics
  binary_summary <- summary(pdata[[variable2]])

  # Cox model for binary variable
  cox_binary <- summary(survival::coxph(
    surv_obj ~ pdata[[variable2]],
    data = pdata
  ))

  if (print_result) {
    print(list(
      best_cutoff = paste0("The best cutoff is = ", round(cutoff_value, 3)),
      cox_continuous_object = cox_cont,
      summary_binary_variable = binary_summary,
      cox_binary_object = cox_binary
    ))
  }

  # Restore original column names
  colnames(pdata)[colnames(pdata) == "time_iobr"] <- time
  colnames(pdata)[colnames(pdata) == "status_iobr"] <- status

  pdata
}

Try the IOBR package in your browser

Any scripts or data that you put into this service are public.

IOBR documentation built on May 30, 2026, 5:07 p.m.