R/dropplot.R
In ILRCM: Convert Irregular Longitudinal Data to Regular Intervals and Perform Clustering

Documented in dropplot

#' Dropout Curve and Observation Distribution for Irregular Longitudinal Data
#'
#' This function generates a combined plot of a dropout curve and a histogram of observation counts over time.
#' The dropout curve shows how many subjects remain in the study over time based on their last observation time.
#' The histogram shows how the observations are distributed across time.
#'
#' @param data A data frame containing the longitudinal data.
#' @param id_col A character string specifying the column name for subject identifiers.
#' @param time_col A character string specifying the column name for the time variable.
#' @param bins Number of bins for the histogram (default is 100).
#' @param percentile A numeric value between 0 and 100 specifying the cutoff for the red dropout line (default is 90).
#'
#' @return A list with two elements:
#' \itemize{
#'   \item \code{plot}: A ggplot object showing the dropout curve and histogram.
#'   \item \code{data}: A data frame with mid-points of the time bins (`mid_time`) and the number of observations (`count`) in descending order.
#' }
#'
#' @import dplyr
#' @import ggplot2
#' @import scales
#' @importFrom rlang sym
#'
#' @examples
#' \dontrun{
#'   data(smocc)  # assumes smocc is loaded with columns id and age
#'   result <- dropplot(data = smocc, id_col = "id", time_col = "age", bins = 60, percentile = 90)
#'   print(result$plot)
#'   head(result$data)
#' }
#'
#' @export
dropplot <- function(data, id_col, time_col, bins = 100, percentile = 90) {
  id_sym <- rlang::sym(id_col)
  time_sym <- rlang::sym(time_col)

  # Dropout curve
  last_obs <- data %>%
    dplyr::group_by(!!id_sym) %>%
    dplyr::summarize(last_time = max(!!time_sym), .groups = "drop_last")

  time_grid <- seq(0, max(data[[time_col]], na.rm = TRUE), length.out = 1000)
  dropout_curve <- data.frame(
    time = time_grid,
    n_subjects = sapply(time_grid, function(t) sum(last_obs$last_time >= t))
  )

  # Custom cutoff
  total_subjects <- nrow(last_obs)
  remaining_subjects_cutoff <- ceiling((100 - percentile) / 100 * total_subjects)
  cutoff_time <- min(dropout_curve$time[dropout_curve$n_subjects <= remaining_subjects_cutoff])

  # Histogram
  breaks <- seq(min(data[[time_col]], na.rm = TRUE), max(data[[time_col]], na.rm = TRUE), length.out = bins + 1)
  hist_data <- suppressWarnings({
    data %>%
      dplyr::mutate(bin = cut(!!time_sym, breaks = breaks, include.lowest = TRUE)) %>%
      dplyr::count(bin) %>%
      dplyr::mutate(
        lower = as.numeric(sub("\\((.+),.*", "\\1", bin)),
        upper = as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", bin)),
        bin_mid = (lower + upper) / 2
      ) %>%
      dplyr::filter(!is.na(bin_mid))
  })

  bin_width <- diff(breaks)[1] * 0.9

  # Plot
  p <- ggplot2::ggplot() +
    ggplot2::geom_bar(data = hist_data, aes(x = bin_mid, y = n),
                      stat = "identity", fill = "blue", alpha = 0.4, width = bin_width) +
    ggplot2::geom_line(data = dropout_curve, aes(x = time, y = n_subjects),
                       color = "darkblue", size = 0.8) +
    ggplot2::geom_hline(yintercept = remaining_subjects_cutoff, color = "red", linetype = "dashed", size = 1) +
    ggplot2::annotate("text", x = max(data[[time_col]], na.rm = TRUE), y = remaining_subjects_cutoff,
                      label = paste0(percentile, "% dropout (", remaining_subjects_cutoff, " ", id_col, " remaining)"),
                      hjust = 1.1, vjust = -0.5, color = "red", fontface = "bold", size = 4) +
    ggplot2::scale_y_continuous(
      name = "Number of subjects or observations",
      breaks = scales::pretty_breaks(n = 5)
    ) +
    ggplot2::scale_x_continuous(
      name = time_col,
      breaks = scales::pretty_breaks(n = 5)
    ) +
    ggplot2::ggtitle("Dropout Curve and Observation Distribution for Irregular Longitudinal Data") +
    ggplot2::geom_hline(yintercept = 0, color = "black", size = 1) +
    ggplot2::geom_vline(xintercept = 0, color = "black", size = 1) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      legend.position = "none",
      panel.grid.major = ggplot2::element_line(color = "grey85", size = 0.3),
      panel.grid.minor = ggplot2::element_line(color = "grey85", size = 0.1),
      axis.line = ggplot2::element_blank(),
      panel.border = ggplot2::element_blank(),
      plot.title = ggplot2::element_text(face = "bold", size = 14),
      axis.title = ggplot2::element_text(face = "bold", size = 12),
      axis.text = ggplot2::element_text(face = "bold", size = 14)
    )

  result_df <- hist_data %>%
    dplyr::select(mid_time = bin_mid, count = n) %>%
    dplyr::arrange(desc(count))

  return(list(plot = p, data = result_df))
}

utils::globalVariables(c('bin','lower','upper','bin_mid','n_subjects'))

Any scripts or data that you put into this service are public.

ILRCM documentation built on Aug. 12, 2025, 1:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ILRCM
Convert Irregular Longitudinal Data to Regular Intervals and Perform Clustering

R/dropplot.R
In ILRCM: Convert Irregular Longitudinal Data to Regular Intervals and Perform Clustering

Defines functions dropplot

Documented in dropplot

Try the ILRCM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ILRCM Convert Irregular Longitudinal Data to Regular Intervals and Perform Clustering

R/dropplot.R In ILRCM: Convert Irregular Longitudinal Data to Regular Intervals and Perform Clustering

Defines functions dropplot

Documented in dropplot

Try the ILRCM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ILRCM
Convert Irregular Longitudinal Data to Regular Intervals and Perform Clustering

R/dropplot.R
In ILRCM: Convert Irregular Longitudinal Data to Regular Intervals and Perform Clustering