R/iclogcondist_visualization.R
In iclogcondist: Log-Concave Distribution Estimation with Interval-Censored Data

Documented in iclogcondist_visualization plot.iclogcondist

#' Visualize the Estimated Cumulative Distribution Functions
#'
#' @description
#' This function visualizes a user-specified distribution \code{true_dist} (if available) and the estimated
#' cumulative distribution functions (CDF) \eqn{F(t)} and \eqn{log F(t)} for a given range.
#' The function overlays the estimated functions from a list of fitted models
#' on the same plot, allowing comparison with the user-specified distribution (if provided).
#' In a simulation study, the user-specified distribution can correspond to the true underlying distribution.
#'
#' @param X A dataset or input data used to prepare the plot range if \code{range} is not specified.
#' @param range A numeric vector of length 2 specifying the range of \code{t} values for plotting.
#'   If \code{NA} the function calculates the range based on the input data \code{X}.
#' @param fit_list A named list of fitted models, where each element is expected to contain
#'   an \code{est} object with estimates for generating the CDF plots. The name of the list should be \code{"LCMLE"},\code{"UMLE"} or \code{"LCM_UMLE"}
#' @param true_dist Optional. A data frame or list containing the user-specified distribution values,
#'   with components \code{x} and \code{y} representing the values of \eqn{t} and \eqn{F(t)} respectively.
#'
#' @return A list containing two ggplot objects: \code{logF_plot} for \eqn{log F(t)} and \code{F_plot} for \eqn{F(t)}.
#' @import ggplot2
#' @examples
#' # Example usage
#' data(lgnm)
#' fit_LCMLE <- ic_LCMLE(lgnm)
#' fit_UMLE <- ic_UMLE(lgnm)
#' iclogcondist_visualization(
#'   X = lgnm,
#'   range = c(0, 10),
#'   fit_list = list(
#'     "UMLE" = fit_UMLE,
#'     "LCMLE" = fit_LCMLE
#'   )
#' )
#'
#' @export
iclogcondist_visualization <- function(X, range = NA, fit_list = list(), true_dist = NA) {
  # Set up plot range based on X if default range is provided
  if (any(is.na(range))) {
    tau_no_Inf <- data_prep(X)$tau_no_Inf
    range <- c(min(tau_no_Inf), max(tau_no_Inf))
  }
  
  n_points <- 1000
  tau <- seq(range[1], range[2], length.out = n_points)
  
  if (is.data.frame(true_dist)) {
    true_dist <- true_dist[(true_dist$x >= range[1]) & (true_dist$x <= range[2]),]
    true_data <- data.frame(tau = true_dist$x, true_values = true_dist$y)
    true_data$true_values_log <- log(true_data$true_values)
    plot_true_data <- TRUE  # Indicator to plot true_data
  } else {
    plot_true_data <- FALSE  # Do not plot true_data if true_dist is NA
  }
  

  
  fit_data_log <- data.frame(tau = rep(tau, length(fit_list)), 
                             log_F_est = numeric(length(tau) * length(fit_list)),
                             fit_name = rep(names(fit_list), each = n_points))
  
  fit_data_F <- data.frame(tau = rep(tau, length(fit_list)), 
                           F_est = numeric(length(tau) * length(fit_list)),
                           fit_name = rep(names(fit_list), each = n_points))
  
  for (i in seq_along(fit_list)) {
    fit_name <- names(fit_list)[i]
    fit_data_log$log_F_est[fit_data_log$fit_name == fit_name] <- get_F_at_x(fit_list[[fit_name]], x = tau, log = TRUE)
    fit_data_F$F_est[fit_data_F$fit_name == fit_name] <- get_F_at_x(fit_list[[fit_name]], x = tau, log = FALSE)
  }

  
  colors = c("True Distribution" = "black", "LCMLE" = "#FF0000", "UMLE" = "#0000FF", "LCM_UMLE" = "#00FF00")
  # Create ggplot for log F(t)
  p_logF <- ggplot() +
    labs(title = "log F(t)", x = "t", y = "log F(t)") +
    scale_color_manual(name = "Legend", values = colors) +
    theme_minimal() +
    theme(legend.position = "bottom")
  
  # Add true_data layer if available
  if (plot_true_data) {
    p_logF <- p_logF +
      geom_line(data = true_data, aes(x = .data$tau, y = .data$true_values_log, color = "True Distribution"), linewidth = 1)
  }
  
  # Add fit_data_log layer
  p_logF <- p_logF +
    geom_line(data = fit_data_log, aes(x = .data$tau, y = .data$log_F_est, color = .data$fit_name), linewidth = 1)
  
  # Create ggplot for F(t)
  p_F <- ggplot() +
    labs(title = "F(t)", x = "t", y = "F(t)") +
    scale_color_manual(name = "Legend", values = colors) +
    theme_minimal() +
    theme(legend.position = "bottom")
  
  # Add true_data layer if available
  if (plot_true_data) {
    p_F <- p_F +
      geom_line(data = true_data, aes(x = .data$tau, y = .data$true_values, color = "True Distribution"), linewidth = 1)
  }
  
  # Add fit_data_F layer
  p_F <- p_F +
    geom_line(data = fit_data_F, aes(x = .data$tau, y = .data$F_est, color = .data$fit_name), linewidth = 1)
  
  # Return the plots as a list
  return(list(logF_plot = p_logF, F_plot = p_F))
}



#' Plot Method for iclogcondist_plot Objects
#'
#' This function generates a plot for objects of class \code{iclogcondist}, which are typically generated by 
#' \code{ic_UMLE}, \code{ic_LCM_UMLE}, or \code{ic_LCMLE}. The plot can display either the cumulative 
#' distribution function \code{F(t)} or the log cumulative distribution function \code{logF(t)}, depending on the 
#' setting of the \code{log} parameter.
#'
#' @param x An object of class \code{iclogcondist}, typically generated by \code{ic_UMLE}, \code{ic_LCM_UMLE}, or \code{ic_LCMLE}.
#' @param log Logical; if \code{TRUE}, plots the log cumulative distribution function \code{logF(t)}. 
#'            If \code{FALSE}, plots \code{F(t)}. Default is \code{FALSE}.
#' @param ... Additional arguments passed to the plotting function.
#' @return An invisible \code{ggplot} object representing the plot. The plot is also displayed in the current graphics device.
#' @import ggplot2
#' @examples
#' # Example usage with ic_UMLE, ic_LCM_UMLE, and ic_LCMLE
#' data(lgnm)
#' X <- lgnm
#' fit_UMLE <- ic_UMLE(X)
#' fit_LCM_UMLE <- ic_LCM_UMLE(X)
#' fit_LCMLE <- ic_LCMLE(X)
#' plot(fit_UMLE, log = TRUE)   # Plot logF(t) for UMLE
#' plot(fit_LCM_UMLE, log = FALSE)  # Plot F(t) for LCM_UMLE
#' plot(fit_LCMLE, log = FALSE) # Plot F(t) for LCMLE
#' @export
plot.iclogcondist <- function(x, log = FALSE, ...) {
  if (!inherits(x, "iclogcondist")) stop("Object must be of class 'iclogcondist'")
  
  df_est <- data.frame(
    tau = x$est$tau_no_Inf,
    F_hat = x$est$F_hat,
    phi_hat = x$est$phi_hat
  )
  
  df_node <- data.frame(
    tau = x$knot_info$tau_on_knot,
    F_hat = x$knot_info$F_on_knot,
    phi_hat = x$knot_info$phi_on_knot
  ) 
  
  # Helper function to prepare plot data with intermediate points
  prepare_plot_data <- function(df, y_col, y_lead_col) {
    # Add lead columns manually
    df$tau_lead <- c(df$tau[-1], NA)
    df$y_lead <- c(df[[y_lead_col]][-1], NA)
    
    # Filter rows where lead values are NA
    df <- df[!is.na(df$tau_lead), ]
    
    # Initialize empty lists to store tau and y_value sequences
    tau_list <- list()
    y_value_list <- list()
    
    # Loop through each row to create intermediate points
    for (i in seq_len(nrow(df))) {
      tau_list[[i]] <- c(df$tau[i], df$tau_lead[i], df$tau_lead[i])
      y_value_list[[i]] <- c(df[[y_col]][i], df[[y_col]][i], df$y_lead[i])
    }
    
    # Combine results into a single data frame
    tau <- unlist(tau_list)
    y_value <- unlist(y_value_list)
    
    result <- data.frame(tau = tau, y_value = y_value)
    return(result)
  }
  
  if (log) {
    y_label <- "logF(t)"
    y_value <- "phi_hat"
  } else {
    y_label <- "F(t)"
    y_value <- "F_hat"
  }
  
  # Determine plot data and labels based on `log` parameter
  if (class(x)[2] == "ic_UMLE") {
    plot_data <- prepare_plot_data(df_est, y_value, y_value)
    p <- ggplot() +
      geom_line(data = plot_data, aes(x = .data$tau, y = .data$y_value), color = "blue") +
      geom_point(data = df_est, aes(x = .data$tau, y = .data[[y_value]]), color = "red") +
      labs(
        title = paste(class(x)[2], "Estimate of", y_label),
        x = "t",
        y = y_label
      ) +
      theme_minimal()
  } else {
    p <- ggplot() +
      geom_line(data = df_est, aes(x = .data$tau, y = .data[[y_value]]), color = "blue", linewidth = 1.2) +
      geom_point(data = df_node, aes(x = .data$tau, y = .data[[y_value]]), color = "red") +
      labs(
        title = paste(class(x)[2], "Estimate of", y_label),
        x = "t",
        y = y_label
      ) +
      theme_minimal()
  }
  print(p)
  return(invisible(p))
}