# R/para_plot_CI.R
# In parasiteR: A Theorical-Practical Approach to Parasitological Data Analysis
#
# Documented in para_plot_CI

#' Visualization of parasitological descriptors with confidence intervals
#'
#' This function generates graphical representations of parasitological
#' estimates (abundance, intensity, or prevalence) together with their
#' confidence intervals. It supports multiple input formats and can
#' automatically detect the response variable and the structure of the
#' confidence interval columns. The function allows flexible grouping, taxon
#' filtering, and visualization either as a faceted plot or as separate plots.
#'
#' The function is designed to be compatible with outputs from the estimation
#' functions of the package (e.g., \code{\link[parasiteR]{para_abundance_CI}},
#' \code{\link[parasiteR]{para_intensity_CI}},
#' \code{\link[parasiteR]{para_prevalence_CI}}). Interpretation of graphical
#' outputs remains the responsibility of the user.
#'
#' When \code{descriptor}, \code{lower_ci} or \code{upper_ci} are \code{NULL},
#' the function automatically detects:
#'
#' \itemize{
#'  \item The response variable to be plotted (first match among
#'    \code{MeanA}, \code{MedA}, \code{MeanI}, \code{MedI},
#'    \code{prevalence}).
#'  \item The structure of confidence intervals, tried in this order:
#'  \itemize{
#'    \item Separate columns (\code{Lower_CI}, \code{Upper_CI})
#'    \item Method-specific columns (\code{Lower_exact}/\code{Upper_exact},
#'      then \code{Lower_blaker}/\code{Upper_blaker})
#'    \item Combined intervals stored as a single character column
#'      (\code{CI}, \code{CI_exact} or \code{CI_blaker}) formatted as
#'      \code{"min - max"}
#' }
#' }
#'
#' Column names passed explicitly through \code{lower_ci} and \code{upper_ci}
#' take precedence over automatic detection.
#'
#' When multiple grouping variables are provided in \code{group_vars}, they are
#' combined into a single factor for visualization. Confidence intervals are
#' displayed as vertical error bars, and point estimates are overlaid. When
#' multiple parasite taxa are present (column \code{Sp}), results are displayed
#' using faceting or as separate plots.
#' @usage
#' para_plot_CI(para_data, group_vars, sp_cols = NULL, descriptor = NULL,
#'  lower_ci = NULL, upper_ci = NULL, point_color = "blue", line_size = 1,
#'  point_size = 3, n_cols = 1, include_zeros = TRUE, separate_plots = FALSE)
#'
#' @importFrom rlang := .data
#'
#' @param para_data Data frame containing parasitological descriptors and confidence intervals estimated with one of the following functions: \code{\link[parasiteR]{para_abundance_CI}}, \code{\link[parasiteR]{para_intensity_CI}}, \code{\link[parasiteR]{para_prevalence_CI}}. Taxa are read from the column \code{Sp}, when present.
#' @param group_vars Character vector specifying the variable(s) to be used on the x-axis. Multiple variables will be combined.
#' @param sp_cols Optional vector of parasite taxa (values of column \code{Sp}) to include in the plot. Default is \code{NULL} (all taxa are included).
#' @param descriptor Name of the variable to be plotted on the y-axis. If \code{NULL}, the function automatically detects a suitable variable (e.g., \code{prevalence}, \code{MeanA}, \code{MedA}, \code{MeanI}, \code{MedI}).
#' @param lower_ci Optional name of the column containing the lower confidence limit. If \code{NULL}, the function automatically detects and extracts it. Default is \code{NULL}.
#' @param upper_ci Optional name of the column containing the upper confidence limit. If \code{NULL}, the function automatically detects and extracts it. Default is \code{NULL}.
#' @param point_color Color of the points. Default is \code{"blue"}.
#' @param line_size Line width of the confidence interval bars. Default is \code{1}.
#' @param point_size Size of the points. Default is \code{3}.
#' @param n_cols Number of columns used in faceted plots. Default is \code{1}.
#' @param include_zeros Logical. If \code{FALSE}, only rows whose descriptor value is greater than zero are plotted. Default is \code{TRUE}.
#' @param separate_plots Logical. If \code{TRUE} and more than one taxon is present, returns a list of plots (one per taxon). If \code{FALSE}, produces a faceted plot. Default is \code{FALSE}.
#'
#' @return A ggplot object, or a named list of ggplot objects (one per taxon)
#' when \code{separate_plots = TRUE} and more than one taxon is present. When
#' the data contain a single taxon, a single ggplot object is always returned.
#' @references
#' Bush, A.O., Lafferty, K.D., Lotz, J.M., Shostak, A.W. (1997). Parasitology meets ecology on its own terms:
#' Margolis revisited. \emph{Journal of Parasitology}, 83(4), 575–583.
#'
#' Reiczigel, J., Marozzi, M., Fabian, I., Rózsa, L. (2019). Biostatistics for parasitologists – a primer to
#' quantitative parasitology. \emph{Trends in Parasitology}, 35(4), 277–281.
#'
#' @author Juan Manuel Cabrera, Exequiel Furlan and Elisa Helman
#'
#' @examples
#' ci_data <- data.frame(
#'   Sp = c("sp1", "sp1", "sp2", "sp2"),
#'   Site = c("A", "B", "A", "B"),
#'   MeanA = c(2.5, 1.2, 0.8, 3.1),
#'   CI = c("1.9 - 3.2", "0.7 - 1.9", "0.3 - 1.5", "2.2 - 4.4")
#' )
#' para_plot_CI(ci_data, group_vars = "Site")
#'
#' @export

para_plot_CI <- function(para_data, group_vars, sp_cols = NULL,
                         descriptor = NULL, lower_ci = NULL, upper_ci = NULL,
                         point_color = "blue", line_size = 1, point_size = 3,
                         n_cols = 1, include_zeros = TRUE,
                         separate_plots = FALSE) {

  # ---------------------------
  # 1. Validate inputs
  # ---------------------------
  if (!is.data.frame(para_data)) {
    stop("`para_data` must be a data frame.", call. = FALSE)
  }
  if (!is.character(group_vars) || length(group_vars) == 0L) {
    stop("`group_vars` must be a non-empty character vector.", call. = FALSE)
  }
  missing_groups <- setdiff(group_vars, colnames(para_data))
  if (length(missing_groups) > 0L) {
    stop(
      "Grouping variable(s) not found in `para_data`: ",
      paste(missing_groups, collapse = ", "),
      call. = FALSE
    )
  }

  # ---------------------------
  # 2. Detect the response (y) variable
  # ---------------------------
  if (is.null(descriptor)) {
    # Order matters: the first candidate present in the data wins.
    candidates <- c("MeanA", "MedA", "MeanI", "MedI", "prevalence")
    present <- candidates[candidates %in% colnames(para_data)]
    if (length(present) == 0L) {
      stop("No valid response variable found.", call. = FALSE)
    }
    descriptor <- present[1L]
  } else if (!is.character(descriptor) || length(descriptor) != 1L ||
               !descriptor %in% colnames(para_data)) {
    stop(
      "`descriptor` must be the name of a single column of `para_data`.",
      call. = FALSE
    )
  }

  # ---------------------------
  # 3. Filter taxa
  # ---------------------------
  # `[[` is used throughout instead of `$` because `$` partially matches
  # column names on base data frames (e.g. "Sp" would match "Species").
  has_sp <- "Sp" %in% colnames(para_data)
  if (!is.null(sp_cols)) {
    if (!has_sp) {
      stop(
        "`sp_cols` was supplied but `para_data` has no `Sp` column.",
        call. = FALSE
      )
    }
    para_data <- para_data[para_data[["Sp"]] %in% sp_cols, , drop = FALSE]
  }

  # ---------------------------
  # 4. Resolve confidence limits into numeric Lower_CI / Upper_CI
  # ---------------------------
  para_data <- .para_plot_ci_bounds(para_data, lower_ci, upper_ci)

  # ---------------------------
  # 5. Coerce the response to numeric; drop NA (and optionally <= 0) rows
  # ---------------------------
  para_data[[descriptor]] <- .para_plot_ci_numeric(para_data[[descriptor]])
  keep <- !is.na(para_data[[descriptor]])
  if (!include_zeros) {
    keep <- keep & para_data[[descriptor]] > 0
  }
  para_data <- para_data[keep, , drop = FALSE]

  if (nrow(para_data) == 0) {
    stop("No data available to plot after filtering.", call. = FALSE)
  }

  # ---------------------------
  # 6. Build the x variable
  # ---------------------------
  if (length(group_vars) > 1L) {
    # A single list argument is expanded by interaction() into its factors.
    para_data[["X_combined"]] <- interaction(
      as.list(para_data[group_vars]),
      sep = " - "
    )
    x_plot <- "X_combined"
    x_label <- paste(group_vars, collapse = " - ")
  } else {
    x_plot <- group_vars
    x_label <- group_vars
  }

  # ---------------------------
  # 7. Human-readable y label
  # ---------------------------
  y_label <- switch(
    descriptor,
    prevalence = "Prevalence and CI",
    MeanA = "Mean abundance and CI",
    MedA = "Median abundance and CI",
    MeanI = "Mean intensity and CI",
    MedI = "Median intensity and CI",
    descriptor
  )

  # ---------------------------
  # 8. Shared plot builder (error bars + points + theme + axis labels)
  # ---------------------------
  build_plot <- function(df, title = NULL) {
    p <- ggplot2::ggplot(
      df,
      ggplot2::aes(x = .data[[x_plot]], y = .data[[descriptor]])
    ) +
      ggplot2::geom_errorbar(
        ggplot2::aes(
          ymin = .data[["Lower_CI"]],
          ymax = .data[["Upper_CI"]]
        ),
        width = 0.2,
        linewidth = line_size,
        color = "black",
        na.rm = TRUE
      ) +
      ggplot2::geom_point(
        color = point_color,
        size = point_size
      ) +
      ggplot2::theme_minimal() +
      ggplot2::theme(
        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
      ) +
      ggplot2::labs(
        x = x_label,
        y = y_label
      )
    if (!is.null(title)) {
      p <- p + ggplot2::labs(title = title)
    }
    p
  }

  # Title of a per-taxon plot: the taxon name(s) found in `df`.
  taxon_title <- function(df) {
    as.character(unique(df[["Sp"]]))
  }

  # ---------------------------
  # 9. No `Sp` column or a single taxon: one plain plot
  # ---------------------------
  if (!has_sp) {
    return(build_plot(para_data))
  }
  n_sp <- length(unique(para_data[["Sp"]]))
  if (n_sp <= 1L) {
    return(build_plot(para_data, title = taxon_title(para_data)))
  }

  # ---------------------------
  # 10. Several taxa: list of plots or a faceted plot
  # ---------------------------
  if (separate_plots) {
    # drop = TRUE: skip factor levels that were filtered out, which would
    # otherwise produce plots of empty data frames.
    by_taxon <- split(para_data, para_data[["Sp"]], drop = TRUE)
    return(lapply(by_taxon, function(df) {
      build_plot(df, title = taxon_title(df))
    }))
  }

  build_plot(para_data) +
    ggplot2::facet_wrap(~ Sp, scales = "free_y", ncol = n_cols)
}

# Internal: coerce a column to numeric. Factors go through their labels,
# because as.numeric() on a factor returns the integer level codes.
.para_plot_ci_numeric <- function(x) {
  if (is.factor(x)) {
    x <- as.character(x)
  }
  as.numeric(x)
}

# Internal: return `para_data` with numeric `Lower_CI` and `Upper_CI` columns.
# Column names supplied in `lower_ci` / `upper_ci` take precedence; any limit
# left NULL is auto-detected from the known column layouts. A combined
# "min - max" column is split into the two limits and then removed.
.para_plot_ci_bounds <- function(para_data, lower_ci = NULL, upper_ci = NULL) {
  if (is.null(lower_ci) || is.null(upper_ci)) {
    cols <- colnames(para_data)
    detected <- NULL

    # Layouts with separate lower/upper columns, in priority order.
    split_layouts <- list(
      c("Lower_CI", "Upper_CI"),
      c("Lower_exact", "Upper_exact"),
      c("Lower_blaker", "Upper_blaker")
    )
    for (layout in split_layouts) {
      if (all(layout %in% cols)) {
        detected <- layout
        break
      }
    }

    if (is.null(detected)) {
      # Layouts with one character column holding "min - max".
      combined <- c("CI", "CI_exact", "CI_blaker")
      combined <- combined[combined %in% cols]
      if (length(combined) == 0L) {
        stop("No valid confidence interval columns found.", call. = FALSE)
      }
      combined <- combined[1L]
      pieces <- strsplit(
        as.character(para_data[[combined]]),
        " - ",
        fixed = TRUE
      )
      # Indexing past the end of a piece yields NA, so malformed or missing
      # intervals become NA limits rather than errors.
      para_data[["Lower_CI"]] <- vapply(
        pieces, function(p) p[1L], character(1), USE.NAMES = FALSE
      )
      para_data[["Upper_CI"]] <- vapply(
        pieces, function(p) p[2L], character(1), USE.NAMES = FALSE
      )
      para_data[[combined]] <- NULL
      detected <- c("Lower_CI", "Upper_CI")
    }

    if (is.null(lower_ci)) lower_ci <- detected[1L]
    if (is.null(upper_ci)) upper_ci <- detected[2L]
  }

  requested <- c(lower_ci, upper_ci)
  if (!is.character(requested) || length(requested) != 2L) {
    stop(
      "`lower_ci` and `upper_ci` must each be a single column name.",
      call. = FALSE
    )
  }
  absent <- setdiff(requested, colnames(para_data))
  if (length(absent) > 0L) {
    stop(
      "Confidence interval column(s) not found in `para_data`: ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Read both limits before writing, so neither assignment can affect the
  # other when a requested column is itself named Lower_CI / Upper_CI.
  lower_values <- .para_plot_ci_numeric(para_data[[lower_ci]])
  upper_values <- .para_plot_ci_numeric(para_data[[upper_ci]])
  para_data[["Lower_CI"]] <- lower_values
  para_data[["Upper_CI"]] <- upper_values
  para_data
}