# Nothing
#' Exploratory plots of parasite prevalence
#'
#' Generates exploratory visualizations of parasite prevalence across taxa and
#' optional grouping variables. The function produces stacked bar plots showing
#' the proportion of infested and non-infested hosts, facilitating the
#' assessment of prevalence patterns across hierarchical combinations.
#'
#' The function reshapes the dataset into long format and calculates prevalence
#' as the proportion of infested hosts (hosts with parasite counts > 0)
#' relative to the number of analyzed hosts (non-missing counts) for each
#' parasite taxon and grouping combination. For each combination, the function
#' generates:
#' \itemize{
#'   \item The proportion of infested hosts.
#'   \item The proportion of non-infested hosts.
#' }
#' Faceting is applied to display each parasite taxon and grouping combination
#' in separate panels. Combinations for which a prevalence bar would not be
#' informative show a text label instead of a bar. The cases are evaluated in
#' the following order:
#' \itemize{
#'   \item "Not analyzed": no observations are available (all values are
#'   missing or the combination is absent from the dataset).
#'   \item "One host analyzed": only one host is available, so the sample size
#'   is insufficient for prevalence estimation.
#'   \item "Not recorded": two or more hosts were analyzed and all observed
#'   values are zero, i.e. the parasite was not recorded for that combination.
#' }
#' All proportions are expressed on a scale from 0 to 1. These plots are
#' intended for exploratory purposes and should not be used as formal
#' inference tools.
#'
#' @usage
#' para_explo_prev(dataset, sp_cols, group_vars = NULL,
#' n_col = NULL, verbose = FALSE)
#'
#' @param dataset Data frame containing parasite data.
#' @param sp_cols Vector with the names of the columns containing parasite
#'   abundance (taxa) to be plotted.
#' @param group_vars Vector with the names of categorical variables used to
#'   define groups (e.g., "Sex", "Site"). Default = \code{NULL}.
#' @param n_col Integer specifying the number of columns in the faceted plot
#'   layout. If \code{NULL}, the number of columns is determined automatically
#'   by ggplot2. Default = \code{NULL}.
#' @param verbose A logical value indicating if progress messages should be
#'   given. Default = \code{FALSE}.
#' @return A ggplot2 object containing the generated faceted stacked bar plots.
#'   This object can be further customized using standard ggplot2 functions.
#'
#' @examples
#'
#' # Species 1 and 2
#'
#' para_explo_prev(para_data$dataset,
#'   sp_cols = c("Sp1", "Sp2"),
#'   group_vars = c("Site", "Sp_host"),
#'   n_col = 4,
#'   verbose = TRUE)
#'
#' # Species 3 and 4
#'
#' para_explo_prev(para_data$dataset,
#'   sp_cols = c("Sp3", "Sp4"),
#'   group_vars = c("Site", "Sp_host"),
#'   n_col = 4,
#'   verbose = TRUE)
#'
#' @author Juan Manuel Cabrera, Exequiel Furlan and Elisa Helman
#'
#' @export
#'
para_explo_prev <- function(dataset, sp_cols, group_vars = NULL, n_col = NULL, verbose = FALSE) {
  # Dummy bindings for the column names used through data masking below, so
  # that static code checks do not report them as undefined global variables.
  Abund <- NA
  n_obs <- NA
  n_inf <- NA
  all_na <- NA
  all_zero <- NA
  prev <- NA
  Proportion <- NA
  Status <- NA
  label <- NA

  # ---------------------------
  # Input validation
  # ---------------------------
  if (!is.data.frame(dataset)) {
    stop("The dataset must be a data frame.")
  }
  if (is.null(sp_cols) || length(sp_cols) == 0) {
    stop("The species columns must be specified (sp_cols).")
  }
  if (!all(sp_cols %in% colnames(dataset))) {
    stop("Some of the specified species columns do not exist in the dataset.")
  }
  if (!is.null(group_vars) && !all(group_vars %in% colnames(dataset))) {
    stop("Some of the specified categorical variables do not exist in the dataset.")
  }
  # ---------------------------

  if (verbose) message("Drawing prevalence exploratory plots...")

  # Columns that identify one panel: every grouping variable plus the taxon.
  panel_vars <- c(group_vars, "Sp")

  # Long format: one row per host x taxon, abundance coerced to numeric.
  data_long <- dataset %>%
    tidyr::pivot_longer(
      cols = dplyr::all_of(sp_cols),
      names_to = "Sp",
      values_to = "Abund"
    ) %>%
    dplyr::mutate(Abund = as.numeric(as.character(Abund)))

  # Add every group x taxon combination that is absent from the data (KEY
  # step): the added rows have Abund = NA, so they end up as "Not analyzed"
  # panels instead of silently disappearing from the plot.
  data_long <- data_long %>%
    tidyr::complete(!!!dplyr::syms(panel_vars))

  # Per-panel summary. `label` is the single source of truth for how a panel
  # is drawn: NA means "draw bars", anything else is the text shown instead.
  summary_df <- data_long %>%
    dplyr::group_by(dplyr::across(dplyr::all_of(panel_vars))) %>%
    dplyr::summarise(
      n_total = dplyr::n(),
      n_obs = sum(!is.na(Abund)),
      n_inf = sum(Abund > 0, na.rm = TRUE),
      all_na = n_obs == 0,
      all_zero = n_obs > 0 & all(Abund == 0, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::mutate(
      prev = dplyr::if_else(n_obs > 0, n_inf / n_obs, NA_real_),
      # Order matters: a single analyzed host is reported as such even when
      # its count is zero.
      label = dplyr::case_when(
        all_na ~ "Not analyzed",
        n_obs == 1 ~ "One host\nanalyzed",
        all_zero ~ "Not recorded"
      )
    )

  # Data for the bars: two rows (infested / not infested) per plottable panel.
  bar_df <- summary_df %>%
    dplyr::filter(is.na(label)) %>%
    dplyr::mutate(
      Infested = prev,
      Not_infested = 1 - prev
    ) %>%
    tidyr::pivot_longer(
      cols = c("Infested", "Not_infested"),
      names_to = "Status",
      values_to = "Proportion"
    )

  # Data for the text labels: exactly the panels that get no bars.
  message_df <- summary_df %>%
    dplyr::filter(!is.na(label))

  # Base plot
  p <- ggplot2::ggplot() +
    ggplot2::theme_minimal()

  # Bars
  p <- p +
    ggplot2::geom_bar(
      data = bar_df,
      ggplot2::aes(x = 1, y = Proportion, fill = Status),
      stat = "identity",
      width = 0.6
    )

  # Colours
  p <- p +
    ggplot2::scale_fill_manual(values = c(
      "Infested" = "firebrick",
      "Not_infested" = "grey70"
    ))

  # Text labels
  p <- p +
    ggplot2::geom_text(
      data = message_df,
      ggplot2::aes(x = 1, y = 0.5, label = label),
      size = 3
    )

  # Facets. The variables are passed as symbols rather than pasted into a
  # formula, so column names that are not syntactic R names (e.g. containing
  # spaces) work here as they do in the reshaping steps above. Scales are free
  # when grouping variables are given and fixed otherwise.
  p <- p +
    ggplot2::facet_wrap(
      ggplot2::vars(!!!dplyr::syms(panel_vars)),
      scales = if (is.null(group_vars)) "fixed" else "free",
      ncol = n_col
    )

  # Axis aesthetics: x carries no information, so hide its text and ticks.
  p <- p +
    ggplot2::labs(x = NULL, y = "Proportion of hosts") +
    ggplot2::theme(
      axis.text.x = ggplot2::element_blank(),
      axis.ticks.x = ggplot2::element_blank()
    )

  if (verbose) message("Calculation completed")

  p
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.