R/visualization.R

Defines functions plot_pca_biplot plot_sensitivity plot_scoring_curves plot_radar plot_sqi plot_scores

Documented in plot_pca_biplot plot_radar plot_scores plot_scoring_curves plot_sensitivity plot_sqi

# ============================================================
# SQIpro: Visualization Functions
# All plots are ggplot2-based and publication-ready
# ============================================================

#' Plot Individual Variable Scores as a Heatmap
#'
#' @description
#' Displays a heatmap of mean variable scores (0--1) per group, allowing
#' rapid visual identification of which variables drive high or low SQI
#' within each land-use system.
#'
#' @param scored A scored data frame from \code{\link{score_all}}.
#' @param config A \code{sqi_config} object.
#' @param group_cols Character vector. Grouping columns.
#' @param group_by Character. Which grouping column to display on the x-axis.
#' @param facet_by Character or \code{NULL}. Optional column to facet by
#'   (e.g., \code{"Depth"}).
#' @param palette Character. Colour palette: \code{"RdYlGn"} (default),
#'   \code{"Blues"}, or any \code{RColorBrewer} name.
#' @param title Character. Plot title.
#'
#' @return A \code{ggplot} object.
#'
#' @examples
#' data(soil_data)
#' cfg <- make_config(
#'   variable = c("pH","EC","BD","OC","MBC","Clay"),
#'   type     = c("opt","less","less","more","more","opt"),
#'   opt_low  = c(6.0, NA, NA, NA, NA, 20),
#'   opt_high = c(7.0, NA, NA, NA, NA, 35)
#' )
#' scored <- score_all(soil_data, cfg, group_cols = c("LandUse","Depth"))
#' plot_scores(scored, cfg, group_cols = c("LandUse","Depth"),
#'             group_by = "LandUse", facet_by = "Depth")
#'
#' @export
plot_scores <- function(scored, config, group_cols = "LandUse",
                        group_by = group_cols[1],
                        facet_by = NULL,
                        palette  = "RdYlGn",
                        title    = "Mean Variable Scores by Group") {

  vars  <- intersect(config$variable, names(scored))
  syms  <- rlang::syms(group_cols)

  long <- scored %>%
    dplyr::group_by(!!!syms) %>%
    dplyr::summarise(dplyr::across(dplyr::all_of(vars), mean, na.rm = TRUE),
                     .groups = "drop") %>%
    tidyr::pivot_longer(cols = dplyr::all_of(vars),
                        names_to  = "Variable",
                        values_to = "Score")

  p <- ggplot2::ggplot(long,
         ggplot2::aes(x = .data[[group_by]],
                      y = .data[["Variable"]],
                      fill = .data[["Score"]])) +
    ggplot2::geom_tile(colour = "white", linewidth = 0.4) +
    ggplot2::scale_fill_gradient2(
      low = "#d73027", mid = "#fee08b", high = "#1a9850",
      midpoint = 0.5, limits = c(0, 1),
      name = "Score\n(0-1)") +
    ggplot2::labs(title = title, x = group_by, y = "Soil Variable") +
    ggplot2::theme_classic(base_size = 12) +
    ggplot2::theme(
      axis.text.x  = ggplot2::element_text(angle = 35, hjust = 1),
      plot.title   = ggplot2::element_text(face = "bold"),
      legend.position = "right"
    )

  if (!is.null(facet_by))
    p <- p + ggplot2::facet_wrap(stats::as.formula(paste("~", facet_by)))
  p
}


#' Plot Soil Quality Index Across Groups
#'
#' @description
#' Creates a grouped bar chart of SQI values per group, with optional
#' error bars (standard deviation computed across replicate observations
#' before indexing) and significance letters.
#'
#' @param sqi_result A data frame returned by any \code{sqi_*()} function.
#' @param sqi_col Character. Name of the SQI column to plot.
#' @param group_col Character. Grouping column for the x-axis.
#' @param fill_col Character or \code{NULL}. Column for fill (e.g.,
#'   \code{"Depth"} to produce side-by-side bars).
#' @param letters_df Data frame with columns \code{Group} and \code{Letter}
#'   (from \code{\link{sqi_anova}}), or \code{NULL}.
#' @param title Character. Plot title.
#' @param y_label Character. Y-axis label.
#' @param palette Character vector of colours.
#'
#' @return A \code{ggplot} object.
#'
#' @examples
#' data(soil_data)
#' cfg <- make_config(
#'   variable = c("pH","EC","BD","OC","MBC","Clay"),
#'   type     = c("opt","less","less","more","more","opt"),
#'   opt_low  = c(6.0, NA, NA, NA, NA, 20),
#'   opt_high = c(7.0, NA, NA, NA, NA, 35)
#' )
#' scored <- score_all(soil_data, cfg, group_cols = c("LandUse","Depth"))
#' res    <- sqi_linear(scored, cfg, group_cols = c("LandUse","Depth"))
#' plot_sqi(res, sqi_col = "SQI_linear", group_col = "LandUse",
#'          fill_col = "Depth")
#'
#' @export
plot_sqi <- function(sqi_result, sqi_col, group_col,
                     fill_col   = NULL,
                     letters_df = NULL,
                     title      = "Soil Quality Index",
                     y_label    = "SQI (0-1)",
                     palette    = c("#2d6a4f","#52b788","#95d5b2",
                                    "#d8f3dc","#b7e4c7")) {

  aes_args <- if (!is.null(fill_col)) {
    ggplot2::aes(x    = .data[[group_col]],
                 y    = .data[[sqi_col]],
                 fill = .data[[fill_col]])
  } else {
    ggplot2::aes(x    = .data[[group_col]],
                 y    = .data[[sqi_col]],
                 fill = .data[[group_col]])
  }

  p <- ggplot2::ggplot(sqi_result, aes_args) +
    ggplot2::geom_col(position = ggplot2::position_dodge(0.8),
                      width = 0.7, colour = "grey30", linewidth = 0.3) +
    ggplot2::scale_fill_manual(values = palette) +
    ggplot2::scale_y_continuous(limits = c(0, 1.1), expand = c(0, 0)) +
    ggplot2::labs(title = title, x = group_col, y = y_label) +
    ggplot2::theme_classic(base_size = 12) +
    ggplot2::theme(
      axis.text.x  = ggplot2::element_text(angle = 35, hjust = 1),
      plot.title   = ggplot2::element_text(face = "bold"),
      legend.title = ggplot2::element_blank()
    )

  if (!is.null(letters_df)) {
    merged <- merge(sqi_result, letters_df,
                    by.x = group_col, by.y = "Group", all.x = TRUE)
    p <- p + ggplot2::geom_text(
      data = merged,
      ggplot2::aes(x     = .data[[group_col]],
                   y     = .data[[sqi_col]] + 0.05,
                   label = .data[["Letter"]]),
      size = 4, fontface = "bold", inherit.aes = FALSE
    )
  }
  p
}


#' Radar / Spider Chart of Variable Scores
#'
#' @description
#' Draws a radar (spider) chart comparing mean variable scores across
#' groups.  Useful for visualising the multidimensional soil quality
#' profile of each land-use system.
#'
#' @param scored A scored data frame from \code{\link{score_all}}.
#' @param config A \code{sqi_config} object.
#' @param group_col Character. Column used to define radar chart series.
#' @param group_cols Character vector of all grouping columns.
#' @param vars Character vector of variables to include.  Defaults to all
#'   in \code{config}.
#' @param title Character. Plot title.
#' @param palette Character vector of colours for each group.
#'
#' @return Invisible \code{NULL}; the chart is rendered to the active
#'   graphics device.
#'
#' @references
#' Chambers, J.M., & Hastie, T.J. (1992). \emph{Statistical Models in S}.
#' Wadsworth & Brooks/Cole.
#'
#' @examples
#' data(soil_data)
#' cfg <- make_config(
#'   variable = c("pH","EC","BD","OC","MBC","Clay"),
#'   type     = c("opt","less","less","more","more","opt"),
#'   opt_low  = c(6.0, NA, NA, NA, NA, 20),
#'   opt_high = c(7.0, NA, NA, NA, NA, 35)
#' )
#' scored <- score_all(soil_data, cfg, group_cols = c("LandUse","Depth"))
#' plot_radar(scored, cfg, group_col = "LandUse",
#'            group_cols = c("LandUse","Depth"))
#'
#' @export
plot_radar <- function(scored, config, group_col, group_cols = group_col,
                       vars    = NULL,
                       title   = "Soil Quality Radar Profile",
                       palette = c("#1b7837","#762a83","#d6604d",
                                   "#4393c3","#f4a582")) {

  vars <- if (!is.null(vars)) vars else intersect(config$variable, names(scored))
  syms <- rlang::syms(group_cols)

  means <- scored %>%
    dplyr::group_by(!!!syms) %>%
    dplyr::summarise(dplyr::across(dplyr::all_of(vars),
                                   mean, na.rm = TRUE),
                     .groups = "drop") %>%
    dplyr::group_by(.data[[group_col]]) %>%
    dplyr::summarise(dplyr::across(dplyr::all_of(vars),
                                   mean, na.rm = TRUE),
                     .groups = "drop")

  mat          <- as.data.frame(means[, vars])
  rownames(mat) <- means[[group_col]]
  radar_df     <- rbind(rep(1, length(vars)),
                        rep(0, length(vars)),
                        mat)

  n_groups <- nrow(mat)
  cols     <- palette[seq_len(n_groups)]

  # Base-R radar chart (no external dependencies)
  p    <- length(vars)
  ang  <- seq(0, 2 * pi, length.out = p + 1)[seq_len(p)]
  xlim <- c(-1.4, 1.4)
  ylim <- c(-1.4, 1.4)

  graphics::plot.new()
  graphics::plot.window(xlim = xlim, ylim = ylim, asp = 1)
  graphics::title(main = title, cex.main = 1.1)

  # Grid circles
  for (r in c(0.25, 0.5, 0.75, 1.0)) {
    theta <- seq(0, 2 * pi, length.out = 200)
    graphics::lines(r * cos(theta), r * sin(theta), col = "grey80", lty = 1)
  }
  # Axis spokes and labels
  for (j in seq_len(p)) {
    graphics::lines(c(0, cos(ang[j])), c(0, sin(ang[j])), col = "grey70")
    graphics::text(1.18 * cos(ang[j]), 1.18 * sin(ang[j]),
                   labels = vars[j], cex = 0.78, font = 1)
  }
  # Data polygons
  for (k in seq_len(nrow(mat))) {
    r_vals <- as.numeric(mat[k, ])
    x_pts  <- c(r_vals * cos(ang), r_vals[1] * cos(ang[1]))
    y_pts  <- c(r_vals * sin(ang), r_vals[1] * sin(ang[1]))
    graphics::polygon(x_pts, y_pts,
                      border = cols[k],
                      col    = adjustcolor(cols[k], alpha.f = 0.12),
                      lwd    = 2)
  }
  graphics::legend("bottomright",
                   legend = rownames(mat),
                   col    = cols, lty = 1, lwd = 2,
                   bty    = "n", cex = 0.8)
  invisible(NULL)
}


#' Plot Scoring Curves for All Variables
#'
#' @description
#' Visualises the scoring function (0--1 transformation) for each variable
#' in the configuration, overlaid on the observed data distribution.  This
#' plot is essential for verifying that the scoring configuration is
#' biologically sensible before computing indices.
#'
#' @param data The raw (unscored) soil data frame.
#' @param config A \code{sqi_config} object.
#' @param group_cols Character vector of grouping columns to exclude.
#' @param ncol Integer. Number of columns in the facet grid. Default 3.
#'
#' @return A \code{ggplot} object.
#'
#' @examples
#' data(soil_data)
#' cfg <- make_config(
#'   variable = c("pH","EC","BD","OC","MBC","Clay"),
#'   type     = c("opt","less","less","more","more","opt"),
#'   opt_low  = c(6.0, NA, NA, NA, NA, 20),
#'   opt_high = c(7.0, NA, NA, NA, NA, 35)
#' )
#' plot_scoring_curves(soil_data, cfg,
#'                     group_cols = c("LandUse","Depth"))
#'
#' @export
plot_scoring_curves <- function(data, config, group_cols = "LandUse",
                                ncol = 3) {

  curve_list <- lapply(seq_len(nrow(config)), function(i) {
    var     <- config$variable[i]
    type    <- config$type[i]
    if (!var %in% names(data)) return(NULL)
    x_obs   <- data[[var]][is.finite(data[[var]])]
    x_seq   <- seq(min(x_obs), max(x_obs), length.out = 200)

    score_seq <- tryCatch(switch(
      type,
      "more"   = score_more(x_seq),
      "less"   = score_less(x_seq),
      "opt"    = score_optimum(x_seq,
                               config$opt_low[i], config$opt_high[i]),
      "trap"   = score_trapezoid(x_seq,
                                 config$min_val[i], config$opt_low[i],
                                 config$opt_high[i], config$max_val[i]),
      rep(NA_real_, length(x_seq))
    ), error = function(e) rep(NA_real_, length(x_seq)))

    data.frame(Variable = var,
               Type     = type,
               x        = x_seq,
               score    = score_seq)
  })

  curve_df <- do.call(rbind, Filter(Negate(is.null), curve_list))

  type_labels <- c(more = "More is Better",
                   less = "Less is Better",
                   opt  = "Optimum Range",
                   trap = "Trapezoidal")
  curve_df$Type_label <- type_labels[curve_df$Type]

  ggplot2::ggplot(curve_df,
    ggplot2::aes(x = .data[["x"]], y = .data[["score"]],
                 colour = .data[["Type_label"]])) +
    ggplot2::geom_line(linewidth = 1.1) +
    ggplot2::facet_wrap(~ Variable, scales = "free_x", ncol = ncol) +
    ggplot2::scale_colour_manual(
      values = c("More is Better" = "#1a9850",
                 "Less is Better" = "#d73027",
                 "Optimum Range"  = "#4575b4",
                 "Trapezoidal"    = "#984ea3"),
      name = "Scoring type") +
    ggplot2::scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.25)) +
    ggplot2::labs(
      title    = "Scoring Curves for Each Soil Variable",
      subtitle = paste("Verify that each curve reflects biological/",
                       "agronomic expectations"),
      x = "Observed Variable Value",
      y = "Score (0-1)"
    ) +
    ggplot2::theme_bw(base_size = 11) +
    ggplot2::theme(
      plot.title    = ggplot2::element_text(face = "bold"),
      strip.text    = ggplot2::element_text(face = "bold"),
      legend.position = "bottom"
    )
}


#' Sensitivity Tornado Chart
#'
#' @description
#' Visualises variable importance from \code{\link{sqi_sensitivity}} as a
#' horizontal bar (tornado) chart, ordered from most to least sensitive.
#'
#' @param sa_result Data frame from \code{\link{sqi_sensitivity}}.
#' @param title Character. Plot title.
#'
#' @return A \code{ggplot} object.
#'
#' @examples
#' data(soil_data)
#' cfg <- make_config(
#'   variable = c("pH","EC","BD","OC","MBC","Clay"),
#'   type     = c("opt","less","less","more","more","opt"),
#'   opt_low  = c(6.0, NA, NA, NA, NA, 20),
#'   opt_high = c(7.0, NA, NA, NA, NA, 35)
#' )
#' scored <- score_all(soil_data, cfg, group_cols = c("LandUse","Depth"))
#' sa     <- sqi_sensitivity(scored, cfg, group_cols = c("LandUse","Depth"))
#' plot_sensitivity(sa)
#'
#' @export
plot_sensitivity <- function(sa_result,
                             title = "Variable Sensitivity to SQI") {

  sa_result$variable <- factor(sa_result$variable,
                                levels = sa_result$variable[
                                  order(sa_result$relative_importance)])

  ggplot2::ggplot(sa_result,
    ggplot2::aes(x    = .data[["variable"]],
                 y    = .data[["relative_importance"]],
                 fill = .data[["relative_importance"]])) +
    ggplot2::geom_col(width = 0.65) +
    ggplot2::geom_errorbar(
      ggplot2::aes(ymin = pmax(.data[["relative_importance"]] -
                                 .data[["sd_change"]] /
                                 max(.data[["mean_change"]], na.rm=TRUE), 0),
                   ymax = .data[["relative_importance"]] +
                     .data[["sd_change"]] /
                     max(.data[["mean_change"]], na.rm=TRUE)),
      width = 0.25, colour = "grey30") +
    ggplot2::coord_flip() +
    ggplot2::scale_fill_gradient(low = "#fee08b", high = "#1a9850",
                                 name = "Relative\nImportance") +
    ggplot2::scale_y_continuous(limits = c(0, 1.05)) +
    ggplot2::labs(title = title,
                  x = "Soil Variable",
                  y = "Relative Importance (0-1)") +
    ggplot2::theme_classic(base_size = 12) +
    ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
}


#' PCA Biplot of Soil Variables and Groups
#'
#' @description
#' Renders a PCA biplot showing both variable loadings and group centroids,
#' using \code{factoextra::fviz_pca_biplot}.  Useful for understanding
#' variable relationships and group separation underlying MDS selection.
#'
#' @param mds An object returned by \code{\link{select_mds}}.
#' @param scored A scored data frame (for group colour coding).
#' @param group_col Character. Column for group colours.
#' @param title Character. Plot title.
#'
#' @return A \code{ggplot} object.
#'
#' @examples
#' data(soil_data)
#' cfg <- make_config(
#'   variable = c("pH","EC","BD","OC","MBC","Clay"),
#'   type     = c("opt","less","less","more","more","opt"),
#'   opt_low  = c(6.0, NA, NA, NA, NA, 20),
#'   opt_high = c(7.0, NA, NA, NA, NA, 35)
#' )
#' scored <- score_all(soil_data, cfg, group_cols = c("LandUse","Depth"))
#' mds    <- select_mds(scored, group_cols = c("LandUse","Depth"))
#' plot_pca_biplot(mds, scored, group_col = "LandUse")
#'
#' @export
plot_pca_biplot <- function(mds, scored, group_col = "LandUse",
                            title = "PCA Biplot of Soil Quality Variables") {

  group_vec <- as.factor(scored[[group_col]])

  factoextra::fviz_pca_biplot(
    mds$pca,
    habillage   = group_vec,
    addEllipses = TRUE,
    ellipse.level = 0.90,
    label       = "var",
    col.var     = "black",
    repel       = TRUE,
    title       = title
  ) +
    ggplot2::theme_bw(base_size = 12)
}

Try the SQIpro package in your browser

Any scripts or data that you put into this service are public.

SQIpro documentation built on April 20, 2026, 5:06 p.m.