#' Plot number of introduced taxa for CBD pathways level 2
#'
#' Function to plot bar graph with number of taxa introduced by different
#' pathways at level 2, given a pathway level 1. Possible breakpoints: taxonomic
#' (kingdoms + vertebrates/invertebrates) and temporal (lower limit year).
#'
#' @param df df.
#' @param chosen_pathway_level1 character. A pathway level 1. If CBD standard is
#' followed (see argument `cbd_standard`), one of the level 1 pathways from
#' `pathways_cbd()`.
#' @param category `NULL` (default) or character. One of the kingdoms as given
#' in GBIF or `Chordata` (the phylum) or `Not Chordata` (all other phyla of
#' `Animalia`):
#' 1. `Plantae`
#' 2. `Animalia`
#' 3. `Fungi`
#' 4. `Chromista`
#' 5. `Archaea`
#' 6. `Bacteria`
#' 7. `Protozoa`
#' 8. `Viruses`
#' 9. `incertae sedis`
#' 10. `Chordata`
#' 11. `Not Chordata`
#' @param from `NULL` or numeric. Year trade-off: if not `NULL` select only
#' pathways related to taxa introduced during or after this year. Default:
#' `NULL`.
#' @param facet_column `NULL` (default) or character. The column to use to
#' create additional facet wrap bar graphs underneath the main graph. When
#' `NULL`, no facet graph are created. One of `family`, `order`, `class`,
#' `phylum`, `locality`, `native_range`, `habitat`. If column has another
#' name, rename it before calling this function. Facet `phylum` is not allowed
#' in combination with `category` equal to `"Chordata"` or `"Not Chordata"`.
#' Facet `kingdom` is allowed only with category equal to `NULL`.
#' @param pathway_level1_names character. Name of the column of `df`
#' containing information about pathways at level 1. Default:
#' `pathway_level1`.
#' @param pathway_level2_names character. Name of the column of `df`
#' containing information about pathways at level 2. Default:
#' `pathway_level2`.
#' @param pathways character. Vector with pathways level 2 to visualize. The
#' pathways are displayed following the order as in this vector.
#' @param taxon_names character. Name of the column of `df` containing
#' information about taxa. This parameter is used to uniquely identify taxa.
#' @param kingdom_names character. Name of the column of `df` containing
#' information about kingdom. Default: `"kingdom"`.
#' @param phylum_names character. Name of the column of `df` containing
#' information about phylum. This parameter is used only if `category` is
#' one of: `"Chordata"`, `"Not Chordata"`. Default:
#' `"phylum"`.
#' @param first_observed character. Name of the column of `df` containing
#' information about year of introduction. Default: `"first_observed"`.
#' @param cbd_standard logical. If `TRUE` the values of pathway level 2 are
#' checked based on CBD standard as returned by `pathways_cbd()`. Error is
#' returned if unmatched values are found. If `FALSE`, a warning is returned.
#' Default: `TRUE`.
#' @param title `NULL` or character. Title of the graph. Default: `NULL`.
#' @param x_lab `NULL` or character. x-axis label. Default: `"Number of
#' introduced taxa"`.
#' @param y_lab `NULL` or character. Title of the graph. Default: `"Pathways"`.
#' @return A list with three slots:
#' - `plot`: ggplot2 object (or egg object if facets are used). `NULL` if there
#' are no data to plot.
#' - `data_top_graph`: data.frame (tibble) with data used for the main plot (top
#' graph) in `plot`.
#' - `data_facet_graph`: data.frame (tibble) with data used for the faceting
#' plot in `plot`. `NULL` is returned if `facet_column` is `NULL`.
#'
#' @export
#'
#' @importFrom dplyr %>% .data
#' @importFrom rlang !!
#'
#' @examples
#' \dontrun{
#' library(readr)
#' datafile <- paste0(
#' "https://raw.githubusercontent.com/trias-project/indicators/master/data/",
#' "interim/data_input_checklist_indicators.tsv"
#' )
#' data <- read_tsv(datafile,
#' na = "",
#' col_types = cols(
#' .default = col_character(),
#' key = col_double(),
#' nubKey = col_double(),
#' speciesKey = col_double(),
#' first_observed = col_double(),
#' last_observed = col_double()
#' )
#' )
#' # All taxa
#' visualize_pathways_level2(data, chosen_pathway_level1 = "escape")
#'
#' # Animalia
#' visualize_pathways_level2(data,
#' chosen_pathway_level1 = "escape",
#' category = "Animalia"
#' )
#'
#' # Chordata
#' visualize_pathways_level2(
#' df = data,
#' chosen_pathway_level1 = "escape",
#' category = "Chordata"
#' )
#'
#' # select some pathways only
#' visualize_pathways_level2(
#' df = data,
#' chosen_pathway_level1 = "escape",
#' pathways = c("pet", "horticulture")
#' )
#'
#' # facet phylum
#' visualize_pathways_level2(
#' df = data,
#' chosen_pathway_level1 = "escape",
#' category = "Animalia",
#' facet_column = "phylum"
#' )
#'
#' # facet habitat
#' visualize_pathways_level2(
#' df = data,
#' chosen_pathway_level1 = "escape",
#' facet_column = "habitat"
#' )
#'
#' # Only taxa introduced from 1950
#' visualize_pathways_level2(
#' df = data,
#' chosen_pathway_level1 = "escape",
#' from = 1950
#' )
#'
#' # Add a title
#' visualize_pathways_level2(
#' df = data,
#' chosen_pathway_level1 = "escape",
#' category = "Plantae",
#' from = 1950,
#' title = "Pathway level 2 (escape): Plantae, from 1950"
#' )
#'
#' # Personalize axis labels
#' visualize_pathways_level2(
#' df = data,
#' chosen_pathway_level1 = "escape",
#' x_lab = "Aantal taxa",
#' y_lab = "pathways"
#' )
#' }
visualize_pathways_level2 <- function(df,
chosen_pathway_level1,
category = NULL,
from = NULL,
facet_column = NULL,
pathways = NULL,
pathway_level1_names = "pathway_level1",
pathway_level2_names = "pathway_level2",
taxon_names = "key",
kingdom_names = "kingdom",
phylum_names = "phylum",
first_observed = "first_observed",
cbd_standard = TRUE,
title = NULL,
x_lab = "Number of introduced taxa",
y_lab = "Pathways") {
# initial input checks
# Check df
assertthat::assert_that(is.data.frame(df), msg = "`df` must be a data frame.")
# Check pathway_level1_names
assertthat::assert_that(is.character(pathway_level1_names),
msg = "`pathway_level1_names` must be a character."
)
assertthat::assert_that(length(pathway_level1_names) == 1)
assertable::assert_colnames(df, pathway_level1_names, only_colnames = FALSE)
# Check pathway_level2_names
assertthat::assert_that(is.character(pathway_level2_names),
msg = "`pathway_level2_names` must be a character."
)
assertthat::assert_that(length(pathway_level2_names) == 1)
assertable::assert_colnames(df, pathway_level2_names, only_colnames = FALSE)
# Check category
if (!is.null(category)) {
assertthat::assert_that(is.character(category),
msg = paste0(
"`category` must be NULL or a character. One of: ",
paste(categories, collapse = ", "),
"."
)
)
assertthat::assert_that(length(category) == 1)
assertthat::assert_that(category %in% categories,
msg = paste0(
"`category` is not correct. Choose one of: ",
paste0(categories, collapse = ", "),
"."
)
)
}
# Check facet_column
assertthat::assert_that(is.null(facet_column) | is.character(facet_column),
msg = "Argument facet_column has to be NULL or a character."
)
if (is.character(facet_column)) {
assertthat::assert_that(length(facet_column) == 1)
assertable::assert_colnames(df, facet_column, only_colnames = FALSE)
# check facet column
valid_facets <- c(
"family", "order", "class", "phylum", "kingdom",
"locality", "native_range", "habitat"
)
facet_column <- match.arg(facet_column, valid_facets)
assertthat::assert_that(
is.null(category) || !(category == "Chordata" & facet_column == "phylum"),
msg = "You cannot use phylum as facet with category Chordata."
)
assertthat::assert_that(
is.null(category) ||
!(category == "Not Chordata" & facet_column == "phylum"),
msg = "You cannot use phylum as facet with category Not Chordata."
)
assertthat::assert_that(
is.null(category) ||
!(!is.null(category) & facet_column == "kingdom"),
msg = "You cannot use kingdom as facet if category is selected."
)
}
# Check chosen_pathway_level1
assertthat::assert_that(is.character(chosen_pathway_level1),
msg = "Argument `chosen_pathway_level1` must be a character."
)
assertthat::assert_that(length(chosen_pathway_level1) == 1)
pathways_level1 <- unique(df[[pathway_level1_names]])
assertthat::assert_that(chosen_pathway_level1 %in% pathways_level1,
msg = paste0(
"chosen_pathway_level1 ",
chosen_pathway_level1,
" not present in column ",
pathway_level1_names,
"."
)
)
# Check pathways
if (!is.null(pathways)) {
assertthat::assert_that(is.character(pathways),
msg = "`pathways` must be a vector of characters."
)
invalid_pathways <- pathways[!pathways %in%
df[[pathway_level2_names]]]
assertthat::assert_that(length(invalid_pathways) == 0,
msg = paste0(
"Pathways in `pathways` not present in ",
"data.frame: ",
paste(invalid_pathways, collapse = ","),
"."
)
)
}
# Check taxon_names
assertthat::assert_that(is.character(taxon_names),
msg = "`taxon_names` must be a character."
)
assertthat::assert_that(length(taxon_names) == 1)
assertable::assert_colnames(df, taxon_names, only_colnames = FALSE)
# Check kingdom_names
assertthat::assert_that(is.character(kingdom_names),
msg = "`kingdom_names` must be a character."
)
assertthat::assert_that(length(kingdom_names) == 1)
assertable::assert_colnames(df, kingdom_names, only_colnames = FALSE)
# check parameter phylum
assertthat::assert_that(is.character(phylum_names),
msg = "`phylum_names` must be a character."
)
assertthat::assert_that(length(phylum_names) == 1)
assertable::assert_colnames(df, phylum_names, only_colnames = FALSE)
# Check from
if (!is.null(from)) {
assertthat::assert_that(is.numeric(from),
msg = "`from` must be a number (year)."
)
assertthat::assert_that(length(from) == 1)
assertthat::assert_that(from > 0,
msg = "`from` must be a positive number."
)
assertthat::assert_that(from == as.integer(from),
msg = "`from` must be an integer."
)
assertthat::assert_that(
from <= as.numeric(substr(Sys.Date(), start = 1, stop = 4)),
msg = paste0(
"`from` must be less than ",
format(Sys.Date(), "%Y"),
"."
)
)
}
# Check first_observed
assertthat::assert_that(is.character(first_observed),
msg = "`first_observed` must be a character."
)
assertthat::assert_that(length(first_observed) == 1)
assertable::assert_colnames(df, first_observed, only_colnames = FALSE)
# Check title and labels
if (!is.null(title)) {
assertthat::assert_that(is.character(title),
msg = "`title` must be a character or NULL."
)
assertthat::assert_that(length(title) == 1)
}
if (!is.null(x_lab)) {
assertthat::assert_that(is.character(x_lab),
msg = "`x_lab` must be a character or NULL."
)
assertthat::assert_that(length(x_lab) == 1)
}
if (!is.null(y_lab)) {
assertthat::assert_that(is.character(y_lab),
msg = "`y_lab` must be a character or NULL."
)
assertthat::assert_that(length(y_lab) == 1)
}
# Rename to default column name
df <-
df %>%
dplyr::rename_at(dplyr::vars(tidyselect::all_of(kingdom_names)),
~"group") %>%
dplyr::rename_at(dplyr::vars(tidyselect::all_of(taxon_names)),
~"taxonKey") %>%
dplyr::rename_at(dplyr::vars(tidyselect::all_of(first_observed)),
~"first_observed") %>%
dplyr::rename_at(dplyr::vars(tidyselect::all_of(pathway_level1_names)),
~"pathway_level1") %>%
dplyr::rename_at(dplyr::vars(tidyselect::all_of(pathway_level2_names)),
~"pathway_level2")
# Select data with the chosen pathway level 1
df <-
df %>%
dplyr::filter(.data$pathway_level1 == chosen_pathway_level1)
# Handle asymmetric category system (Chordata, Not Chordta are not kingdoms)
if (!is.null(category)) {
if (!category %in% c("Chordata", "Not Chordata")) {
df <- df %>% dplyr::filter(.data$group == category)
} else {
df <-
df %>%
dplyr::rename_at(dplyr::vars(phylum_names), ~"phylum_group")
if (category == "Chordata") {
df <- df %>% dplyr::filter(.data$phylum_group == category)
} else {
df <-
df %>%
dplyr::filter(.data$group == "Animalia") %>%
dplyr::filter(.data$phylum_group != "Chordata")
}
}
}
# Apply cut-off on year of introduction if given
if (!is.null(from)) {
df <-
df %>%
dplyr::filter(.data$first_observed >= from)
}
if (!is.null(facet_column)) {
if (facet_column == "kingdom") {
# category NULL by assertion
df$kingdom <- df$group
}
}
# Handle NAs and ""
nas_or_empty_pathway_level2 <-
df %>%
dplyr::filter(is.na(.data$pathway_level2) |
.data$pathway_level2 == "") %>%
dplyr::distinct(.data$taxonKey)
if (nrow(nas_or_empty_pathway_level2) > 0) {
message_warning <- paste(
nrow(nas_or_empty_pathway_level2),
"taxa have no information about pathway level 2.",
"Set to 'unknown'."
)
warning(message_warning)
}
df <-
df %>%
# Handle NAs and "unknown"
dplyr::mutate(pathway_level2 = dplyr::if_else(is.na(.data$pathway_level2) |
.data$pathway_level2 == "",
"unknown",
.data$pathway_level2
))
# Import all CBD pathways level 2 within chosen pathway level 1
pathways_level2_all <-
pathways_cbd() %>%
dplyr::filter(.data$pathway_level1 == chosen_pathway_level1) %>%
dplyr::distinct(.data$pathway_level2)
# Select pathways
if (!is.null(pathways)) {
pathways <- replace(pathways, is.na(pathways) | pathways == "", "unknown")
pathways <- unique(pathways)
df <-
df %>%
dplyr::filter(.data$pathway_level2 %in% pathways)
} else {
if (cbd_standard == TRUE) {
pathways <- pathways_level2_all %>% dplyr::pull()
} else {
pathways <- unique(df$pathway_level2)
}
}
# Check values in column with pathways level 2
invalid_pathways <-
df %>%
dplyr::anti_join(pathways_level2_all,
by = "pathway_level2"
) %>%
dplyr::distinct(.data$pathway_level2) %>%
dplyr::pull()
message_invalid_pathways <-
paste0(
"No CBD standard pathways level 2 value(s) in column `",
pathway_level2_names,
"`: ",
paste0(invalid_pathways, collapse = ", "),
". Valid pathways values: ",
paste0(unique(pathways_level2_all$pathway_level2), collapse = ", "),
"."
)
if (cbd_standard == TRUE) {
assertthat::assert_that(length(invalid_pathways) == 0,
msg = message_invalid_pathways
)
} else {
warning(message_invalid_pathways)
}
# Distinct taxa
if (!is.null(facet_column)) {
df <-
df %>%
dplyr::distinct(.data$taxonKey, .data$pathway_level2, !!dplyr::sym(facet_column))
}
# Transform pathway level 2 column to factor to make ordering in graph easily
df <-
df %>%
dplyr::mutate(
pathway_level2 = factor(.data$pathway_level2, levels = pathways)
) %>%
dplyr::as_tibble()
# Distinct taxa without facet
data_top_graph <-
df %>%
dplyr::distinct(
.data$taxonKey,
.data$pathway_level2
)
# Plot number of taxa per pathway_level1
top_graph <- NULL
if (nrow(data_top_graph) > 0) {
top_graph <-
ggplot2::ggplot(
data_top_graph,
ggplot2::aes(x = forcats::fct_rev(.data$pathway_level2))
) +
ggplot2::geom_bar() +
ggplot2::xlab(y_lab) +
ggplot2::ylab(x_lab) +
ggplot2::coord_flip() +
ggplot2::ggtitle(title)
} else {
data_top_graph <- NULL
}
if (is.null(facet_column)) {
return(list(plot = top_graph,
data_top_graph = data_top_graph,
data_facet_graph = NULL))
} else {
facet_graph <- NULL
if (nrow(df) > 0) {
facet_graph <-
ggplot2::ggplot(
df,
ggplot2::aes(x = forcats::fct_rev(.data$pathway_level2))
) +
ggplot2::geom_bar() +
ggplot2::xlab(y_lab) + # invert labels to get them right after flipping
ggplot2::ylab(x_lab) +
ggplot2::coord_flip() +
ggplot2::ggtitle(title) +
ggplot2::facet_wrap(facet_column)
} else {
df <- NULL
}
if (all(!is.null(top_graph), !is.null(facet_graph))) {
return(list(plot = egg::ggarrange(top_graph, facet_graph, draw = FALSE),
data_top_graph = data_top_graph,
data_facet_graph = df))
}
else {
return(list(plot = NULL, data_top_graph = NULL, data_facet_graph = NULL))
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.