#' plot_pah
#'
#' Create a bar plot of PAH concentrations. The function allows you to group the data by one
#' or more variables, and decide which grouping variables will determine order or color.
#'
#' @export
#' @param pah_dat dataframe with PAH concentrations. It is recommended that entire samples with censored observations
#' (e.g., below detection limit) are removed from this data frame prior to calculating the difference between source and samples.
#' This function assumes these samples have already been removed.
#' @param conc_column column that contains PAH concentrations
#' @param sample_id_column column name that contains unique sample id
#' @param compound_column column name that contains PAH compound names. This can also include other chemicals, or sums of chemicals.
#' @param compound_plot a vector of strings identifying which compounds from compound_column to include in the plot. If more than one compound is given, the plot will be faceted by compound.
#' @param color_column a column with group variable by which to color code bars
#' @param group_column a column by which to group and order the bars.
#' @param order_column a column by which to order bars within groups. If left NA, this will default
#' to the conc_column so bars are ordered low to high within groups or across all
#' bars if no groups are given.
#' @param conc_units character string, units for PAH concentration that will be used
#' in labels on plots
#' @param thresholds logical, whether to include the EPA 16 priority compound thresholds via a horizontal line. This is only appriopriate if the values being plotted are the EPA priority sum concentration.
#' @import ggplot2
#' @import dplyr
#' @importFrom rlang .data
#' @importFrom rlang sym
#' @examples
#'
plot_pah <- function(pah_dat, conc_column = 'Value', sample_id_column = 'Sample',
compound_column = 'Parameter', compound_plot = "Total PAH",
color_column = NA, group_column = NA, order_column = NA, conc_units = "ppb",
thresholds = TRUE) {
quo_compound_column <- sym(compound_column)
quo_conc_column <- sym(conc_column)
quo_sample_id_column <- sym(sample_id_column)
if (!is.na(group_column)) {quo_group_column <- sym(group_column)}
order_column <- ifelse(is.na(order_column), conc_column, order_column)
quo_order_column <- sym(order_column)
pah_dat_temp <- filter(pah_dat, (!!quo_compound_column) %in% compound_plot)
barchart_theme <- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
legend.position = "top", panel.spacing=unit(5,"pt"), strip.background = element_blank(),
strip.text = element_text(margin = margin(4,10,4,10, unit="pt")))
yvals <- c(round(min(log10(pah_dat_temp[[conc_column]])), 0), round(max(log10(pah_dat_temp[[conc_column]])), 0))
ybreaks <- yvals[1]:yvals[2]
yticklabs <- 10^(yvals[1]:yvals[2])
if (anyNA(group_column)) {
# plotting if there is no grouping column given
p <- ggplot(pah_dat_temp, aes_string(x = paste0('reorder(',sample_id_column,',', order_column,')'), y = conc_column)) +
geom_bar(stat="identity", position="identity", colour="black") +
barchart_theme +
labs(x = "", y = paste0("Concentration (", conc_units, ")"))
if (!is.na(color_column)) {
# add color if color column is given
p <- p + aes_string(fill = color_column)
}
if (length(compound_plot) > 1) {
# facet by compound if more than one compound ID is given
p <- p + facet_wrap(as.formula(paste('~',compound_column)), ncol = ifelse(length(compound_plot) > 3, 2, 1))
}
} else {
# now handle groups if groups are given
# first group data, calculate mean by group to determine group order
temp.order <- pah_dat_temp %>%
group_by(!!quo_group_column) %>%
summarize(max = max(!!quo_conc_column)) %>%
arrange(max)
pah_dat_temp[group_column] <- factor(pah_dat_temp[[group_column]],
levels = temp.order[[group_column]])
pah_dat_temp <- pah_dat_temp %>%
ungroup() %>%
mutate(thresholdPEC = ifelse(thresholds, ifelse(conc_units == 'ppb', 22800,22.8), NA),
thresholdTEC = ifelse(thresholds, ifelse(conc_units == 'ppb', 1610,1.610), NA))
dummy.dat <- pah_dat_temp %>%
select(!!quo_group_column, !!quo_compound_column, thresholdPEC, thresholdTEC) %>%
distinct()
p <- ggplot(pah_dat_temp, aes_string(x = paste0('reorder(',sample_id_column,',',order_column,")"), y = conc_column)) +
geom_bar(stat = 'identity', width = 0.8, position = position_dodge(width = 2), colour="black" ) +
# need to test this: does this work of PARAM_SYNYNOYM is one variable?
geom_hline(data = dummy.dat, aes(yintercept = thresholdPEC)) +
geom_hline(data = dummy.dat, aes(yintercept = thresholdTEC)) +
#geom_text(aes(y = 50000, label = c(rep(NA, 4), "TEC", rep(NA, 66)))) +
facet_grid(as.formula(paste(".~", group_column)),
space = "free_x", scales = 'free_x', switch = 'x') +
barchart_theme +
labs(x = "", y = paste0("Concentration (", conc_units, ")")) +
scale_y_continuous(breaks = 10^ybreaks, labels = yticklabs, trans = 'log10') +
theme(strip.text.x = element_text(angle = 90, hjust = 1),
panel.spacing = unit(0.2, "lines"), strip.placement = 'outside')
if (!is.na(color_column)) {
p <- p + aes_string(fill = color_column)
}
}
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.