knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) is_ghactions <- identical(Sys.getenv("CI"), "true")
# Enable this universe options(repos = c( kwbr = 'https://kwb-r.r-universe.dev', CRAN = 'https://cloud.r-project.org' )) # Install R package install.packages('wasserportal')
library(wasserportal) download_file <- function(url, tdir = tempdir()) { filename <- basename(url) target_path <- file.path(tdir, filename) download.file(url, dest = target_path, mode = "wb") target_path } yes_if_in_cas_number_of <- function(x, data) { dplyr::if_else(x %in% unique(data$cas_number), "yes", NA_character_) } top_filter_datatable <- function(data) { DT::datatable(data, filter = "top", rownames = FALSE) }
urls <- kwb.utils::resolve(list( record = "https://zenodo.org/record", norman_s0 = "<record>/5873975/files/susdat_2022-01-18-104316.csv", norman_s36_ubamp = "<record>/4323239/files/S36_UBAPMT_Dec2020.csv", norman_s90_zeropm = "<record>/5854252/files/ZeroPM_Box1.csv", norman_lists = "https://www.norman-network.com/nds/SLE/", gh_wasserportal = "https://kwb-r.github.io/wasserportal", stations_gwq_meta = "<gh_wasserportal>/stations_gwq_master.json", stations_gwq_data = "<gh_wasserportal>/stations_gwq_data.json" ))
### Download S0 | SUSDAT | Merged NORMAN Suspect List: SusDat ### Version: NORMAN-SLE-S0.0.4.1 (2021-01-18) ### DOI: 10.5281/zenodo.5873975 norman_s0_path <- download_file(urls$norman_s0) norman_s0 <- readr::read_csv(norman_s0_path) ### Download S36 | UBAPMT | Potential Persistent, Mobile and Toxic (PMT) substances ### Version: NORMAN-SLE-S36.0.2.1 (2020-12-15) ### DOI: "10.5281/zenodo.4323239" norman_s36_ubapmt_path <- download_file(urls$norman_s36_ubamp) norman_s36_ubapmt <- readr::read_csv(norman_s36_ubapmt_path) ### Download S90 | ZEROPMBOX1 | ZeroPM Box 1 Substances ### Version: Version NORMAN-SLE-S90.0.1.0 (2021-01-15) ### DOI: 10.5281/zenodo.5854252 norman_s90_zeropm_path <- download_file(urls$norman_s90_zeropm) norman_s90_zeropm <- readr::read_csv(norman_s90_zeropm_path) cas_wasserportal <- wasserportal::readPackageFile( file = "cas_wasserportal.csv", encoding = "UTF-8" ) cas_reach <- wasserportal::readPackageFile( file = "cas_reach.csv" ) wasserportal_cas_numbers <- unique(cas_wasserportal$cas_number) ubapmt_publication <- cas_reach %>% dplyr::filter(.data$cas_number %in% wasserportal_cas_numbers) ubapmt_zenodo <- norman_s36_ubapmt %>% dplyr::filter(.data$CAS_Number %in% wasserportal_cas_numbers) %>% dplyr::rename(cas_number = .data$CAS_Number) missing_on_zenodo <- cas_reach %>% dplyr::mutate( zenodo = yes_if_in_cas_number_of(.data$cas_number, ubapmt_zenodo), publication = yes_if_in_cas_number_of(.data$cas_number, ubapmt_publication) ) %>% dplyr::filter( publication == "yes" | zenodo == "yes" ) %>% dplyr::relocate( tidyselect::all_of(c("publication", "zenodo")), .before = .data$emission_likelihood ) top_filter_datatable(missing_on_zenodo) norman_s0_in_wasserportal <- norman_s0 %>% dplyr::mutate( cas_number = stringr::str_remove( .data$CAS_RN_Dashboard, pattern = "^CAS_RN:\\s" ) ) %>% dplyr::filter( !is.na(.data$cas_number), .data$cas_number %in% wasserportal_cas_numbers ) #top_filter_datatable(norman_s0_in_wasserportal) norman_lists_table <- xml2::read_html(urls$norman_lists) %>% rvest::html_element(css = ".table") %>% rvest::html_table() %>% janitor::clean_names() DT::datatable(norman_lists_table)
# Load R package ### For details see: ### https://kwb-r.github.io/wasserportal/articles/groundwater.html ### JSON files (see below) are build every day automatically at 5 a.m. with ### continuous integration, for build status, see here: ### https://github.com/KWB-R/wasserportal/actions/workflows/pkgdown.yaml ### GW quality (all available parameters!) gwq_master <- jsonlite::fromJSON(urls$stations_gwq_meta) gwq_data <- jsonlite::fromJSON(urls$stations_gwq_data) %>% dplyr::filter(.data$Parameter %in% cas_wasserportal$Parameter) %>% dplyr::inner_join(cas_wasserportal, by = "Parameter") %>% dplyr::inner_join(norman_s0_in_wasserportal, by = "cas_number") %>% dplyr::filter( .data$logKow_EPISuite <= 4.5, !is.na(.data$`LC50_48_hr_ug/L`) ) %>% dplyr::mutate( Messstellennummer = as.character(Messstellennummer), ## CensorCode: either "below" (less than) for concentration below detection ## limit (value is detection limit) or "nc" (not censored) for concentration ## above detection limit CensorCode = dplyr::case_when( Messwert <= 0 ~ "lt", TRUE ~ "nc" ), Messwert = dplyr::case_when( Messwert < 0 ~ abs(Messwert), ### Only two decimal numbers are exported by Wasserportal, but some ### sustances have lower detection limit, e.g. 0.002 which results in ### -0.00 export, thus the dummy detection limit 0.00999 was introduced ### (until fixed by Senate: Christoph will sent a email to Matthias ### Schroeder) Messwert == 0 ~ 0.009999, TRUE ~ Messwert ) ) %>% dplyr::left_join(gwq_master, by = c("Messstellennummer" = "Nummer")) gwq_subs <- gwq_data %>% dplyr::count(.data$cas_number, .data$CensorCode) %>% tidyr::pivot_wider(names_from = CensorCode, values_from = n) %>% dplyr::mutate( lt = ifelse(is.na(lt), 0, lt), nc = ifelse(is.na(nc), 0, nc), n_total = lt + nc, percent_nc = 100 * nc / n_total ) %>% dplyr::rename( n_lt = lt, n_nc = nc ) %>% dplyr::left_join(norman_s0_in_wasserportal) %>% dplyr::rename(name_norman = .data$Name_Dashboard) readr::write_csv(gwq_subs, "gwq_subs.csv") top_filter_datatable(gwq_subs) samples <- gwq_data %>% dplyr::rename(name_norman = .data$Name_Dashboard) %>% dplyr::select( name_norman, cas_number, Messstellennummer, Datum, CensorCode, Messwert, Einheit ) samples_by_para_and_station <- gwq_data %>% dplyr::count( .data$cas_number, .data$Messstellennummer, .data$CensorCode ) %>% tidyr::pivot_wider(names_from = CensorCode, values_from = n) %>% dplyr::mutate( lt = ifelse(is.na(lt), 0, lt), nc = ifelse(is.na(nc), 0, nc), n_total = lt + nc, percent_nc = 100 * nc / n_total ) %>% dplyr::rename( n_lt = lt, n_nc = nc ) %>% dplyr::left_join(norman_s0_in_wasserportal) %>% dplyr::rename(name_norman = .data$Name_Dashboard) %>% dplyr::left_join(gwq_master, by = c(Messstellennummer = "Nummer")) %>% dplyr::arrange(dplyr::desc(percent_nc)) # samples_by_category_and_station <- samples_by_para_and_station %>% # dplyr::group_by(.data$category, # .data$category_name, # .data$Messstellennummer) %>% # dplyr::summarise(n_lt = sum(n_lt), # n_nc = sum(n_nc), # n_total = sum(n_total)) %>% # dplyr::mutate(percent_nc = 100*n_nc/n_total) %>% # dplyr::arrange(dplyr::desc(percent_nc)) gwq_subs_stations_n_abovedetection <- samples_by_para_and_station %>% dplyr::filter(n_nc > 0) %>% dplyr::group_by(.data$cas_number) %>% dplyr::summarise(n_stations_abovedetection = dplyr::n()) gwq_subs_stations_n_paras_abovedetection <- samples_by_para_and_station %>% dplyr::filter(n_nc > 0) %>% dplyr::group_by( #.data$category, #.data$category_name, .data$Messstellennummer ) %>% dplyr::summarise(n_paras_abovedetection = dplyr::n()) %>% dplyr::left_join(gwq_master, by = c("Messstellennummer" = "Nummer")) # gwq_subs_stations_n_paras_abovedetection_wide <- gwq_subs_stations_n_paras_abovedetection %>% # dplyr::ungroup() %>% # dplyr::select(Messstellennummer, category, n_paras_abovedetection) %>% # tidyr::pivot_wider(names_from = "category", # names_prefix = "cat_", # values_from = "n_paras_abovedetection") %>% # dplyr::left_join(gwq_master, by = c("Messstellennummer" = "Nummer")) samples_by_para_and_station_n <- samples_by_para_and_station %>% dplyr::group_by(name_norman, cas_number) %>% dplyr::summarise( n_stations_sampled = dplyr::n(), n_stations_total = length(unique(gwq_master$Nummer)), n_lt = sum(n_lt), n_nc = sum(n_nc), n_total = sum(n_total) ) %>% dplyr::left_join(gwq_subs_stations_n_abovedetection) %>% dplyr::mutate( n_stations_abovedetection = ifelse( is.na(n_stations_abovedetection), 0, n_stations_abovedetection ), n_abovedetection = ifelse(is.na(n_nc), 0, n_nc), n_belowdetection = ifelse(is.na(n_lt), 0, n_lt), percent_samples_abovedetection = 100 * n_nc / n_total, percent_stations_abovedetection = 100 * n_stations_abovedetection / n_stations_total, percent_stations_sampled = 100 * n_stations_sampled / n_stations_total ) %>% dplyr::select( name_norman, cas_number, n_stations_abovedetection, n_stations_sampled, n_stations_total, percent_stations_abovedetection, percent_stations_sampled, n_belowdetection, n_abovedetection, n_total, percent_samples_abovedetection ) %>% dplyr::arrange( dplyr::desc(percent_stations_abovedetection), dplyr::desc(percent_samples_abovedetection) ) ### Export data to EXCEL gwq_data_list <- list( norman_lists_table = norman_lists_table, cas_wasserportal = cas_wasserportal, cas_reach = cas_reach, ubapmt_missing_on_zenodo = missing_on_zenodo, norman_s0_in_wasserportal = norman_s0_in_wasserportal, samples = samples, samples_by_para = gwq_subs %>% dplyr::arrange(dplyr::desc(percent_nc)), samples_by_para_and_station = samples_by_para_and_station, samples_by_para_and_station_n = samples_by_para_and_station_n ) #samples_by_stations_para_above = gwq_subs_stations_n_paras_abovedetection_wide) #samples_by_category_and_station = samples_by_category_and_station) openxlsx::write.xlsx( x = gwq_data_list, file = "wasserportal_gwq_norman-list_s0_v1.0.0.xlsx", overwrite = TRUE )
Filter criteria:
logKow_EPISuite
<= 4.5
)LC50_48_hr_ug/
not NA
)gwq_subs_plot <- samples_by_para_and_station_n %>% dplyr::filter(.data$percent_samples_abovedetection > 0) %>% dplyr::left_join(norman_s0_in_wasserportal) %>% dplyr::arrange(.data$`LC50_48_hr_ug/L`) %>% dplyr::mutate(label = sprintf( "%s (%s, LC50_48_hr_ug/L: %.1f, logKow_EPISuite: %.1f : ", .data$name_norman, .data$cas_number, .data$`LC50_48_hr_ug/L`, .data$logKow_EPISuite )) gwq_subs_plot$label <- as.factor(gwq_subs_plot$label) g1 <- gwq_subs_plot %>% ggplot2::ggplot(ggplot2::aes( x = .data$percent_samples_abovedetection, y = forcats::fct_reorder( .data$label, .data$percent_samples_abovedetection, .desc = TRUE ), label = sprintf( "%2.2f %% (n_samples = %d, n_stations = %d)", .data$percent_samples_abovedetection, .data$n_total, .data$n_stations_sampled ), fill = .data$logKow_EPISuite )) + #ggplot2::scale_fill_brewer(palette="RdYlGn") + ggplot2::scale_y_discrete(limits = rev) + ggplot2::geom_bar(stat = "identity") + ggplot2::geom_text(size = 1.8, hjust = -0.01) + ggplot2::xlim(c(0,120)) + ggplot2::theme_bw() + ggplot2::theme(legend.position = "top") + ggplot2::labs( # subtitle = sprintf( # "%d / %d substances (>= 1 value above detection limit)", # sum(gwq_subs_plot$n_abovedetection > 0), # nrow(gwq_subs_plot) # ), y = "", x = "Percent of Samples above Detection Limit (%)" ) g1 ggplot2::ggsave( filename = "wasserportal_norman-s0-substances_only-above-detection-limit.jpeg", plot = g1, width = 40, height = 20, units = "cm" )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.