# Switch: if TRUE, the station tables are reduced to a random sample of
# stations before the measurement data are downloaded; if FALSE, the data of
# all stations are downloaded.
use_random_subset_of_stations <- FALSE

# Chunk options that apply to all code chunks of this vignette
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
  # , eval = FALSE # to temporarily "switch off" this (long running) vignette
)
# Make the pipe operator available without attaching magrittr
`%>%` <- magrittr::`%>%`

# Addresses of the published files. Each "<gh_wasserportal>" placeholder refers
# to the base URL given in the first element; kwb.utils::resolve() is used to
# fill the placeholders in.
urls <- kwb.utils::resolve(list(
  gh_wasserportal = "https://kwb-r.github.io/wasserportal",
  stations_gwl_meta = "<gh_wasserportal>/stations_gwl_master.json",
  stations_gwl_data = "<gh_wasserportal>/stations_gwl_data.json",
  stations_gwq_meta = "<gh_wasserportal>/stations_gwq_master.json",
  stations_gwq_data = "<gh_wasserportal>/stations_gwq_data.json",
  stations_crosstable = "<gh_wasserportal>/stations_crosstable.json"
))

# Show a data frame as an interactive table with column filters on top
#
# data: data frame to be displayed
# Returns the object created by DT::datatable()
top_filter_data_table <- function(data) {
  DT::datatable(data = data, filter = "top")
}

# Print a markdown bullet list of links, one bullet per file
#
# base_url: URL of the folder that contains the files
# files: character vector of file names (used as link text and link target)
# Called for its side effect of writing to the console/document
cat_file_enumeration <- function(base_url, files) {
  bullets <- sprintf("- [%s](%s/%s)", files, base_url, files)

  # Separate the bullets by an empty line
  cat(paste(bullets, collapse = "\n\n"))
}
# Get the station tables as a list
stations_list <- wasserportal::get_stations(type = "list")

# Flag the list elements whose name contains "groundwater"
is_gw <- stringr::str_detect(
  string = names(stations_list),
  pattern = "groundwater"
)

# Pass only the groundwater tables to the csv export. The result is kept in
# "files" because it is used below to list the files that can be downloaded.
files <- wasserportal::list_masters_data_to_csv(stations_list[is_gw])
The following groundwater master data .csv
files are available for download:
# List the master data files as markdown links below the base URL
cat_file_enumeration(base_url = urls$gh_wasserportal, files = files)
# Draw at most `size` random rows from a data frame.
#
# The number of rows to sample from is taken from the data itself instead of
# being hard-coded, so the sampling stays correct when stations are added to
# or removed from the station tables. The sample size is capped at the number
# of available rows.
#
# data: data frame to sample from
# size: maximum number of rows to return (default: 10)
# Returns a data frame containing the sampled rows of `data`
sample_rows <- function(data, size = 10L) {
  n_rows <- nrow(data)
  data[sample.int(n_rows, size = min(size, n_rows)), , drop = FALSE]
}

if (use_random_subset_of_stations) {

  # Keep the full station tables before they are overwritten
  stations_list_bak <- stations_list

  stations_list$groundwater.level <- sample_rows(
    stations_list$groundwater.level
  )

  stations_list$groundwater.quality <- sample_rows(
    stations_list$groundwater.quality
  )
}

# Download the groundwater data for the (possibly reduced) station tables
gw_data_list <- wasserportal::get_groundwater_data(
  stations_list = stations_list,
  debug = TRUE
)

# Pass the time series to the zip export. The result is kept in "files"
# because it is used below to list the files that can be downloaded.
files <- wasserportal::list_timeseries_data_to_zip(gw_data_list)
files

# Data availability per parameter: number of records per combination of
# parameter and unit, most frequent combination first
gw_data_list %>%
  dplyr::bind_rows() %>%
  dplyr::count(Parameter, Einheit) %>%
  dplyr::arrange(dplyr::desc(.data$n))
The following groundwater data .zip
files are available for download:
# List the time series files as markdown links below the base URL
cat_file_enumeration(base_url = urls$gh_wasserportal, files = files)
Download CSV/JSON/ZIP files scraped and prepared each day at 5 a.m. UTC for re-use in R. The following data are available:
Data availability
stations_crosstable.json:
Available parameters per station (see wasserportal::get_overview_options()
for the available options). Note: this also includes surface water monitoring stations!
library(wasserportal)

# Read the crosstable from the published JSON file
stations_crosstable <- jsonlite::fromJSON(txt = urls$stations_crosstable)

# Show the structure of what was read
utils::str(stations_crosstable)
Master Data
stations_gwl_master.json: for GW level stations
stations_gwq_master.json: for GW quality stations
Measurements
stations_gwl_data.json: GW level measurements for stations
stations_gwq_data.json: GW quality measurements for all available parameters and stations
Please find an example below for merging all this information into a single data frame:
library(wasserportal)

# Convert column "Messstellennummer" to character
#
# data: data frame with a column "Messstellennummer"
# Returns `data` with that column converted by as.character()
site_number_to_character <- function(data) {
  dplyr::mutate(
    data,
    Messstellennummer = as.character(.data$Messstellennummer)
  )
}

# Add the master data columns to the measurement data
#
# data: data frame with the key column "Messstellennummer"
# master_data: data frame with the key column "Nummer"
# Returns the left join of `data` with `master_data`
left_join_by_site <- function(data, master_data) {
  dplyr::left_join(
    x = data,
    y = master_data,
    by = c("Messstellennummer" = "Nummer")
  )
}

### GW levels
gwl_master <- jsonlite::fromJSON(urls$stations_gwl_meta)

gwl_data <- left_join_by_site(
  data = site_number_to_character(
    jsonlite::fromJSON(urls$stations_gwl_data)
  ),
  master_data = gwl_master
)

str(gwl_data)

### GW quality (all available parameters!)
gwq_master <- jsonlite::fromJSON(urls$stations_gwq_meta)

gwq_data <- left_join_by_site(
  data = site_number_to_character(
    jsonlite::fromJSON(urls$stations_gwq_data)
  ),
  master_data = gwq_master
)

str(gwq_data)

### Merge GW level and quality into one data frame
gw_data <- dplyr::bind_rows(gwl_data, gwq_data)

str(gw_data)
# Helper functions that are shared by the data summaries below

# Reduce a data frame to its main columns
#
# data: data frame with the columns "Messstellennummer", "Parameter", "Datum"
#   and "Messwert" (all of them are required)
# Returns `data` restricted to these four columns
select_main_columns <- function(data) {
  main_columns <- c("Messstellennummer", "Parameter", "Datum", "Messwert")

  dplyr::select(data, dplyr::all_of(main_columns))
}

# Summarise (grouped) data: smallest and largest value of "Datum" and number of
# rows (per group), sorted by decreasing number of rows
#
# data: (grouped) data frame with a column "Datum"
# Returns an ungrouped data frame with the columns date_min, date_max and n
#   (in addition to the grouping columns, if any)
summarise_min_max_n_arrange <- function(data) {
  summary_data <- dplyr::summarise(
    data,
    date_min = min(.data$Datum),
    date_max = max(.data$Datum),
    n = dplyr::n(),
    .groups = "drop"
  )

  dplyr::arrange(summary_data, dplyr::desc(.data$n))
}
# Date range and number of records per parameter
gwq_data_by_parameter <- summarise_min_max_n_arrange(
  dplyr::group_by(
    select_main_columns(gwq_data),
    .data$Parameter
  )
)

top_filter_data_table(gwq_data_by_parameter)
# Date range and number of records per combination of parameter and station
gwq_data_by_parameter_and_station <- summarise_min_max_n_arrange(
  dplyr::group_by(
    select_main_columns(gwq_data),
    .data$Parameter,
    .data$Messstellennummer
  )
)

top_filter_data_table(gwq_data_by_parameter_and_station)
# Data frames to be exported, one list element per data frame
gwq_sheets <- list(
  gwq_by_parameter = gwq_data_by_parameter,
  gwq_by_parameter_and_station = gwq_data_by_parameter_and_station,
  gwq_data = gwq_data,
  gwq_master = gwq_master
)

# Write the Excel file to the working directory, replacing an existing file
openxlsx::write.xlsx(
  x = gwq_sheets,
  file = "wasserportal_gwq_data.xlsx",
  overwrite = TRUE
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.