# Should the data of only a subset of stations be downloaded?
# If TRUE, the station tables are reduced to a random sample of stations
# before the groundwater data are requested (see the chunk further below).
use_random_subset_of_stations <- FALSE

# Set the default knitr chunk options for this vignette
knitr::opts_chunk$set(
  # Show source code and its output together in one block
  collapse = TRUE,
  # Prefix that is put in front of each line of printed output
  comment = "#>"
  #, eval = FALSE # to temporarily "switch off" this (long running) vignette
)

Define URLs and Helper Functions

# Make the pipe operator available without attaching the magrittr package
`%>%` <- magrittr::`%>%`

# URLs of the JSON files that are read further below.
# NOTE(review): the "<gh_wasserportal>" placeholders are presumably replaced
# with the value of the "gh_wasserportal" element by kwb.utils::resolve() --
# confirm against the kwb.utils documentation.
urls <- kwb.utils::resolve(list(
  gh_wasserportal = "https://kwb-r.github.io/wasserportal",
  stations_gwl_meta = "<gh_wasserportal>/stations_gwl_master.json",
  stations_gwl_data = "<gh_wasserportal>/stations_gwl_data.json",
  stations_gwq_meta = "<gh_wasserportal>/stations_gwq_master.json",
  stations_gwq_data = "<gh_wasserportal>/stations_gwq_data.json",
  stations_crosstable = "<gh_wasserportal>/stations_crosstable.json"
))

# Display a data frame as a DT table widget with the filter controls on top
#
# data: data frame to be displayed
# Returns the object created by DT::datatable()
top_filter_data_table <- function(data) {
  DT::datatable(data = data, filter = "top")
}

# Print a Markdown bullet list with one link per file
#
# base_url: URL prefix that is put in front of each file name
# files: character vector of file names, used both as link text and as the
#   last part of the link target
# The bullets are written to the console, separated by empty lines.
cat_file_enumeration <- function(base_url, files) {
  # One Markdown list item "- [file](base_url/file)" per file
  bullets <- sprintf("- [%s](%s/%s)", files, base_url, files)

  # Join the items with a blank line in between and print them
  cat(paste(bullets, collapse = "\n\n"))
}

Master Data

# Request the station information in "list" form; the element names are
# evaluated next
stations_list <- wasserportal::get_stations(type = "list")

# Flag the list elements whose names contain "groundwater"
is_gw <- stringr::str_detect(names(stations_list), "groundwater")

# Pass only the groundwater elements on for export.
# NOTE(review): presumably writes one CSV file per element and returns the
# file names (they are used as link targets below) -- confirm.
files <- wasserportal::list_masters_data_to_csv(stations_list[is_gw])

The following groundwater master data .csv files are available for download:

# Print one Markdown link per master data file, below the GitHub pages URL
cat_file_enumeration(urls$gh_wasserportal, files)

Get Groundwater Data

# Optionally reduce the station tables to a random sample of (at most) ten
# stations each, so that less data need to be requested below
if (use_random_subset_of_stations) {

  # Keep a copy of the complete station list
  stations_list_bak <- stations_list

  # Draw "size" random rows from a data frame. The number of rows is taken
  # from the data itself instead of being hard-coded, so that the sampling
  # keeps working when stations are added or removed. If there are fewer than
  # "size" rows, all rows are returned (in random order).
  sample_rows <- function(data, size = 10L) {
    n_rows <- nrow(data)
    rows <- sample.int(n_rows, size = min(size, n_rows))
    # drop = FALSE: keep a data frame even if there is only one column
    data[rows, , drop = FALSE]
  }

  x <- sample_rows(stations_list$groundwater.level)
  stations_list$groundwater.level <- x
  x <- sample_rows(stations_list$groundwater.quality)
  stations_list$groundwater.quality <- x
}

# Request the groundwater data for the stations given in stations_list
# (the full list, or the random subset if it was created above).
# NOTE(review): debug = TRUE presumably switches on progress messages --
# confirm against the wasserportal documentation.
gw_data_list <- wasserportal::get_groundwater_data(
  stations_list = stations_list, 
  debug = TRUE
)

# NOTE(review): presumably writes the data to .zip files and returns the file
# names (they are used as link targets below) -- confirm.
files <- wasserportal::list_timeseries_data_to_zip(gw_data_list)

# Print the returned value
files

# Data availability per parameter:
# stack all elements of the list into one data frame, count the rows per
# combination of parameter and unit and sort by decreasing count
gw_data_list %>%
  dplyr::bind_rows() %>% 
  dplyr::count(Parameter, Einheit) %>% 
  dplyr::arrange(dplyr::desc(.data$n))

The following groundwater data .zip files are available for download:

# Print one Markdown link per data file, below the GitHub pages URL
cat_file_enumeration(urls$gh_wasserportal, files)

Do Your Own Analysis!

The CSV/JSON/ZIP files are scraped and prepared each day at 5 a.m. UTC and can be downloaded for re-use in R. The following data are available:

library(wasserportal)

# Read the stations crosstable from the JSON file at the configured URL
stations_crosstable <- jsonlite::fromJSON(urls$stations_crosstable)

# Show the structure of the object that was read
str(stations_crosstable)

Please find an example below for merging all this information into a single data frame:

library(wasserportal)

# Convert the station number column to character
#
# data: data frame with a column "Messstellennummer"
# Returns data with "Messstellennummer" converted by as.character()
site_number_to_character <- function(data) {
  dplyr::mutate(
    data,
    Messstellennummer = as.character(.data$Messstellennummer)
  )
}

# Add the master data columns to the measurement data (left join)
#
# data: data frame with the station number in column "Messstellennummer"
# master_data: data frame with the station number in column "Nummer"
# Returns all rows of data, extended by the matching columns of master_data
left_join_by_site <- function(data, master_data) {
  # Column in data (name) and the corresponding column in master_data (value)
  join_columns <- c(Messstellennummer = "Nummer")

  dplyr::left_join(x = data, y = master_data, by = join_columns)
}

### GW levels
# Read the master data and the measurement data from their JSON files, convert
# the station number to character and add the master data columns by station
gwl_master <- jsonlite::fromJSON(urls$stations_gwl_meta)
gwl_data <- jsonlite::fromJSON(urls$stations_gwl_data) %>%  
  site_number_to_character() %>% 
  left_join_by_site(gwl_master)

str(gwl_data)

### GW quality (all available parameters!)
# Same steps as for the GW levels, using the GW quality files
gwq_master <- jsonlite::fromJSON(urls$stations_gwq_meta)
gwq_data <- jsonlite::fromJSON(urls$stations_gwq_data) %>%  
  site_number_to_character() %>% 
  left_join_by_site(gwq_master)

str(gwq_data)

### Merge GW level and quality into one data frame
# Rows are stacked; dplyr::bind_rows() matches the columns by name
gw_data <- dplyr::bind_rows(gwl_data, gwq_data)

str(gw_data)

Data Availability

GW Quality

# Helper functions to be reused in different data summaries
# Reduce a data frame to the four columns used in the data summaries
#
# data: data frame containing the columns "Messstellennummer", "Parameter",
#   "Datum" and "Messwert"
# Returns data with only these columns, in this order
select_main_columns <- function(data) {
  main_columns <- c("Messstellennummer", "Parameter", "Datum", "Messwert")

  dplyr::select(data, dplyr::all_of(main_columns))
}

# Summarise a (grouped) data frame: first date, last date and number of rows
#
# data: data frame (usually grouped) with a column "Datum"
# Returns an ungrouped data frame with the columns date_min, date_max and n
# (plus the grouping columns, if any), sorted by decreasing n
summarise_min_max_n_arrange <- function(data) {
  # One row per group; .groups = "drop" removes the grouping from the result
  summary_table <- dplyr::summarise(
    data,
    date_min = min(.data$Datum),
    date_max = max(.data$Datum),
    n = dplyr::n(),
    .groups = "drop"
  )

  # Groups with the most rows first
  dplyr::arrange(summary_table, dplyr::desc(.data$n))
}
# First date, last date and number of GW quality records per parameter
gwq_data_by_parameter <- gwq_data %>%
    select_main_columns() %>%
    dplyr::group_by(.data$Parameter) %>%
    summarise_min_max_n_arrange()

top_filter_data_table(gwq_data_by_parameter)
# Same summary, but per combination of parameter and station
gwq_data_by_parameter_and_station <- gwq_data %>%
  select_main_columns() %>%
  dplyr::group_by(.data$Parameter, .data$Messstellennummer) %>%
  summarise_min_max_n_arrange()

top_filter_data_table(gwq_data_by_parameter_and_station)

Export

GW Quality

# Write the two summaries, the GW quality data and the master data to one
# Excel file in the working directory. The data frames are passed as a named
# list; an existing file of the same name is overwritten.
openxlsx::write.xlsx(
  x = list(
  gwq_by_parameter = gwq_data_by_parameter,
  gwq_by_parameter_and_station = gwq_data_by_parameter_and_station,
  gwq_data = gwq_data,
  gwq_master = gwq_master
),
  file = "wasserportal_gwq_data.xlsx",
  overwrite = TRUE
)


KWB-R/wasserportal documentation built on June 6, 2024, 10:26 a.m.