knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(dplyr) library(tidyr)
if (! require("remotes")) { install.packages("remotes", repos = "https://cloud.r-project.org") } remotes::install_github("kwb-r/kwb.geosalz", dependencies = TRUE)
# Define paths and resolve placeholders paths <- list( servername = Sys.getenv("servername"), root_server = "//<servername>/processing", project = "geosalz", task = "labor", processing = "<root>/<project>/<task>", input_dir = "<processing>/precleaned-data/v0.2", input_dir_meta = "<input_dir>/META", export_dir = "<processing>/precleaned-data/v0.3", export_dir_meta = "<export_dir>/META", cleaned_data_dir = "<processing>/cleaned-data", figures_dir = "<processing>/figures", foerdermengen = "<export_dir>/2018-04-27 Rohwasser Bericht - Galeriefördermengen.xlsx", parameters = "<export_dir_meta>/2018-06-01 Lab Parameter.xlsx", lookup_para = "<export_dir_meta>/lookup_para.csv", sites = "<export_dir_meta>/Info-Altdaten.xlsx" ) paths <- kwb.utils::resolve(paths, root = "root_server") #paths <- kwb.utils::resolve(paths, root = "C:/projects")
input_dir_exists <- dir.exists(paths$input_dir) knitr::opts_chunk$set(eval = input_dir_exists, message = FALSE, echo = TRUE)
Check if input directory selected above exists:
# Check if input directory exists input_dir <- kwb.utils::safePath(kwb.utils::selectElements(paths, "input_dir"))
Check and in case the export directory is not available create it:
# Check if exists and if not create it export_dir <- paths %>% kwb.utils::selectElements("export_dir") %>% kwb.utils::createDirectory() %>% kwb.utils::safePath()
## Convert xls to xlsx files kwb.geosalz::convert_xls_as_xlsx(input_dir, export_dir) ## Copy xlsx files kwb.geosalz::copy_xlsx_files(input_dir, export_dir, overwrite = TRUE)
Select files if they consist of:
# Get all xlsx files to be imported files <- dir(export_dir, ".xlsx", recursive = TRUE, full.names = TRUE) files_meta <- c( "Meta Info", "Header ident", "Parameter ident", "Parameter", "Info-Altdaten", "Brandenburg_Parameter_BWB_Stolpe", "Kopie von Brandenburg_Parameter_BWB_Stolpe", "2005-10BeschilderungProbenahmestellenGWWIII", "Bezeichnungen der Reinwasserstellen", "ReinwasserNomenklatur", "Info zu Altdaten 1970-1998", "2018-06-01 Lab Parameter" ) files_header1_meta <- c( "FRI_Br_GAL_C_Einzelparameter", "FRI_Roh_Rein_NH4+NO3_2001-2003", "MTBE_2003-11_2004", "Reinwasser_2003_Fe_Mn", ## unclean "VC_CN_in Brunnen bis Aug_2005 ", ## unclean "Wuhlheide_Beelitzhof_Teildaten" ## unclean ) files_header1 <- c( "2018-04-11 Chlorid in Brunnen - Übersicht", "2018-04-27 LIMS Reiw & Rohw Sammel ", "2018-04-27 Rohwasser Bericht - Galeriefördermengen" ) files_header4 <- c( "STO Rohw_1999-6_2004", "Wuhlheide_1999-2003_Okt - Neu", "KAU_1999-Okt2003" ) files_archive <- "Siebert" files_to_ignore <- c( files_meta, files_header1, files_header1_meta, files_header4, files_archive ) in_files_to_ignore <- kwb.utils::removeExtension(basename(files)) %in% files_to_ignore filepaths_header2 <- files[!in_files_to_ignore]
cond1 <- kwb.utils::removeExtension(basename(files)) %in% files_header1_meta filepaths_header1_meta <- files[cond1] labor_list_1meta <- kwb.geosalz::import_labor( filepaths_header1_meta , export_dir, func = kwb.geosalz::read_bwb_header1_meta ) has_errors <- sapply(labor_list_1meta, kwb.utils::isTryError) #has_errors labor_df_1meta <- data.table::rbindlist( l = labor_list_1meta[!has_errors], fill = TRUE )
library(dplyr) cond2 <- labor_df_1meta$OriginalName %in% c( "el. Leitfähigkeit (25 °C)", "Chlorid", "Sulfat" ) labor_df_1meta[cond2 & !is.na(labor_df_1meta$DataValue), ] %>% dplyr::group_by_("OriginalName", "Meßstelle") %>% dplyr::summarise(n = n())
labor_header2_list <- kwb.geosalz::import_labor( files = filepaths_header2, export_dir = export_dir, func = kwb.geosalz::read_bwb_header2 ) has_errors <- sapply(labor_header2_list, kwb.utils::isTryError) #has_errors labor_header2_df <- data.table::rbindlist( l = labor_header2_list[!has_errors], fill = TRUE )
cond3 <- kwb.utils::removeExtension(basename(files)) %in% files_header4 filepaths_header4 <- files[cond3] labor_header4_list <- kwb.geosalz::import_labor( files = filepaths_header4, export_dir = export_dir, func = kwb.geosalz::read_bwb_header4 ) has_errors <- sapply(labor_header4_list, inherits, "try-error") has_errors labor_header4_df <- data.table::rbindlist( l = labor_header4_list[!has_errors], fill = TRUE )
labor_all <- data.table::rbindlist( l = list( x1 = labor_header2_df, x2 = labor_header4_df ), fill = TRUE )
labor_all <- labor_all %>% dplyr::filter(!is.na(.data$DataValue)) %>% dplyr::mutate( Date = dplyr::if_else( condition = !is.na(.data$Datum), true = .data$Datum, false = .data$Probenahme ) ) %>% ### Some "Datum" rentries are missing in; ### K-TL_LSW-Altdaten-Werke Teil 1\Werke Teil 1\Kaulsdorf\KAU_1999-Okt2003.xlsx ### sheets: 66 KAU Rein 1999-2000, 65 KAU NordSüd 1999-2000 dplyr::filter(!is.na(.data$Date)) %>% dplyr::filter(!is.na(.data$VariableName_org)) nrow(labor_all)
labordaten_ww <- kwb.geosalz::add_para_metadata( df = labor_all, lookup_para_path = paths$lookup_para, parameters_path = paths$parameters ) labordaten_ww <- kwb.geosalz::add_site_metadata( df = labordaten_ww, site_path = paths$sites ) %>% dplyr::mutate( year = as.numeric(format(.data$Date,format = '%Y')), DataValue = as.numeric(.data$DataValue) ) nrow(labordaten_ww)
fs::dir_create(paths$cleaned_data_dir, recursive = TRUE) print(sprintf("Export cleaned data to: %s", paths$cleaned_data_dir)) foerdermengen_ww <- kwb.geosalz::get_foerdermengen(paths$foerdermengen) save( labordaten_ww, foerdermengen_ww, file = file.path(paths$cleaned_data_dir, "cleaned-data.Rds") ) write.csv2( labordaten_ww, file.path(paths$cleaned_data_dir, "labordaten_ww.csv"), row.names = FALSE ) write.csv2( foerdermengen_ww, file = file.path(paths$cleaned_data_dir, "foerdermengen_ww.csv"), row.names = FALSE )
library(ggplot2) fs::dir_create(paths$figures_dir, recursive = TRUE) print(sprintf("Export figures/plots to: %s", paths$figures_dir)) para_info <- kwb.geosalz::get_parameters_meta(paths$parameters) water_types <- c("Reinwasser", "Rohwasser") plot_to_pdf <- function(pdf_file, labordaten_ww, para_info, water_type, paths) { pdf(file = pdf_file, width = 14, height = 9) on.exit(dev.off()) for (sel_para_id in unique(labordaten_ww$para_id)) { my_selection <- sprintf( "%s (%s)", para_info$para_kurzname[para_info$para_id == sel_para_id], water_type ) tmp <- labordaten_ww %>% dplyr::filter(prufgegenstand == water_type) %>% dplyr::filter(para_id == sel_para_id) %>% dplyr::group_by(.data$para_kurzname, .data$werk, .data$year) %>% dplyr::summarise( mean_DataValue = mean(as.numeric(.data$DataValue), na.rm = TRUE) ) %>% dplyr::filter(!is.na(.data$werk)) %>% dplyr::left_join( y = kwb.geosalz::get_foerdermengen(paths$foerdermengen), by = c("werk", "year") ) if (nrow(tmp) > 0) { cat(sprintf("for %s\n", my_selection)) g <- ggplot2::ggplot(tmp, mapping = ggplot2::aes_string( x = "year", y = "mean_DataValue", col = "werk" )) + ggplot2::geom_point() + ggplot2::geom_line() + ggplot2::theme_bw() + ggplot2::ggtitle(label = my_selection) + ggplot2::labs(x = "", y = "Jahresmittelwert") print(g) } else { cat(sprintf("not data availabe for %s\n", my_selection)) } } } for (water_type in water_types) { pdf_file <- file.path( paths$figures_dir, sprintf( "Zeitreihen_Jahresmittelwerte_Werke_%s.pdf", water_type ) ) cat(sprintf("Creating plot:\n%s\n", pdf_file)) plot_to_pdf(pdf_file, labordaten_ww, para_info, water_type, paths) }
environment <- sessioninfo::session_info() knitr::kable(tibble::enframe(unlist(environment$platform)))
environment$packages
kwb.geosalz::get_pandoc_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.