# Chunk defaults for this vignette: merge source and output into one block
# and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

1 Installation

# Installation instructions. The block is guarded by FALSE, so it is shown
# for reference only and never executed when this file is run.
if (FALSE) {
  # GitHub personal access token (placeholder value)
  Sys.setenv(GITHUB_PAT = "mysecret_access_token")

  # "remotes" from CRAN is needed to install packages from GitHub
  install.packages("remotes", repos = "https://cloud.r-project.org")
  library(remotes)

  # Install in this order: a pinned "remotes" commit first, then the KWB-R
  # packages used below
  github_repos <- c(
    "r-lib/remotes@18c7302637053faf21c5b025e1e9243962db1bdc",
    "KWB-R/kwb.qmra",
    "KWB-R/qmra.db"
  )

  for (github_repo in github_repos) {
    remotes::install_github(github_repo)
  }
}
## Load packages
library(kwb.qmra)
library(qmra.db)
library(ggplot2)
library(tibble)
library(sessioninfo)
library(stringi)

2 Import data from "qmra.db"

See: https://github.com/KWB-R/qmra.db/tree/master/inst/database

#' Build the inflow metadata table from the "qmra.db" CSV exports
#'
#' Reads the inflow, pathogen, pathogen group, reference and water source
#' tables from `csv_dir` and left-joins the lookup tables onto the inflow
#' table. The joins use dplyr's natural join, i.e. the key columns are the
#' column names shared by both tables.
#'
#' @param csv_dir Directory containing the files tbl_inflow.csv,
#'   tbl_pathogen.csv, tbl_pathogenGroup.csv, tbl_reference.csv and
#'   tbl_waterSource.csv. Defaults to the "database/qmra-db_accdb" folder of
#'   the installed "qmra.db" package.
#' @return The joined table, sorted by WaterSourceName, PathogenGroup,
#'   PathogenName and ReferenceName, with an additional consecutive integer
#'   column `row_id`.
create_inflow_metadata <- function(
  csv_dir = system.file("database/qmra-db_accdb", package = "qmra.db")
) {

  # system.file() returns "" if the package or folder is missing; fail early
  # with a clear message instead of a confusing "file not found" later on
  stopifnot(is.character(csv_dir), length(csv_dir) == 1L)

  if (!dir.exists(csv_dir)) {
    stop("CSV directory does not exist: '", csv_dir, "'", call. = FALSE)
  }

  # Use the csv_dir argument (previously the default path was hard-coded
  # here, so the argument had no effect)
  paths_list <- list(
    csv_dir = csv_dir,
    inflow = "<csv_dir>/tbl_inflow.csv",
    pathogen = "<csv_dir>/tbl_pathogen.csv",
    pathogen_group = "<csv_dir>/tbl_pathogenGroup.csv",
    references = "<csv_dir>/tbl_reference.csv",
    water_source = "<csv_dir>/tbl_waterSource.csv"
  )

  # Replace the <csv_dir> placeholders with the actual directory
  paths <- kwb.utils::resolve(paths_list)

  inflow <- readr::read_csv(file = paths$inflow)
  pathogen <- readr::read_csv(file = paths$pathogen)
  pathogen_group <- readr::read_csv(file = paths$pathogen_group)
  references <- readr::read_csv(file = paths$references)
  water_source <- readr::read_csv(file = paths$water_source)

  # Return the pipeline result directly so that the value is visible;
  # row_number() is safe for zero rows, unlike 1:n()
  inflow %>%
    dplyr::left_join(pathogen) %>%
    dplyr::left_join(pathogen_group) %>%
    dplyr::left_join(water_source) %>%
    dplyr::left_join(references) %>%
    dplyr::arrange(
      .data$WaterSourceName,
      .data$PathogenGroup,
      .data$PathogenName,
      .data$ReferenceName
    ) %>%
    dplyr::mutate(row_id = dplyr::row_number())
}

# Build the metadata table using the default CSV directory and show its
# first rows
inflow_metadata <- create_inflow_metadata() 
head(inflow_metadata)

3 Test

3.1 Define Checking Function

#' Compute percentiles of simulated inflow values for each metadata row
#'
#' For every row of `inflow_metadata` a random distribution is created with
#' kwb.qmra::create_random_distribution() from the row's `min` and `max`
#' values. The percentiles 0, 1, ..., 100 of the simulated event values are
#' then calculated. A row for which the percentiles cannot be calculated is
#' skipped with a warning instead of aborting the whole run.
#'
#' @param inflow_metadata Data frame with one row per inflow entry. The
#'   columns WaterSourceName, PathogenName, ReferenceName, min and max are
#'   used.
#' @param type Distribution type passed to create_random_distribution().
#'   For valid values see:
#'   https://kwb-r.github.io/kwb.qmra/reference/create_random_distribution.html
#' @param number_of_repeatings Passed to create_random_distribution().
#' @param number_of_events Passed to create_random_distribution().
#' @param dbg Passed as argument `debug` to create_random_distribution().
#' @return Table with columns `row_id` (character; position of the row in
#'   `inflow_metadata`), `percentile` (numeric, in percent), `value` and
#'   `distribution_type`.
check_inflow_distribution <- function(inflow_metadata,
                                      type = "log10_norm",
                                      number_of_repeatings = 1000,
                                      number_of_events = 365,
                                      dbg = TRUE) {

  row_indices <- seq_len(nrow(inflow_metadata))

  percentiles_list <- lapply(row_indices, FUN = function(i) {

    sel_dat <- inflow_metadata[i, ]

    print(sprintf("Water source: %s, Pathogen: %s, Reference: %s, Min: %f, Max: %f", 
                  stringi::stri_enc_toutf8(sel_dat$WaterSourceName), 
                  stringi::stri_enc_toutf8(sel_dat$PathogenName), 
                  stringi::stri_enc_toutf8(sel_dat$ReferenceName), 
                  sel_dat$min, 
                  sel_dat$max))

    inflow <- kwb.qmra::create_random_distribution(
      type = type,
      number_of_repeatings = number_of_repeatings,
      number_of_events = number_of_events,
      min = sel_dat$min,
      max = sel_dat$max,
      debug = dbg
    )

    # Go ahead even if errors occur: return NULL (dropped by bind_rows())
    # instead of a "try-error" object, which is not a data frame and cannot
    # be row-bound
    tryCatch(
      tibble::enframe(
        stats::quantile(inflow$events$values, probs = seq(0, 1, by = 0.01))
      ),
      error = function(e) {
        warning(
          sprintf(
            "Skipping row %d: percentiles could not be calculated (%s)",
            i, conditionMessage(e)
          ),
          call. = FALSE
        )
        NULL
      }
    )
  })

  # Name the elements by row position so that the ".id" column still refers
  # to the correct metadata row when failed (NULL) elements are dropped
  names(percentiles_list) <- row_indices

  percentiles <- dplyr::bind_rows(percentiles_list, .id = "row_id")

  # Nothing to rename or convert if no row produced percentiles
  if (nrow(percentiles) == 0L) {
    return(tibble::tibble(
      row_id = character(),
      percentile = numeric(),
      value = numeric(),
      distribution_type = character()
    ))
  }

  percentiles %>%
    dplyr::rename(percentile = name) %>%
    dplyr::mutate(
      # quantile() names look like "25%": strip the sign, convert to number
      percentile = as.numeric(stringr::str_remove(.data$percentile, "%")),
      distribution_type = type
    )
}

3.2 Define Plot Function

#' Plot the percentile curves for each inflow entry
#'
#' Prints one plot per distinct `row_id` found in `inflow_stats`. Each plot
#' shows value against percentile on a log10 y axis, with one line per
#' distribution type.
#'
#' @param inflow_stats Table with columns `row_id`, `percentile`, `value`
#'   and `distribution_type`, as returned by check_inflow_distribution()
#'   (results for several distribution types may be row-bound).
#' @param inflow_metadata Table with columns `row_id`, PathogenName,
#'   WaterSourceName, min, max, ReferenceName and ReferenceLink, used for
#'   the plot title and subtitle.
#' @return `NULL`, invisibly. The function is called for its side effect of
#'   printing the plots to the current graphics device.
plot_percentiles <- function(inflow_stats, inflow_metadata) {

  # Iterate over the ids that actually occur instead of assuming that they
  # are exactly 1, 2, ..., k (ids may be missing, e.g. after a failed row)
  row_ids <- unique(inflow_stats$row_id)

  for (id in row_ids) {

    sel_inflow_stats <- inflow_stats[inflow_stats$row_id == id, ]
    metadata <- inflow_metadata[inflow_metadata$row_id == id, ]

    # aes() with column names replaces the deprecated aes_string()
    gg <- ggplot2::ggplot(
      sel_inflow_stats,
      ggplot2::aes(
        x = percentile,
        y = value,
        colour = distribution_type
      )
    ) +
      ggplot2::scale_y_log10() +
      ggplot2::labs(
        title = sprintf("%s (water source: %s):\nmin = %f, max = %f",              
                        stringi::stri_enc_toutf8(metadata$PathogenName), 
                        stringi::stri_enc_toutf8(metadata$WaterSourceName),
                        metadata$min, 
                        metadata$max),
        subtitle = sprintf("Reference: [%s](%s)", 
                           stringi::stri_enc_toutf8(metadata$ReferenceName), 
                           stringi::stri_enc_toutf8(metadata$ReferenceLink)),
        x = "Percentile", 
        y = "Pathogens Per Litre"
      ) +
      ggplot2::geom_line() +
      ggplot2::theme_bw()

    # Explicit print() is required to draw a ggplot inside a loop
    print(gg)
  }

  invisible(NULL)
}

3.4 Run Checking Function And Plot

# Simulate the inflow distributions once per supported distribution type
inflow_stats_list <- lapply(
  X = c(
    "log10_norm",
    "log10_uniform",
    "lognorm",
    "norm",
    "uniform",
    "triangle"
  ),
  FUN = function(distribution_type) {
    check_inflow_distribution(inflow_metadata, type = distribution_type)
  }
)

# Stack the per-type results into one table
inflow_stats_list_df <- dplyr::bind_rows(inflow_stats_list)

# One plot per inflow entry, comparing all distribution types
plot_percentiles(
  inflow_stats = inflow_stats_list_df,
  inflow_metadata = inflow_metadata
)

3.5 Export Distribution Plots to PDF

The plots shown above are also available as a PDF document, which can be downloaded via the following link: inflow_test_random_distributions.pdf

# Prepare the PDF output file; the value returned by preparePdf() is kept in
# "pdff" (presumably this opens a PDF graphics device -- confirm in the
# kwb.utils documentation)
pdff <- kwb.utils::preparePdf("inflow_test_random_distributions.pdf")
# Draw all percentile plots again, now onto the currently active device
plot_percentiles(inflow_stats_list_df, inflow_metadata)
# Close the currently active graphics device
dev.off()
# Alternative to dev.off(), currently disabled:
#kwb.utils::finishAndShowPdf(pdff)

4 Session Info

The session information below is recorded for reproducibility.

# Print the details of the current R session (via the "sessioninfo" package)
sessioninfo::session_info()


KWB-R/kwb.qmra documentation built on June 15, 2021, 11:14 p.m.