knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE ) library(watina) library(dplyr) library(knitr)
General note: the below vignette contains frozen output of 2 Oct 2019. This makes it possible to build the package with vignettes without access to the Watina database.
Let's have a look at how chemical data, retrieved from Watina, look like. The below example requests available hydrochemical data since 2010 from locations in the area 'Zwarte Beek':
watina <- connect_watina() mydata <- get_locs(watina, area_codes = "ZWA") %>% get_chem(watina, "1/1/2010") mydata %>% count #> # Source: lazy query [?? x 1] #> # Database: Microsoft SQL Server #> n #> <int> #> 1 1916 class(mydata) #> [1] "tbl_Microsoft SQL Server" "tbl_dbi" #> [3] "tbl_sql" "tbl_lazy" #> [5] "tbl" mydata %>% head(15) %>% collect %>% as.data.frame #> loc_code date lab_project_id lab_sample_id chem_variable value #> 1 ZWAP033 2011-04-14 0 10431 Ca 19.690 #> 2 ZWAP033 2011-04-14 0 10431 Cl 19.582 #> 3 ZWAP033 2011-04-14 0 10431 CondF 425.000 #> 4 ZWAP033 2011-04-14 0 10431 CondL 372.000 #> 5 ZWAP033 2011-04-14 0 10431 Fe 60.809 #> 6 ZWAP033 2011-04-14 0 10431 HCO3 25.169 #> 7 ZWAP033 2011-04-14 0 10431 K 2.530 #> 8 ZWAP033 2011-04-14 0 10431 Mg 7.000 #> 9 ZWAP033 2011-04-14 0 10431 Na 5.330 #> 10 ZWAP033 2011-04-14 0 10431 N-NH4 0.318 #> 11 ZWAP033 2011-04-14 0 10431 N-NO2 0.015 #> 12 ZWAP033 2011-04-14 0 10431 N-NO3 0.041 #> 13 ZWAP033 2011-04-14 0 10431 pHF 6.297 #> 14 ZWAP033 2011-04-14 0 10431 pHL 5.830 #> 15 ZWAP033 2011-04-14 0 10431 P-PO4 0.016 #> unit below_loq loq elneutr #> 1 mg/l NA -99 0.610484 #> 2 mg/l NA -99 0.610484 #> 3 µS/cm NA -99 0.610484 #> 4 µS/cm NA -99 0.610484 #> 5 mg/l NA -99 0.610484 #> 6 mg/l NA -99 0.610484 #> 7 mg/l NA -99 0.610484 #> 8 mg/l NA -99 0.610484 #> 9 mg/l NA -99 0.610484 #> 10 mg/l NA -99 0.610484 #> 11 mg/l NA -99 0.610484 #> 12 mg/l NA -99 0.610484 #> 13 <NA> NA -99 0.610484 #> 14 <NA> NA -99 0.610484 #> 15 mg/l NA -99 0.610484
The first and last year of the dataset are:
mydata %>% pull(date) %>% lubridate::year(.) %>% (function(x) c(firstyear = min(x), lastyear = max(x))) #> firstyear lastyear #> 2011 2019
Let's suppose that we now want to select locations for which:
To express criteria, we need the numerical value of a date:
as.numeric(lubridate::dmy("1/1/2014")) #> [1] 16071
We can store specific conditions in a dataframe; the available statistics are explained by the documentation of eval_chem()
:
conditions_df <- tribble( ~chem_variable, ~statistic, ~criterion, ~direction, "N-NO3", "nrdates", 2, "min", "P-PO4", "nrdates", 2, "min", "P-PO4", "firstdate", 16071, "max", "P-PO4", "timespan_years", 5, "min" ) conditions_df %>% kable
You can also separately limit the chemical variables to be evaluated with the chem_var
argument in the selectlocs_chem()
function:
myresult <- mydata %>% selectlocs_chem(data_type = "data", chem_var = c("N-NO3", "P-PO4"), conditions = conditions_df) myresult #> # A tibble: 18 x 1 #> loc_code #> <chr> #> 1 ZWAP034 #> 2 ZWAP041 #> 3 ZWAP042 #> 4 ZWAP051 #> 5 ZWAP063 #> 6 ZWAP064 #> 7 ZWAP067 #> 8 ZWAP129 #> 9 ZWAP165 #> 10 ZWAP170 #> 11 ZWAP196 #> 12 ZWAP205 #> 13 ZWAP208 #> 14 ZWAP214 #> 15 ZWAP215 #> 16 ZWAP216 #> 17 ZWAP220 #> 18 ZWAP221
With the argument list = TRUE
you can also obtain intermediate test results.
For further information, see the documentation of the selectlocs_chem()
function.
The get_chem()
function in the above example retrieved hydrochemical data, from given locations, as a lazy object (unless collect = TRUE
).
A timeframe is used to filter hydrochemical data in Watina (with arguments startdate
and enddate
).
By default, the enddate
argument is set as today.
Here we only provide the startdate
:
mylocs <- get_locs(watina, area_codes = "ZWA") mylocs %>% get_chem(watina, "1/1/2017") #> # Source: lazy query [?? x 10] #> # Database: Microsoft SQL Server #> # Ordered by: area_code, loc_code, obswell_rank, area_code, loc_code, #> # loc_code, date, chem_variable #> loc_code date lab_project_id lab_sample_id chem_variable value #> <chr> <date> <chr> <chr> <chr> <dbl> #> 1 ZWAP034 2018-10-29 0 40121 Al 0.05 #> 2 ZWAP034 2018-10-29 0 40121 Ca 4.24 #> 3 ZWAP034 2018-10-29 0 40121 Cl 14.4 #> 4 ZWAP034 2018-10-29 0 40121 CondF 135 #> 5 ZWAP034 2018-10-29 0 40121 CondL 120. #> 6 ZWAP034 2018-10-29 0 40121 Fe 19.9 #> 7 ZWAP034 2018-10-29 0 40121 HCO3 5.97 #> 8 ZWAP034 2018-10-29 0 40121 K 1.98 #> 9 ZWAP034 2018-10-29 0 40121 Mg 1.36 #> 10 ZWAP034 2018-10-29 0 40121 Mn 0.05 #> # … with more rows, and 4 more variables: unit <chr>, below_loq <lgl>, #> # loq <dbl>, elneutr <dbl>
Retrieving the data locally:
mylocs %>% get_chem(watina, "1/1/2017", collect = TRUE) #> # A tibble: 608 x 10 #> loc_code date lab_project_id lab_sample_id chem_variable value #> <chr> <date> <chr> <chr> <chr> <dbl> #> 1 ZWAP034 2018-10-29 0 40121 Al 0.05 #> 2 ZWAP034 2018-10-29 0 40121 Ca 4.24 #> 3 ZWAP034 2018-10-29 0 40121 Cl 14.4 #> 4 ZWAP034 2018-10-29 0 40121 CondF 135 #> 5 ZWAP034 2018-10-29 0 40121 CondL 120. #> 6 ZWAP034 2018-10-29 0 40121 Fe 19.9 #> 7 ZWAP034 2018-10-29 0 40121 HCO3 5.97 #> 8 ZWAP034 2018-10-29 0 40121 K 1.98 #> 9 ZWAP034 2018-10-29 0 40121 Mg 1.36 #> 10 ZWAP034 2018-10-29 0 40121 Mn 0.05 #> # … with 598 more rows, and 4 more variables: unit <chr>, #> # below_loq <lgl>, loq <dbl>, elneutr <dbl>
Note below, the different number of results when using non-default settings for filtering according to electroneutrality!
The default range of electroneutrality is from -0.1 to 0.1 (argument en_range
).
mylocs %>% get_chem(watina, "1/1/2017") %>% count #> # Source: lazy query [?? x 1] #> # Database: Microsoft SQL Server #> n #> <int> #> 1 608 mylocs %>% get_chem(watina, "1/1/2017", en_exclude_na = TRUE) %>% count #> # Source: lazy query [?? x 1] #> # Database: Microsoft SQL Server #> n #> <int> #> 1 602 mylocs %>% get_chem(watina, "1/1/2017", en_range = c(-1, 1)) %>% count #> # Source: lazy query [?? x 1] #> # Database: Microsoft SQL Server #> n #> <int> #> 1 674
With en_exclude_na = TRUE
, samples for which no electroneutrality could be calculated are discarded!
By default, all measurements from water samples with a high iron / conductivity ratio are returned (hence,
in these cases the en_range
and en_exclude_na
arguments are ignored).
This is controlled by the en_fecond_threshold
argument.
If you want the en_range
and en_exclude_na
arguments to take effect also in these relatively iron-rich water samples,
set en_fecond_threshold = NA
:
mylocs %>% get_chem(watina, "1/1/2017", en_fecond_threshold = NA) %>% count #> # Source: lazy query [?? x 1] #> # Database: Microsoft SQL Server #> n #> <int> #> 1 524
Equivalence concentrations instead of mass concentrations can be returned with conc_type = "eq"
:
mylocs %>% get_chem(watina, "1/1/2017", conc_type = "eq") #> # Source: lazy query [?? x 10] #> # Database: Microsoft SQL Server #> # Ordered by: area_code, loc_code, obswell_rank, area_code, loc_code, #> # loc_code, date, chem_variable #> loc_code date lab_project_id lab_sample_id chem_variable value #> <chr> <date> <chr> <chr> <chr> <dbl> #> 1 ZWAP034 2018-10-29 0 40121 Al 5.00e-2 #> 2 ZWAP034 2018-10-29 0 40121 Ca 2.12e-1 #> 3 ZWAP034 2018-10-29 0 40121 Cl 4.07e-1 #> 4 ZWAP034 2018-10-29 0 40121 CondF 1.35e+2 #> 5 ZWAP034 2018-10-29 0 40121 CondL 1.20e+2 #> 6 ZWAP034 2018-10-29 0 40121 Fe 7.14e-1 #> 7 ZWAP034 2018-10-29 0 40121 HCO3 9.78e-2 #> 8 ZWAP034 2018-10-29 0 40121 K 5.07e-2 #> 9 ZWAP034 2018-10-29 0 40121 Mg 1.12e-1 #> 10 ZWAP034 2018-10-29 0 40121 Mn 5.00e-2 #> # … with more rows, and 4 more variables: unit <chr>, below_loq <lgl>, #> # loq <dbl>, elneutr <dbl>
Joining results to mylocs
:
mylocs %>% get_chem(watina, "1/1/2017") %>% left_join(mylocs %>% select(-loc_wid), .) %>% collect #> Joining, by = "loc_code" #> # A tibble: 765 x 20 #> loc_code area_code area_name x y loc_validitycode loc_validity #> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> #> 1 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 2 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 3 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 4 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 5 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 6 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 7 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 8 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 9 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> 10 ZWAP141 ZWA Zwarte B… 216785 198435 VLD Gevalideerd #> # … with 755 more rows, and 13 more variables: loc_typecode <chr>, #> # loc_typename <chr>, filterdepth <dbl>, soilsurf_ost <dbl>, #> # date <date>, lab_project_id <chr>, lab_sample_id <chr>, #> # chem_variable <chr>, value <dbl>, unit <chr>, below_loq <lgl>, #> # loq <dbl>, elneutr <dbl>
You can characterize the locations of a dataset of hydrochemical data, using the eval_chem()
function.
You are invited to read its documentation and try its examples!
The selectlocs_chem()
function, of which we saw a demonstration above, calls eval_chem()
by itself.
Alternatively the user can provide the result of those functions as input to selectlocs_chem()
.
dbDisconnect(watina)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.