# Keep htmltools from writing version-suffixed dependency directories.
options(htmltools.dir.version = FALSE)

# Copyright 2018 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

# Chunk defaults for the whole deck. These must sit on their own lines:
# when fused onto the licence comment they are never executed.
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = FALSE,
  comment = "#>",
  fig.path = "graphics/prod/figs"
)

# Bias number printing towards fixed notation over scientific notation.
options(scipen = 10)
library(tidyhydat) library(knitr) library(tidyverse) library(lubridate) library(corrr) library(leaflet) library(sf) library(mapview)
class: inverse
background-image: url(https://upload.wikimedia.org/wikipedia/commons/3/3e/Clearwater_River_Wells_Gray_Park.jpg)
background-size: cover
.VeryLarge[
- Common Analysis Problems
- What is R and why use it?
- What is tidyhydat?
- Some R basics
- An example of how R can help
- Leveraging R and what I'm not showing you
- Where to get help
- Questions
]
class: inverse, center, middle
class: center, basic
# Embed graphics/ec_data_explorer2.gif on this slide.
knitr::include_graphics("graphics/ec_data_explorer2.gif")
class: basic, center
--
class: basic, center
--
--
class: inverse, left, middle
.pull-left[ (or more generally any programmatic code based analysis approach...) ]
.pull-left[
.large[
- Free and open source
- Statistical programming language
- Publication quality graphics
- But definitely not intimidating...
]
]
--

.pull-right[
.large[
- Efficient
- Reproducible
- Scalable
]
]
--
.large[
- Are your methods reproducible?
- What is your analysis recipe?
- Can you share it?
]
class:basic
| R                                         | Excel                                                  |
|-------------------------------------------|--------------------------------------------------------|
| Data and analysis are separate            | Data and analysis are usually stored in the same place |
.footnote[ From: http://blog.yhat.com/posts/R-for-excel-users.html. ]
class:basic
| R                                         | Excel                                                  |
|-------------------------------------------|--------------------------------------------------------|
| Data and analysis are separate            | Data and analysis are usually stored in the same place |
| Data structure is strict                  | Data structure is flexible                             |
.footnote[ From: http://blog.yhat.com/posts/R-for-excel-users.html. ]
class:basic
| R                                         | Excel                                                  |
|-------------------------------------------|--------------------------------------------------------|
| Data and analysis are separate            | Data and analysis are usually stored in the same place |
| Data structure is strict                  | Data structure is flexible                             |
| Operations are achieved through scripting | Operations are achieved through pointing and clicking  |
.footnote[ From: http://blog.yhat.com/posts/R-for-excel-users.html. ]
class:basic
| R                                         | Excel                                                  |
|-------------------------------------------|--------------------------------------------------------|
| Data and analysis are separate            | Data and analysis are usually stored in the same place |
| Data structure is strict                  | Data structure is flexible                             |
| Operations are achieved through scripting | Operations are achieved through pointing and clicking  |
| Iteration is automated                    | Iteration is usually done by hand                      |
.footnote[ From: http://blog.yhat.com/posts/R-for-excel-users.html. ]
class:basic
The objective of tidyhydat is to provide a standard method of accessing ECCC hydrometric data sources (historical and real time) using a consistent and easy to use interface that employs tidy data principles within the R project.
--
# Interactive map of every station whose HYD_STATUS is "ACTIVE".
stns <- hy_stations() %>%
  filter(HYD_STATUS == "ACTIVE")

# Turn the station table into an sf point layer (LONGITUDE/LATITUDE,
# EPSG:4326) and hand it to mapview with a per-station popup table.
st_as_sf(
  stns,
  coords = c("LONGITUDE", "LATITUDE"),
  crs = 4326,
  agr = "constant"
) %>%
  mapview(
    zcol = "STATION_NAME",
    legend = FALSE,
    map.types = "Esri.WorldImagery",
    cex = 4,
    popup = popupTable(
      .,
      zcol = c("STATION_NUMBER", "STATION_NAME", "PROV_TERR_STATE_LOC")
    )
  )

# Disabled leaflet variant of the same map, kept for reference:
#leaflet(data = stns) %>%
#  addTiles() %>%
#  addMarkers(~LONGITUDE, ~LATITUDE, label=~as.character(STATION_NAME), clusterOptions = markerClusterOptions()) %>%
#
#  setView(-96, 63, zoom = 3)
Tidy datasets are all alike but every messy dataset is messy in its own way<sup>1</sup>
--
.footnote[ [1] Wickham, Hadley. 2014. Tidy Data. Journal of Statistical Software 59 (10). Foundation for Open Access Statistics: 1–23. ]
# Query the DLY_FLOWS table directly from the HYDAT database for station
# 08MF005, dropping every column whose name contains "_SYMBOL".
src <- hy_src()
tbl(src, "DLY_FLOWS") %>%
  filter(STATION_NUMBER == "08MF005") %>%
  select(-contains("_SYMBOL"))

# The lazy table above has already been printed; release the connection
# rather than leaving it open for the rest of the session.
hy_src_disconnect(src)

# The same station requested through tidyhydat's daily-flows accessor.
hy_daily_flows(station_number = "08MF005")
--
class: inverse, center, middle
class: basiclh
=SUM(A1:A23)

=AVERAGE(A1:A23)
class: basiclh
# Daily flows for the four example stations, stored as `flows_data`.
flows_data <- hy_daily_flows(
  station_number = c("08MF005", "09CD001", "05KJ001", "02KF005")
)
<-
: assignment operator

flows_data: object

hy_daily_flows: function

station_number: argument

flows_data
class: basiclh, center
# Station number/name lookup for the four example stations.
stns_tbl <- hy_stations(
  c("08MF005", "09CD001", "05KJ001", "02KF005")
)[, c("STATION_NUMBER", "STATION_NAME")]

# Build a striped, hover-highlighted HTML table of the lookup.
x <- stns_tbl %>%
  rename(`Station Name` = STATION_NAME, `Station Number` = STATION_NUMBER) %>%
  knitr::kable(format = "html") %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))

# Remove the <thead> element so the table renders without a header row.
gsub("<thead>.*</thead>", "", x)
# Map the stations listed in `stns_tbl`.
stns <- hy_stations(stns_tbl$STATION_NUMBER)

# sf point layer (LONGITUDE/LATITUDE, EPSG:4326) rendered through mapview,
# with a popup table of station number, name and jurisdiction.
st_as_sf(
  stns,
  coords = c("LONGITUDE", "LATITUDE"),
  crs = 4326,
  agr = "constant"
) %>%
  mapview(
    zcol = "STATION_NAME",
    legend = FALSE,
    cex = 6,
    popup = popupTable(
      .,
      zcol = c("STATION_NUMBER", "STATION_NAME", "PROV_TERR_STATE_LOC")
    )
  )
# Print the daily flows object on its own.
flows_data

# Slide annotation - `flows_data`: object

# Pipeline build, step 1: spread the stations into one column each.
# `#<<` marks the line highlighted on the slide.
flows_data %>%
  spread(key = STATION_NUMBER, value = Value) #<<
# Slide annotation - `%>%`: "then"
# Slide annotation - `spread`: function

# Pipeline build, step 2: drop the non-station columns so that only the
# per-station value columns remain.
flows_data %>%
  spread(key = STATION_NUMBER, value = Value) %>%
  select(-Date, -Symbol, -Parameter) #<<
# Slide annotation - `select`: function

# Pipeline build, step 3: correlate the per-station columns.
# NOTE(review): Symbol is still a column when spread() runs, so it acts as
# part of the row key. A date whose Symbol differs between stations ends up
# on separate rows and cannot contribute to that pair's correlation.
# Consider select(-Symbol) before spread().
flows_data %>%
  spread(key = STATION_NUMBER, value = Value) %>%
  select(-Date, -Symbol, -Parameter) %>%
  correlate() #<<
# Slide annotation - `correlate`: function

# Pipeline build, step 4: stretch() the result of correlate().
flows_data %>%
  spread(key = STATION_NUMBER, value = Value) %>%
  select(-Date, -Symbol, -Parameter) %>%
  correlate() %>%
  stretch() #<<
# Slide annotation - `stretch`: function

# Stations in jurisdiction "NU" whose HYD_STATUS is "ACTIVE".
stns <- hy_stations(prov_terr_state_loc = "NU") %>%
  filter(HYD_STATUS == "ACTIVE")

# Map them as an sf point layer (LONGITUDE/LATITUDE, EPSG:4326) with a
# popup table that also shows the station status.
st_as_sf(
  stns,
  coords = c("LONGITUDE", "LATITUDE"),
  crs = 4326,
  agr = "constant"
) %>%
  mapview(
    zcol = "STATION_NAME",
    legend = FALSE,
    popup = popupTable(
      .,
      zcol = c(
        "STATION_NUMBER", "STATION_NAME", "PROV_TERR_STATE_LOC", "HYD_STATUS"
      )
    )
  )
# Stations in jurisdiction "NU" whose HYD_STATUS is "ACTIVE".
stns <- hy_stations(prov_terr_state_loc = "NU") %>%
  filter(HYD_STATUS == "ACTIVE")

# Daily flows for every one of those stations, then print the result.
nu_flows <- hy_daily_flows(station_number = stns$STATION_NUMBER)
nu_flows
# Pairwise correlation of daily flows between the NU stations.
# `#<<` marks the line highlighted on the slide; it has to end its own line,
# otherwise it comments out the rest of the pipeline.
nu_flows %>% #<<
  # Drop Symbol before reshaping: spread() keys rows on every remaining
  # column, so leaving Symbol in would put a date whose symbol differs
  # between stations on separate rows and exclude it from the correlation.
  select(-Symbol) %>%
  spread(STATION_NUMBER, Value) %>%
  select(-Date, -Parameter) %>%
  correlate() %>%
  stretch()
tidyhydat?

.Large[
- Instantaneous peaks
- Daily, monthly and yearly temporal summaries
- Discharge, level, sediment, particle size
- Data ranges
- Station metadata
]
tidyhydat
# Run the station-name search itself. The leading `?` (the end of the slide
# heading, spilled onto this line) made R treat the call as a help lookup
# instead of executing it.
search_stn_name("fraser")
# Embed graphics/wateroffice.gif on this slide.
knitr::include_graphics("graphics/wateroffice.gif")
tidyhydat
# Draw the realtime plot for station 08MF005. The leading `?` (the end of
# the slide heading, spilled onto this line) made R treat the call as a help
# lookup instead of executing it.
realtime_plot("08MF005", Parameter = "Flow")
# Station identifiers, jurisdiction and gross drainage area for all stations.
raw_stns <- hy_stations() %>%
  select(STATION_NUMBER:PROV_TERR_STATE_LOC, DRAINAGE_AREA_GROSS)

# Per-station average of the annual MEAN flow statistic, joined back onto
# the station table. right_join keeps stations that have no flow statistic.
mad_long_avg <- hy_annual_stats(raw_stns$STATION_NUMBER) %>%
  filter(Sum_stat == "MEAN", Parameter == "Flow") %>%
  group_by(STATION_NUMBER) %>%
  summarise(Value = mean(Value, na.rm = TRUE)) %>%
  # STATION_NUMBER is the only shared column; naming it makes the join key
  # explicit and silences the "Joining, by = ..." message.
  right_join(raw_stns, by = "STATION_NUMBER")

mad_long_avg #<<
library(ggplot2)

# Log-log scatter of long-term mean discharge against gross drainage area,
# coloured by jurisdiction.
ggplot(
  mad_long_avg,
  aes(x = Value, y = DRAINAGE_AREA_GROSS, colour = PROV_TERR_STATE_LOC)
) +
  geom_point() +
  scale_y_continuous(trans = "log10") +
  scale_x_continuous(trans = "log10") +
  scale_colour_viridis_d(name = "Jurisdiction") +
  labs(
    # Discharge is a rate, so the unit is m^3/s rather than a volume (m^3).
    x = "Mean long term annual discharge (m^3/s)",
    y = "Gross drainage area (km^2)"
  ) +
  theme_minimal()
tidyhydat
https://github.com/ropensci/tidyhydat
Any contribution helps. You don't have to be an R programmer!
.pull-left[
- Questions
- Ideas / Feature-requests
- Bugs
- Bug-fixes
- Development
]

.pull-right[
Authors@R: c(person("Sam", "Albers",email = "sam.albers@gov.bc.ca", role = c("aut", "cre")),
    person("David", "Hutchinson", email = "david.hutchinson@canada.ca", role = "ctb"), #<<
    person("Dewey", "Dunnington", email = "dewey@fishandwhistle.net", role = "ctb"), #<<
    person("Province of British Columbia", role = "cph"))
class: inverse, center
Installing R & RStudio with local package libraries
- https://github.com/bcgov/bcgov-data-science-resources/wiki/Installing-R-&-RStudio
Installing tidyhydat
- https://cran.rstudio.com/web/packages/tidyhydat/README.html
Getting started with tidyhydat
- https://cran.rstudio.com/web/packages/tidyhydat/vignettes/tidyhydat_an_introduction.html
- https://cran.rstudio.com/web/packages/tidyhydat/vignettes/tidyhydat_example_analysis.html
BC Gov data science resource wiki
- https://github.com/bcgov/bcgov-data-science-resources/wiki
class: basic
background-image: url(https://media.giphy.com/media/TnDoEoXfT7YoE/giphy.gif)
background-size: cover
.content-box-blue[ Slides available from
- https://github.com/ropensci/tidyhydat/blob/master/presentations/tidyhydat_intro.pdf
- https://github.com/ropensci/tidyhydat/blob/master/presentations/tidyhydat_intro.Rmd
Contact sam.albers@gov.bc.ca ]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.