knitr::opts_chunk$set( collapse = TRUE, eval = TRUE, comment = "#>" ) chk_online <- FALSE chk_stations <- FALSE chk_ts <- FALSE
This vignette shows how to create a tidy data frame for the stations' and time series' data from Hydroscope. We will use the Hydroscope's sub-domains kyy
, ypaat
and emy
, because their servers are maintened by the National Technical University Of Athens and work seamlessly.
This vignette requires an internet connection to run and that the Hydroscope's subdomains to be online. We can check if the sub-domains are online using the pingr
package:
library(pingr) # helper function to check if a sub-domain is online online <- function(h_url){ !is.na(pingr::ping(h_url, count = 1)) } # check if sub-domains are online kyy <- online("kyy.hydroscope.gr") emy <- online("emy.hydroscope.gr") ypaat <- online("ypaat.hydroscope.gr") chk_online <- kyy & emy & ypaat print(paste("All sub-domains are online:", chk_online))
Note that the following chucks will be evaluated only if all sub-domains are online and if the retrieved data have some expected variables. Downloading will take some time, depending on your internet connection and the Hydroscope's servers traffic.
With the following code we can download Hydroscope's sub-domains' data to a named list.
library(hydroscoper) library(tibble) library(plyr) subdomain <- c("kyy", "ypaat", "emy") # download all databases hydro_data <- lapply(subdomain, function(x) get_database(x)) names(hydro_data) <- subdomain
First of all, we must check if there are specific variables at the sub-domain's stations' data, because we will merge these data to a new data frame.
# check if all expected variables exist in all subdomains stations data vars <- c("id", "name", "owner", "point", "altitude", "water_basin", "water_division") chk1 <- laply(subdomain, function(id) { all(vars %in% names(hydro_data[[id]]$stations)) }) # check water_basin, water_division and owner variables vars2 <- c("id", "name") chk2 <- laply(subdomain, function(id) { all((vars2 %in% names(hydro_data[[id]]$water_basin)) & (vars2 %in% names(hydro_data[[id]]$water_division)) & (vars2 %in% names(hydro_data[[id]]$owner)) )}) chk_stations <- all(chk1) & all(chk2) print(paste("All expected variables exist in stations' related data:", chk_stations))
With the following code we will create a tidy data frame with the stations data. Note that the Greek terms are translated to English.
stations <- ldply(subdomain, function(id){ tmp <- hydro_data[[id]] # extract dataframes to join wbas <- tmp$water_basins[c("id", "name")] wdiv <- tmp$water_divisions[c("id", "name")] owners <- tmp$owners[c("id", "name")] # remove area from water_basin wbas$name <- laply(wbas$name, function(y) gsub("\\([^()]*\\)", "", y)) # translate names wdiv$name <- hydro_translate(wdiv$name, "division") owners$name <- hydro_translate(owners$name, "owner") # rename dataframes variables names(wbas) <- c("water_basin", "water_basin_name") names(wdiv) <- c("water_division", "water_division_name") names(owners) <- c("owner", "owner_name") # merge data res <- merge(tmp$stations, wbas, by = "water_basin", all.x = TRUE) res <- merge(res, wdiv, by = "water_division", all.x = TRUE) res <- merge(res, owners, by = "owner", all.x = TRUE) # create coords coords <- hydro_coords(res$point) # create a data frame with all the data data.frame(station_id = as.integer(res$id), name = as.character(res$name), water_basin = res$water_basin_name, water_division = res$water_division_name, owner = res$owner_name, longitude = as.numeric(coords$long), latitude = as.numeric(coords$lat), altitude = as.numeric(res$altitude), subdomain = rep(id, nrow(res)), stringsAsFactors = FALSE) }) stations <- as_tibble(stations) stations
Similarly, we will check if there are specific variables at each sub-domain's time series' data.
# check if variables exists in timeseries data vart <- c("id", "gentity", "start_date_utc", "end_date_utc", "variable", "time_step", "unit_of_measurement") chk3 <- laply(subdomain, function(id) { all(vart %in% names(hydro_data[[id]]$timeseries)) }) # check if variables exists in hydrometeorological variables and timesteps vart2 <- c("id", "descr") chk4 <- laply(subdomain, function(id) { all(vart2 %in% names(hydro_data[[id]]$variables)) & all(vart2 %in% names(hydro_data[[id]]$time_steps)) }) # check if variables exists in unit_of_measurement vart3 <- c("id", "symbol") chk5 <- laply(subdomain, function(id) { all(vart3 %in% names(hydro_data[[id]]$units)) }) chk_ts <- all(chk3) & all(chk4) & all(chk5) print(paste("All expected variables exist in time series' data:", chk_ts))
With the following code we will, also, create a tidy data frame with the time series data with translated terms.
# create stations' dataframe timeseries <- ldply(subdomain, function(id){ tmp <- hydro_data[[id]] # create data frames to join var_names <- tmp$variables[c("id", "descr")] ts_names <- tmp$time_steps[c("id", "descr")] ts_units <-tmp$units[c("id", "symbol")] # translate names var_names$descr <- hydro_translate(var_names$descr, "variable") ts_names$descr <- hydro_translate(ts_names$descr, "timestep") # rename dataframes variables names(var_names) <- c("variable", "variable_name") names(ts_names) <- c("time_step", "time_step_name") names(ts_units) <- c("unit_of_measurement", "unit_of_measurement_name") # merge data res <- merge(tmp$timeseries, var_names, by = "variable", all.x = TRUE) res <- merge(res, ts_names, by = "time_step", all.x = TRUE) res <- merge(res, ts_units, by = "unit_of_measurement", all.x = TRUE) data.frame( "time_id" = as.integer(res$id), "station_id" = as.integer(res$gentity), "variable" = as.character(res$variable_name), "timestep" = as.character(res$time_step_name), "units" = as.character(res$unit_of_measurement_name), "start_date" = as.character(res$start_date_utc), "end_date" = as.character(res$end_date_utc), "subdomain" = rep(id, nrow(res)), stringsAsFactors = FALSE) }) timeseries <- as_tibble(timeseries) timeseries
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.