Nothing
#' Download EA Catchment Data
#' @description Downloads classification data from EA Catchment Data
#' Explorer site. Data can be downloaded by specifying waterbody id
#' (\code{WBID}), Management Catchment (\code{MC}), Operational
#' Catchment (\code{OC}) or River Basin District (\code{RBD}).
#' Start year (\code{startyr}) and end year (\code{endyr}) allow
#' specific timeranges to be downloaded. This is not exported as it is
#' called by \code{\link{get_status}}.
#' For Management Catchment (\code{MC}), Operational
#' Catchment (\code{OC}) or River Basin District (\code{RBD}) level
#' downloads, waterbody \code{Type} can also be specified to allow
#' extraction of specific waterbody types (River, Lake etc).
#
#' @param ea_name A string representing the description (name) of the
#' features to be extracted. For example to extract data for the whole of
#' the Humber RBD, this would be "Humber"; also see examples. Must be an
#' exact match to the values used in the EA database.
#' Use the \code{\link{search_names}} function to search for specific values.
#'
#' @param column The column to be searched. Possible options are
#' \code{WBID} (waterbody id), \code{OC} (Operational Catchment), \code{MC}
#' (Management Catchment) and \code{RBD} (River Basin District)
#'
#' @param data_type The type of data to be retrieved, either status
#' classification ("class"), Reasons of Not Acheving Good ("rnag"),
#' objectives ("objectives"), measures ("measures") or Protected
#' Areas ("pa").
#'
#' @return An object of class \code{cde_df} containing the classifcation,
#' RNAG, measures, objectives or protected areas for the specified
#' combination of column and value.
#'
#' @noRd
download_cde <- function(ea_name = NULL, column = NULL, data_type=NULL) {
# do search to make sure that name is present
search_names(ea_name, column)
# this gives either index number (RBD, MC, OC) or wbid for next bit
index<-find_index(column, ea_name)
# do download using either zip or plain fread depending on type
if (column=="RBD" | column=="MC"){
cde_data <- zip_download(set_url(column, data_type, index))
} else{
cde_data <- tryCatch(data.table::fread(set_url(column, data_type, index),
showProgress = TRUE, header = TRUE, stringsAsFactors = FALSE,
check.names=TRUE, data.table=FALSE),
warning=function(w){})
}
# substitute . for _ in all column names
names(cde_data)<-gsub(".", "_", names(cde_data), fixed=TRUE)
# convert all to lower case
names(cde_data)<-tolower(names(cde_data))
return(cde_data)
} # end of function
#' Set end URL
#' @description Sets the final part of the download URL to the correct
#' string depending on data type to be downloaded (except for WBIDs).
#
#' @param data_type A string representing the type of data (class, rnag,
#' measures, pa or objectives) to be downloaded.
#'
#' @noRd
set_end_url<-function(data_type){
switch(data_type,
"class" = "/classification?item=all&status=all&format=csv",
"rnag" = "/ReasonsForNotAchievingGood?item=all&format=csv",
"measures" = "/Action?format=csv",
"pa" = "/pa/csv",
"objectives" = "/outcome?item=all&status=all&format=csv")
}
#' Set end URL for WBID level downloads (different format to rest)
#' @description Sets the final part of the download URL to the correct
#' string depending on data type to be downloaded for WBID level downloads.
#
#' @param data_type A string representing the type of data (class, rnag,
#' measures, objectives or pa) to be downloaded.
#'
#' @param index Either an integer representing the catchment number on
#' the EA site (RBD, MC or OC downloadas) or a string of the WBID (for
#' waterbody downloads).
#'
#' @noRd
wbid_end_url <- function(data_type, index){
switch(data_type,
"class" = paste0("data/classification.csv?waterBody=", index, "&_view=csv"),
"rnag" = paste0("data/reason-for-failure.csv?waterBody=", index, "&_view=csv"),
"pa" = paste0("WaterBody/", index, "/pa/csv"),
"objectives" = paste0("so/WaterBody/", index, "/objective-outcomes.csv?_view=csv"))
}
#' Set overall URL for downloads
#' @description Derive the overall download URL for all types (columns).
#'
#' @param column A string representing the column type to be downloaded.
#
#' @param data_type A string representing the type of data (class, rnag,
#' measures, objectives or pa) to be downloaded.
#'
#' @param index Numeric index of RBD/OC/MC to be downloaded or for
#' WBID downloads the WBID code.
#'
#' @noRd
set_url<-function(column, data_type, index){
start_url<-"http://environment.data.gov.uk/catchment-planning/"
switch(column,
"RBD"=paste0(start_url, "RiverBasinDistrict/", index, set_end_url(data_type)),
"MC"=paste0(start_url, "ManagementCatchment/", index, set_end_url(data_type)),
"OC"=paste0(start_url, "OperationalCatchment/", index, set_end_url(data_type)),
"WBID"=paste0(start_url, wbid_end_url(data_type, index))
)
}
#' Find column index number
#' @description Find column index number (or WBID) for setting up
#' download url.
#'
#' @param column A string representing the column type to be downloaded.
#'
#' @param ea_name A string representing the name of the catchment or
#' WBID for individual waterbodies to be downloaded.
#'
#' @noRd
find_index<-function(column, ea_name){
switch(column,
"RBD"=ea_wbids$RBD_num[which(ea_wbids[, column] == ea_name)][1],
"MC"=ea_wbids$MC_num[which(ea_wbids[, column] == ea_name)][1],
"OC"=ea_wbids$OC_num[which(ea_wbids[, column] == ea_name)][1],
"WBID"=ea_wbids$WBID[which(ea_wbids[, column] == ea_name)][1]
)
}
#' Download Zipfile and extract csv
#' @description Downloads zipfile from specified url, unzips to
#' csv file and reads csv into dataframe.
#'
#' @importFrom utils download.file
#' @importFrom utils unzip
#' @importFrom data.table fread
#
#' @param download_url A string representing the url to download the
#' zip file from.
#'
#' @noRd
zip_download <- function(download_url) {
temp <- tempfile()
utils::download.file(download_url, temp, mode = "wb", quiet=FALSE)
# extract data from zipfile to df using data.table to speed things up
csvfile <- utils::unzip(temp, junkpaths = TRUE)
message(paste0("Processing data from zipfile...","\n"))
cde_data <- data.table::fread(csvfile, stringsAsFactors = FALSE,
check.names = TRUE, data.table = FALSE, showProgress = TRUE)
# delete the intermediate files
unlink(temp)
unlink(csvfile)
return(cde_data)
} # end of function
#' Check common arguments to functions
#' @description Checks the ea_name, year ranges and waterbody type
#' for all functions
#
#' @param ea_name A string representing the description (name) of the
#' features to be extracted. For example to extract data for the whole of
#' the Humber RBD, this would be "Humber"; also see examples. Must be an
#' exact match to the values used in the EA database.
#' Use the \code{\link{search_names}} function to search for specific values.
#'
#' @param column The column to be searched. Possible options are
#' \code{WBID} (waterbody id), \code{OC} (Operational Catchment), \code{MC}
#' (Management Catchment) and \code{RBD} (River Basin District)
#'
#' @param startyr The data can be extracted for specific years using the
#' \code{startyr} and \code{endyr} arguments. If only \code{startyr} is
#' specified this extracts for a particular year. If no years are specified
#' all years are returned.
#'
#' @param endyr The data can be extracted for specific years using the
#' \code{startyr} and \code{endyr} arguments. The \code{endyr} should
#' only be specified if \code{startyr} is also included, otherwise an
#' error is returned.
#'
#' @param type Type of waterbody to be extracted. For Operational/Management
#' catchment level or RBD level queries, the data can also be subset by
#' waterbody type. Possible values are \code{River}, \code{Lake},
#' \code{GroundWaterBody}, \code{TransitionalWater} or \code{CoastalWater}.
#'
#' @noRd
check_args <- function(ea_name = NULL, column = NULL, startyr = NULL,
endyr = NULL, type = NULL) {
# check that both ea_name and column are present
if (is.null(ea_name) | is.null(column)) {
stop("Both ea_name (name) and column (\"WBID\", \"MC\", \"OC\",
or \"RBD\") should be specified", "\n")
}
# are years, if present, numeric?
if (!is.null(startyr) & !is.null(endyr)) {
if (!is.numeric(startyr) | !is.numeric(endyr)) {
stop("Please enter numeric values for the starting and ending years")
}
}
# if there is a startyr set
if (!is.null(startyr)) {
if (startyr < 2009) {
stop("Starting year cannot be before 2009")
}
# if there is an end year alsoset
if (!is.null(endyr)) {
# check values make sense
if (!endyr >= startyr) {
stop("End year is before start year: please correct.")
}
}
}
# catch when only endyr is set
if (is.null(startyr) & !is.null(endyr)){
stop("Only end year specified, also needs start year.")
}
# check that the waterbody type is a valid choice
if (!is.null(type)) {
types <- c("River", "Lake", "TransitionalWater", "GroundWaterBody",
"CoastalWater")
if (!type %in% types) {
stop("Type specified is not a valid choice (\"River\", \"Lake\",
\"CoastalWater\", \"TransitionalWater\" or \"GroundWaterBody\"")
}
}
}
# end of function
#' Subset data as required
#' @description Subsets data by year, year range, classification level
#' and waterbody type as required
#'
#' @param full_data The dataframe to be subset
#'
#' @param column The column to be searched. Possible options are
#' \code{WBID} (waterbody id), \code{OC} (Operational Catchment), \code{MC}
#' (Management Catchment) and \code{RBD} (River Basin District)
#'
#' @param level The level within the WFD quality status classification to be
#' extracted. Defaults to 'Overall Water Body'. See \code{\link{get_status}}.
# for possible values.
#'
#' @param startyr The data can be extracted for specific years using the
#' \code{startyr} and \code{endyr} arguments. If only \code{startyr} is
#' specified this extracts for a particular year. If no years are specified
#' all years are returned.
#'
#' @param endyr The data can be extracted for specific years using the
#' \code{startyr} and \code{endyr} arguments. The \code{endyr} should
#' only be specified if \code{startyr} is also included.
#'
#' @param type Type of waterbody to be extracted. For Operational/Management
#' catchment level or RBD level queries, the data can also be subset by
#' waterbody type. Possible values are \code{River}, \code{Lake},
#' \code{GroundWaterBody}, \code{TransitionalWater} or \code{CoastalWater}.
#'
#' @param data_type Type of data to be subset ("class", "obj" or "rnag"). If
#' "obj" then don't do subsetting to most recent as this is not appropriate.
#'
#' @return A data frame that has been subsetted by the
#' specified combination of column, value, level and dates.
#'
#' @noRd
#'
subset_data <- function(full_data, column = NULL,
level = "Overall Water Body", startyr = NULL, endyr = NULL, type = NULL, data_type=NULL) {
# if start year is set beyond the data range and not objectives
# (only value being passed in to data_type) then set to max and ignore endyr
if (!is.null(startyr) & is.null(data_type)){
if (startyr>max(full_data$year)){
message(paste0("Start year (", startyr, ") is beyond the most recent year of data (",
max(full_data$year),")"))
message("Just outputting most recent year")
startyr<-max(full_data$year)
# as only outputting most recent year, set endyr to null
if(!is.null(endyr)){
endyr<-NULL
}
}
}
# if endyr is set, is it beyond the data range?
if (!is.null(endyr)){
if (endyr>max(full_data$year)){
message(paste0("End year (", endyr, ") is beyond the most recent year of data (",
max(full_data$year),")"))
message("Subsetting to most recent year")
endyr<-max(full_data$year)
}
}
if (!is.null(startyr)) {
if(is.null(endyr)){
# if only single year specified, just take this
full_data <- full_data[full_data$year == startyr, ]
}else{
# if both years are specified, subset by range
full_data <- full_data[full_data$year >= startyr
& full_data$year <= endyr, ]
}
}
# level subsetting, defaults to "Overall Water Body"
# for Chemical and Supporting Elements levels, need to deal with options for
# surface waters and groundwaters
if (!is.null(level)){
if (level == "Chemical") {
full_data <- full_data[full_data$classification_item == "Chemical" |
full_data$classification_item == "Chemical (GW)", ]
}
else if (level == "Supporting elements") {
full_data <- full_data[full_data$classification_item ==
"Supporting elements (Surface Water)" |
full_data$classification_item ==
"Supporting elements (Groundwater)", ]
}
else {
full_data <- full_data[full_data$classification_item == level, ]
}
}
# now water_body_type
if (!is.null(type)) {
full_data <- full_data[full_data$water_body_type == type, ]
}
# if year range covers 2013 and 2014, subset to just include cycle 2 data
# avoids double counting of waterbodies
if ("cycle" %in% names(full_data)) {
full_data <- full_data[!(full_data$year == 2013 & full_data$cycle == 1 |
full_data$year == 2014 & full_data$cycle == 1), ]
}
return(full_data)
} # end of function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.