R/hy_monthly_levels.R

Defines functions hy_monthly_levels

Documented in hy_monthly_levels

# Copyright 2017 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

#' Extract monthly levels information from the HYDAT database
#'
#' Tidy data of monthly river or lake levels information from the DLY_LEVELS HYDAT table. `station_number` and
#'   `prov_terr_state_loc` can both be supplied. If both are omitted all values from the `hy_stations` table are returned.
#'   That is a large vector for `hy_monthly_levels`.
#'
#' @inheritParams hy_stations
#' @param start_date Leave blank if all dates are required. Date format needs to be in YYYY-MM-DD. Date is inclusive.
#' @param end_date Leave blank if all dates are required. Date format needs to be in YYYY-MM-DD. Date is inclusive.
#'
#' @return A tibble of monthly levels. 
#'
#' @format A tibble with 8 variables:
#' \describe{
#'   \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number}
#'   \item{Year}{Year of record.}
#'   \item{Month}{Numeric month value}
#'   \item{Full_month}{Logical value is there is full record from Month}
#'   \item{No_days}{Number of days in that month}
#'   \item{Sum_stat}{Summary statistic being used.} 
#'   \item{Value}{Value of the measurement in metres.}
#'   \item{Date_occurred}{Observation date. Formatted as a Date class. MEAN is a annual summary 
#'   and therefore has an NA value for Date.}
#' }
#'
#' @examples
#' \dontrun{
#' hy_monthly_levels(station_number = c("02JE013","08MF005"), 
#'   start_date = "1996-01-01", end_date = "2000-01-01")
#'
#' hy_monthly_levels(prov_terr_state_loc = "PE")
#'           }
#' @family HYDAT functions
#' @source HYDAT
#' @export



hy_monthly_levels <- function(station_number = NULL,
                           hydat_path = NULL,
                           prov_terr_state_loc = NULL, 
                           start_date =NULL, 
                           end_date = NULL) {

  ## Determine which dates should be queried
  dates_null <- date_check(start_date, end_date)
  
  ## Read in database
  hydat_con <- hy_src(hydat_path)
  if (!dplyr::is.src(hydat_path)) {
    on.exit(hy_src_disconnect(hydat_con), add = TRUE)
  }

  ## Determine which stations we are querying
  stns <- station_choice(hydat_con, station_number, prov_terr_state_loc)
  
  ## Creating rlang symbols
  sym_YEAR <- sym("Year")
  sym_STATION_NUMBER <- sym("STATION_NUMBER")
  sym_variable <- sym("variable")
  sym_temp <- sym("temp")
  sym_temp2 <- sym("temp2")

  ## Data manipulations to make it "tidy"
  monthly_levels <- dplyr::tbl(hydat_con, "DLY_LEVELS")
  monthly_levels <- dplyr::filter(monthly_levels, !!sym_STATION_NUMBER %in% stns)

  ## Do the initial subset to take advantage of dbplyr only issuing sql query when it has too
  
  ## by year
  if (!dates_null[["start_is_null"]]) monthly_levels <- dplyr::filter(monthly_levels, !!sym_YEAR >= lubridate::year(start_date))
  if (!dates_null[["end_is_null"]]) monthly_levels <- dplyr::filter(monthly_levels, !!sym_YEAR <= lubridate::year(end_date))

  monthly_levels <- dplyr::select(monthly_levels, .data$STATION_NUMBER:.data$MAX)
  monthly_levels <- dplyr::collect(monthly_levels)
  
  if(is.data.frame(monthly_levels) && nrow(monthly_levels)==0)
  {stop("This station is not present in HYDAT")}
  
  ## Need to rename columns for gather
  colnames(monthly_levels) <- c("STATION_NUMBER","Year","Month", "PRECISION_CODE", "Full_month", "No_days", "MEAN_Value",
                           "TOTAL_Value", "MIN_DAY","MIN_Value", "MAX_DAY","MAX_Value")
  
  

  monthly_levels <- tidyr::gather(monthly_levels, !!sym_variable, !!sym_temp, -(.data$STATION_NUMBER:.data$No_days))
  monthly_levels <- tidyr::separate(monthly_levels, !!sym_variable, into = c("Sum_stat","temp2"), sep = "_")

  monthly_levels <- tidyr::spread(monthly_levels, !!sym_temp2, !!sym_temp)

  ## convert into R date for date of occurence.
  monthly_levels <- dplyr::mutate(monthly_levels, Date_occurred = paste0(.data$Year, "-", .data$Month, "-", .data$DAY))
  
  ## Check if DAY is NA and if so give it an NA value so the date parse correctly.
  monthly_levels <- dplyr::mutate(monthly_levels, Date_occurred = ifelse(is.na(.data$DAY), NA, .data$Date_occurred))
  monthly_levels <- dplyr::mutate(monthly_levels, Date_occurred = lubridate::ymd(.data$Date_occurred, quiet = TRUE))
  
  ## Then when a date column exist fine tune the subset
  if (!dates_null[["start_is_null"]]) monthly_levels <- dplyr::filter(monthly_levels, .data$Date_occurred >= start_date)
  if (!dates_null[["end_is_null"]]) monthly_levels <- dplyr::filter(monthly_levels, .data$Date_occurred <= end_date)

  monthly_levels <- dplyr::select(monthly_levels, -.data$DAY)
  monthly_levels <- dplyr::mutate(monthly_levels, Full_month = .data$Full_month == 1)
  
  
  attr(monthly_levels,'missed_stns') <- setdiff(unique(stns), unique(monthly_levels$STATION_NUMBER))
  as.hy(monthly_levels)
}

Try the tidyhydat package in your browser

Any scripts or data that you put into this service are public.

tidyhydat documentation built on Sept. 15, 2021, 5:08 p.m.