Nothing
# Copyright 2019 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
#' @title Fills data gaps of missing dates
#'
#' @description Fills data gaps of missing dates of the data provided. Builds a continuous data set from the start date to the end date.
#' Only missing dates are filled, columns not specified as dates or groups will be filled with NA. Will completely fill first and
#' last years, unless specified using \code{pad_ends = FALSE}.
#'
#' @inheritParams calc_annual_stats
#' @param values Name of column in \code{data} that contains numeric flow values, in units of cubic metres per second. Not required as
#' of fasstr 0.3.3 as all other columns are filled with \code{NA}.
#' @param pad_ends Logical value indicating whether to fill incomplete start and end years with rows of dates.
#' If \code{FALSE} then only missing dates between the provided start and end dates will be filled. Default \code{TRUE}.
#'
#' @return A tibble data frame of the source data with additional rows where missing dates existed.
#'
#' @examples
#' # Run if HYDAT database has been downloaded (using tidyhydat::download_hydat())
#' if (file.exists(tidyhydat::hy_downloaded_db())) {
#'
#' # Fill missing dates with NA using calendar years
#' fill_missing_dates(station_number = "08NM116")
#'
#' # Fill missing dates with NA using water years starting in August
#' fill_missing_dates(station_number = "08NM116",
#' water_year_start = 8)
#'
#' }
#' @export
fill_missing_dates <- function(data,
dates = Date,
values = Value,
groups = STATION_NUMBER,
station_number,
water_year_start = 1,
pad_ends = TRUE){
## ARGUMENT CHECKS
## ---------------
if (missing(data)) {
data <- NULL
}
if (missing(station_number)) {
station_number <- NULL
}
if (!is.logical(pad_ends[1]))
stop("pad_ends must be logical (TRUE/FALSE).", call. = FALSE)
if (as.character(substitute(values)) != "Value")
message("values argument is deprected for this function and not required. values still filled with NA if provided.")
water_year_checks(water_year_start)
## FLOW DATA CHECKS AND FORMATTING
## -------------------------------
# Check if data is provided and import it
flow_data <- flowdata_import(data = data, station_number = station_number)
# Save the original columns and groups to return at the end
orig_cols <- names(flow_data)
orig_groups <- dplyr::group_vars(flow_data)
# Check and rename columns
flow_data <- format_dates_col(data = flow_data, dates = as.character(substitute(dates)))
flow_data <- format_groups_col(data = flow_data, groups = as.character(substitute(groups)))
## FILL IN GAPS
## ------------
# Loop through each station number, fill in gaps and append
flow_data <- dplyr::bind_rows(
lapply(unique(flow_data$STATION_NUMBER), function(stn){
# Filter for station number
flow_data_stn <- dplyr::filter(flow_data, STATION_NUMBER == stn)
flow_data_stn <- flow_data_stn[order(flow_data_stn$Date), ]
# Get the start/end dates
start_date <- min(flow_data_stn$Date, na.rm = TRUE)
end_date <- max(flow_data_stn$Date, na.rm = TRUE)
# Override start/end dates if filling end years
if (pad_ends[1]) {
min_month <- as.numeric(format(as.Date(min(flow_data_stn$Date, na.rm = TRUE)), format = "%m"))
min_year <- as.numeric(format(as.Date(min(flow_data_stn$Date, na.rm = TRUE)), format = "%Y"))
start_date <- as.Date(paste(ifelse(min_month < water_year_start, min_year - 1, min_year),
water_year_start, '01', sep = '-'), "%Y-%m-%d")
max_month <- as.numeric(format(as.Date(max(flow_data_stn$Date, na.rm = TRUE)), format = "%m"))
max_year <- as.numeric(format(as.Date(max(flow_data_stn$Date, na.rm = TRUE)), format = "%Y"))
end_date <- as.Date(paste(ifelse(max_month >= water_year_start, max_year + 1, max_year),
water_year_start, '01', sep = '-'), "%Y-%m-%d") - 1
}
# Fill in missing dates
flow_data_stn <- merge(flow_data_stn, data.frame(Date = seq(start_date, end_date, 1)), all.y = TRUE)
# Fill in station number and parameter gaps (will be removed if not originally there)
flow_data_stn$STATION_NUMBER <- stn
flow_data_stn$Parameter <- "Flow"
# Return and bind
flow_data_stn
}))
## Reformat to original names and groups
## -------------------------------------
# Return the original names of the columns
if (is.null(station_number)) {
names(flow_data)[names(flow_data) == "STATION_NUMBER"] <- as.character(substitute(groups))
names(flow_data)[names(flow_data) == "Date"] <- as.character(substitute(dates))
}
#Return columns to original order
flow_data <- flow_data[, orig_cols]
# Regroup by the original groups
flow_data <- dplyr::group_by(flow_data, dplyr::across(dplyr::all_of(orig_groups)))
#flow_data <- dplyr::group_by_at(flow_data, dplyr::vars(orig_groups))
dplyr::as_tibble(flow_data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.