#' Download QLT data
#'
#' Downloads QLT data from SQLite database.
#'
#' @param station A character vector of the station codes.
#' @param frequency A string of the period to average by (by default 'hourly' but can also be 'raw' or 'daily').
#' @param start_date A Date element of the beginning of the time series.
#' @param end_date A Date element of the end of the time series.
#' @param status A string indicating which type of values should be included possible
#' values are 'reasonable', 'questionable' and 'erroneous'. The status of the values
#' is indicated in the returned data frame.
#' @param surrogate A string indicating whether to exclude surrogates ('none'),
#' replace missing values with surrogates ('missing')
#' or include all surrogates ('duplicate')
#' @param add_missing A flag indicating whether to add missing values.
#' @param file A string of the name of the SQLite database to create (without the extension .sqlite).
#' @param dir A string of the directory.
#' @return A tbl data frame.
#' @export
qlt_download_data <- function(
station = "DDM",
frequency = "hourly",
start_date = as.Date("2016-01-01"),
end_date = Sys.Date(),
status = "questionable",
surrogate = "missing",
add_missing = TRUE,
file = getOption("kootqlt.file", "kootqlt"),
dir = getOption("kootqlt.dir", ".")) {
check_vector(station, kootqlt::station$Station, length = c(1, Inf))
check_vector(frequency, c("raw", "hourly", "daily"), length = 1)
check_date(start_date)
check_vector(end_date)
check_vector(status, c("reasonable", "questionable", "erroneous"), length = 1)
check_vector(surrogate, c("none", "missing", "duplicate"), length = 1)
check_flag(add_missing)
check_string(dir)
check_string(file)
if (end_date > Sys.Date()) ps_error("end_date cannot be in the future")
if (end_date < start_date) ps_error("end_date must be after start_date")
if(dir != ".") file %<>% file.path(dir, .)
file %<>% paste0(".sqlite")
if (!file.exists(file)) ps_error("file '", file, "' does not exist")
conn <- DBI::dbConnect(RSQLite::SQLite(), file)
DBI::dbGetQuery(conn, "PRAGMA foreign_keys = ON;")
on.exit(DBI::dbDisconnect(conn))
data <- dplyr::tbl(conn, "Download") %>%
dplyr::filter_(~Station %in% station) %>%
dplyr::collect(n = Inf)
data$DateTimeReading %<>% as.POSIXct(tz = "Etc/GMT+8")
data$Date <- lubridate::date(data$DateTimeReading)
data %<>% dplyr::filter_(~Date >= start_date, ~Date <= end_date)
if (frequency != "raw") {
if (frequency == "daily") lubridate::hour(data$DateTimeReading) <- 0L
lubridate::minute(data$DateTimeReading) <- 0L
lubridate::second(data$DateTimeReading) <- 0L
data %<>%
dplyr::group_by(.data$Station, .data$DateTimeReading, .data$Date, .data$Surrogate) %>%
dplyr::summarise_("Value" = "mean(Value)", "Status" = "max(Status)") %>%
dplyr::ungroup()
}
status <- switch(status, "reasonable" = 1, "questionable" = 2, "erroneous" = 3)
data %<>% dplyr::filter_(~Status <= status)
data$Status %<>% ordered(levels = 1:3)
levels(data$Status) <- list(Reasonable = "1",
Questionable = "2",
Erroneous = "3",
Missing = "4")
data %<>%
dplyr::inner_join(kootqlt::station, by = "Station") %>%
dplyr::select_(~DateTimeReading, ~Station,
~Value, ~Variable, ~Units,
~Surrogate, ~Status, ~Location)
is.na(data$Surrogate[data$Surrogate == "no"]) <- TRUE
if (surrogate == "none") {
data %<>% dplyr::filter_(~is.na(Surrogate))
} else if(surrogate == "missing") {
surrogate_data <- dplyr::filter_(data, ~!is.na(Surrogate)) %>%
dplyr::filter_(~!is.na(Value))
data %<>% dplyr::filter_(~is.na(Surrogate))
if(nrow(surrogate_data)) {
surrogate_data %<>% dplyr::anti_join(data, by = c("Station", "DateTimeReading"))
dplyr::bind_rows(data, surrogate_data)
}
}
if(add_missing) {
data %<>% poisdata::ps_add_missing_sequence(
"DateTimeReading", by = c("Station", "Variable", "Units", "Location"))
data$Status[is.na(data$Status)] <- "Missing"
}
data %<>% dplyr::arrange(.data$Station, .data$DateTimeReading) %>%
dplyr::as.tbl()
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.