R/doctor.R

Defines functions ts_doctor_db

Documented in ts_doctor_db

#' Doctor Database
#'
#' @param check_limits A flag indicating whether to check if corrected values
#'   outside the lower and upper limits are coded as erroneous.
#' @param check_period A flag indicating whether to check if the periods are
#'   valid.
#' @param check_gaps A flag indicating whether to check if there are any gaps in
#'   the data given the period.
#' @param fix A flag indicating whether to fix any problems
#' @inheritParams ts_disconnect_db
#' @return A flag indicating whether or not the database passed the checks (or
#'   was fixed)
#' @export
ts_doctor_db <- function(check_limits = TRUE,
                         check_period = FALSE,
                         check_gaps = FALSE,
                         fix = FALSE,
                         conn = getOption("tsdbr.conn", NULL)) {
  chk_flag(check_limits)
  chk_flag(check_period)
  chk_flag(check_gaps)
  chk_flag(fix)
  
  on.exit(DBI::dbExecute(conn, "DELETE FROM Upload;"))
  on.exit(DBI::dbExecute(conn, "VACUUM;"), add = TRUE)
  
  span <- FALSE
  period <- FALSE
  limits <- FALSE
  
  if(check_limits) {
    res <- DBI::dbSendStatement(conn,
            "SELECT d.Station, d.DateTimeData, d.Recorded, d.Corrected, d.CommentsData
            FROM Station s
            INNER JOIN Data d ON s.Station = d.Station
            WHERE (d.Corrected < s.LowerLimit OR d.Corrected > s.UpperLimit)  AND
            d.Status != 3;")
    limits <- DBI::dbFetch(res)
    DBI::dbClearResult(res)
    
    if(nrow(limits)) {
      table <- table(limits$Station)
      table <- as.data.frame(table)
      colnames(table) <- c("Station", "Count")
      
      if(fix) {
        limits$Status <- 3L
        limits <- limits[c("Station", "DateTimeData", "Recorded",
                           "Corrected", "Status", "CommentsData")]      
        limits$UploadedUTC <- sys_time_utc()
        
        DBI::dbExecute(conn, "DELETE FROM Upload;")
        add(limits, "Upload", conn)
        
        DBI::dbExecute(conn, paste0("INSERT OR REPLACE INTO Data SELECT * FROM Upload;"))
        
        DBI::dbExecute(conn, paste0("INSERT INTO Log VALUES('", limits$UploadedUTC[1], "',
                               'UPDATE', 'Data', 'REPLACE fix limits');"))
        limits <- limits[integer(0),]
      }
      message("the following stations ", ifelse(fix, "had", "have"), 
              " non-erroneous (corrected) data", 
              " that are outside the lower and upper limits:\n",
              paste0(utils::capture.output(table), collapse = "\n"))
    } 
    limits <- nrow(limits) > 0
  }
  
  if(check_period) {
    
    warning("check for data period is temporarily disabled")
    
    # period <- DBI::dbGetQuery(conn, "
    #   SELECT d.Station AS Station, s.Period AS Period,
    #     MAX(STRFTIME('%m', d.DateTimeData)) != '01' AS MonthData,
    #     MAX(STRFTIME('%d', d.DateTimeData)) != '01' AS DayData,
    #     MAX(STRFTIME('%H', d.DateTimeData)) != '00' AS HourData,
    #     MAX(STRFTIME('%M', d.DateTimeData)) != '00' AS MinuteData,
    #     MAX(STRFTIME('%S', d.DateTimeData)) != '00' AS SecondData
    #   FROM Station s
    #   INNER JOIN Data d ON s.Station = d.Station
    #   GROUP BY s.Station, s.Period
    #   HAVING
    #     (SecondData == 1 AND Period IN ('year', 'month', 'day', 'hour', 'minute')) OR
    #     (MinuteData == 1 AND Period IN ('year', 'month', 'day', 'hour')) OR
    #     (HourData == 1 AND Period IN ('year', 'month', 'day')) OR
    #     (DayData == 1 AND Period IN ('year', 'month')) OR
    #     (MonthData == 1 AND Period IN ('year'));")
    # 
    # if(nrow(period)) {
    #   if(fix) {
    #     warning("fix period not yet implemented")
    #   }
    #   message("the following stations ", ifelse(FALSE, "had", "have"), 
    #           " date time data that are inconsistent with their periods: ",
    #           punctuate(period$Station, "and"))
    # }
    # 
    # period <- nrow(period) > 0
    
  }
  
  if(check_gaps) {
    
    res <- DBI::dbSendStatement(conn,
              "SELECT s.Station AS Station, s.Period AS Period,
              d.Start AS Start, d.End AS End
              FROM Station AS s INNER JOIN
              DataSpan AS d ON s.Station = d.Station")
    span <- DBI::dbFetch(res)
    DBI::dbClearResult(res)
    
    span <- split(span, 1:nrow(span))
    span <- lapply(span, FUN = function(x) {
      datetimes <- seq(as.POSIXct(x$Start, tz = "UTC", origin = "1970-01-01"),
                       as.POSIXct(x$End, tz = "UTC", origin = "1970-01-01"),
                       by = x$Period)
      datetimes <- format(datetimes, format = "%Y-%m-%d %H:%M:%S")
      data.frame(ID = paste(x$Station, datetimes)) })
    span <- do.call("rbind", span)
    
    res <- DBI::dbSendStatement(conn, "SELECT Station, DateTimeData FROM Data")
    data <- DBI::dbFetch(res)
    DBI::dbClearResult(res)
    
    data$DateTimeData <- as.character(data$DateTimeData)
    data$DateTimeData[!grepl(" ", data$DateTimeData)] <- paste(data$DateTimeData[!grepl(" ", data$DateTimeData)], "00:00:00")
    data <- data.frame(ID = paste(data$Station, data$DateTimeData))
    
    span <- data.frame(ID = setdiff(span$ID, data$ID))
    rm(data)
    
    span$Station <- sub("(.*)(\\s)(\\d{4,4}-\\d{2,2}-\\d{2,2} \\d{2,2}:\\d{2,2}:\\d{2,2})", "\\1", span$ID)
    span$DateTimeData <- sub("(.*)(\\s)(\\d{4,4}-\\d{2,2}-\\d{2,2} \\d{2,2}:\\d{2,2}:\\d{2,2})", "\\3", span$ID)
    span$DateTimeData <- as.numeric(dttr2::dtt_date_time(span$DateTimeData))
    span$ID <- NULL
    
    if(nrow(span)) {
      table <- table(span$Station)
      table <- as.data.frame(table)
      colnames(table) <- c("Station", "Count")
      
      if(fix) {
        span$Recorded <- NA_real_
        span$Corrected <- NA_real_
        span$Status <- 1L
        span$CommentsData <- NA_character_
        span$UploadedUTC <- sys_time_utc()
        
        DBI::dbExecute(conn, "DELETE FROM Upload;")
        add(span, "Upload", conn)
        
        DBI::dbExecute(conn, paste0("INSERT OR ABORT INTO Data SELECT * FROM Upload;"))
        
        DBI::dbExecute(conn, paste0("INSERT INTO Log VALUES('", span$UploadedUTC[1], "',
                               'INSERT', 'Data', 'ABORT - fix gaps');"))
        span <- span[integer(0),]
      }
      message("the following stations ", ifelse(fix, "had", "have"), 
              " gaps in their data:\n",
              paste0(utils::capture.output(table), collapse = "\n"))
    }
    span <- nrow(span) > 0
  }
  !limits && !span # && !period
}
poissonconsulting/tsdbr documentation built on June 30, 2023, 3:02 a.m.