R/mars_data_fetch_write_functions.R

Defines functions marsWriteDraindownData marsWriteOvertoppingData marsWritePercentStorageData marsWriteInfiltrationData marsFetchMonitoringData marsFetchRainEventData marsFetchLevelData marsFetchSMPSnapshot marsCheckSMPSnapshot marsFetchBaroData yday_decimal marsInterpolateBaro marsGapFillEventID marsFetchRainfallData marsFetchPrivateSMPRecords

Documented in marsCheckSMPSnapshot marsFetchBaroData marsFetchLevelData marsFetchMonitoringData marsFetchPrivateSMPRecords marsFetchRainEventData marsFetchRainfallData marsFetchSMPSnapshot marsGapFillEventID marsInterpolateBaro marsWriteDraindownData marsWriteInfiltrationData marsWriteOvertoppingData marsWritePercentStorageData yday_decimal

#Data Fetch and Write Functions

#marsFetchPrivateSMPRecords---------------------
#Rogygen
#' Return query results of private SMPs
#'
#' Returns private SMP tracking number, project name, SMP ID, and plan label.
#'
#' @param con Formal class PostgreSQL, a connection to the MARS Analysis database
#' @param tracking_number chr, tracking number for the requested SMP
#'
#' @return Output will be a dateframe with four columns:
#'   
#'     \item{tracking_number}{chr, requested SMP tracking number}
#'     \item{project name}{chr, SMP poject name}
#'     \item{smp_id}{int, SMP ID}
#'     \item{plan_label}{chr, name that the SMP is given on the plan set}
#'   
#'   If a tracking number is not found, that row will include an error message
#'   in the "project name" column, and NAs in "smp_id" and "plan_label".
#'
#' @export
#' 
#' 


marsFetchPrivateSMPRecords <- function(con, tracking_number){
  #Validate DB connection
  if(!odbc::dbIsValid(con)){
    stop("Argument 'con' is not an open ODBC channel")
  }

  #Rather than validating each individual tracking number and selecting them one at a time
  #We can grab the entire table and filter by our tracking numbers to find the valid ones
  planreviewtable <- odbc::dbGetQuery(con, "select p.\"TrackingNumber\" as tracking_number, p.\"Projectname\" as project_name, p.\"SMPID\" as smp_id, p.\"Plan Label\" as plan_label from external.tbl_planreview_crosstab p")
  hits <- dplyr::filter(planreviewtable, tracking_number %in% tracking_numbers)

  #If any of the tracking numbers weren't found, we can return an error message
  misses <- tracking_numbers[!(tracking_numbers %in% planreviewtable$tracking_number)]
  
  if(length(misses) > 0) {
    invalid <- data.frame(tracking_number = misses, project_name = "Tracking number not found. Did you type it wrong?", smp_id = NA, plan_label = NA)
    privateSMPs <- dplyr::bind_rows(hits, invalid)
  } else {
    privateSMPs <- hits
  }

  return(privateSMPs)
}

# marsFetchRainfallData ------------------------------------------
#' Return a dataframe with rain gage data
#'
#' Return data from the rain gage nearest a target SMP, for a specified date range.
#'
#' @param con Formal class 'PostgreSQL', a connection to the MARS Analysis database
#' @param target_id chr, an SMP_ID that where the user has requested data
#' @param source chr, either "gage" or "radar" to retrieve rain gage data or radar rainfall data
#' @param start_date string or POSIXCT date, format: "YYYY-MM-DD", start of data request range
#' @param end_date stringor POSIXCT date, format: "YYYY-MM-DD", end of data request range
#' @param daylightsavings logi, for now, this should always be \code{FALSE}
#'
#' @return Output will be a data frame with four columns, which corresponds to the specified SMP and date range:
#' 
#'   \item{dtime}{POSIXct datetime in America/New_York}
#'   \item{rainfall_in}{num, rainfall for the 15 minute preceding the corresponding datetime}
#'   \item{gage_uid OR radar_uid}{Unique identifier for where the data came from}
#'   \item{event_id}{event number during this timestep}
#' 
#' 
#' @seealso \code{\link[pwdgsi]{marsGapFillEventID}}, \code{\link{marsDetectEvents}}
#'
#' @export

marsFetchRainfallData <- function(con, target_id, source = c("gage", "radar"), start_date, end_date, daylightsavings){
  if(!odbc::dbIsValid(con)){
    stop("Argument 'con' is not an open ODBC channel")
  }
  start_date %<>% as.POSIXct(format = '%Y-%m-%d')
  end_date %<>% as.POSIXct(format = '%Y-%m-%d')
  
  # Was a string supplied to source?
  if( isTRUE(all.equal(source, c("gage","radar"))) ){
    stop("No argument supplied for 'source'. Provide a string of either 'gage' or 'radar'")
  }
  
  #Are we working with gages or radarcells?
  if(source == "gage" | source == "gauge"){
    rainparams <- data.frame(smptable = "admin.tbl_smp_gage", raintable = "data.viw_gage_rainfall", uidvar = "gage_uid", loctable = "admin.tbl_gage", eventuidvar = "gage_event_uid", stringsAsFactors=FALSE)
  } else if(source == "radar"){
    rainparams <- data.frame(smptable = "admin.tbl_smp_radar", raintable = "data.viw_radar_rainfall", uidvar = "radar_uid", loctable = "admin.tbl_radar", eventuidvar = "radar_event_uid", stringsAsFactors=FALSE)
  } else { #Parameter is somehow invalid
    stop("Argument 'source' is not one of 'gage' or 'radar'")
  }
  
  #Get closest rainfall source
  smp_query <- paste0("SELECT * FROM ", rainparams$smptable)
  #print(smp_query)
  rainsource <- DBI::dbGetQuery(con, smp_query) %>% dplyr::filter(smp_id == target_id) %>% dplyr::pull(rainparams$uidvar)
  
  #Collect gage data
  #First, get all the relevant data from the closest gage
  rain_query <- paste(paste0("SELECT * FROM ", rainparams$raintable, " "),
                      paste0("WHERE ", rainparams$uidvar, " = CAST('", rainsource, "' as int)"),
                      "AND dtime >= Date('", start_date, "')",
                      "AND dtime <= Date('", end_date + lubridate::days(1), "');")
  # Get rain data
  rain_temp <- DBI::dbGetQuery(con, rain_query)
  # Error messages if there are no rows in returned from the DB
  if(nrow(rain_temp) == 0){

    if(lubridate::month(start_date) == lubridate::month(lubridate::today())){
      stop(paste("Rainfall data appears in the MARS database on about a 5 week delay. \nData for", lubridate::month(start_date, label = TRUE, abbr = FALSE), "should be available in the second week of", lubridate::month(lubridate::today() + lubridate::dmonths(1), label = TRUE, abbr = FALSE)))
    }
    stop("There is no data in the database for this date range.")
  }
  # Return rainfall in order
  rain_temp |> dplyr::arrange(dtime)
}

# marsGapFillEventID -----------------------
#When determining the appropriate event ID for zero-punctuated timestamps in a rainfall series, use this function
#Zeroes that appear within an event (ie less than 6 hours of time has elapsed between measurements) should have the event ID of the event they occur in
#Zeroes that appear at event boundaries should have event NA
#This function returns an integer or NA as follows:
  #One or both of event_low or event_high is NA: return NA
    #This is a boundary adjacent to, or a hole within, an event of less than the minimum depth, and should not be counted
  #event_low != event_high and neither is NA: return NA
    #This is a boundary between two events, and should not be counted
  #event_low = event_high and neither is NA: return event_low
    #This is a hole within an event of greater than the minimum depth, and should be counted
#Check for NA arguments first because if x == NA returns NA, which chokes the conditional statement

#' Return a dataset with event IDs for zero-punctuated timesteps.
#'
#' Each rainfall event must be zero-punctuated. These zeroes are given event IDs based on the IDs of the
#' rainfall that precede and follow them, given by \code{\link{marsDetectEvents}}.
#'
#' @param event_low num, event ID of preceding rainfall.
#' @param event_high num, event ID of following rainfall.
#'
#' @return Output will be a vector containing either \code{NA} or \code{event low}. If one or both events are
#' \code{NA}, return {NA}. If the event IDs are not equal, return \code{NA}, since this is a boundary between
#' events. If the event IDs are equal, return \code{event low}.

marsGapFillEventID <- function(event_low, event_high){
  if(is.na(event_low) | is.na(event_high)){
    #print("One or both events are NA. Returning NA")
    return(NA) #This is a boundary adjacent to, or a hole within, an event of less than the minimum depth, and should not be counted
  }
  else if(event_low != event_high){
    #print("Events are not equal. Returning NA")
    return(NA) #This is a boundary between two events, and should not be counted
  }
  else if(event_low == event_high){
    #print(paste("Events are equal. Returning", event_low))
    return(event_low) #This is a hole within an event of greater than the minimum depth, and should be counted
  }
}

# marsInterpolateBaro -------------------------
#When requesting baro data, if an SMP has a baro with data on-site for a specific timestep, use that baro.
#If not, use the inverse distance weighted interpolation of all baros with data.
#baro_psi is a vector of baro pressures all measured at the same time
#smp_id is a vector of SMP IDs where the measurements took place
#weights is a vector of inverse distance weights to be applied
#target_id is a single SMP ID where the user has requested data

#roxygen2
#' Interpolate barometric pressure with inverse distance weighting
#'
#' Returns an interpolated barometric pressure reading or \code{NA}
#'
#' @seealso \itemize{
#'      \code{\link{marsFetchBaroData}},
#'      data: \code{\link{marsSampleBaro}}
#'  }   
#'
#' @param baro_psi vector, num, barometric pressures measured at the same timestamp
#' @param smp_id vector, chr, SMP IDs where the measurements took place
#' @param weight vector, num, of inverse distances weights for each baro, calculated by \code{\link{marsFetchBaroData}}
#' @param target_id chr, single SMP ID where the user has requested data
#'
#' @return Output will be a single barometric pressure reading.
#'   If there are 4 or greater baros with data,
#'   the reading will be an inverse distance-weighted
#'   interpolation of those readings.
#'   If there are fewer than 4 readings, return \code{NA}.
#'
#' @export
#' 
#' @examples
#' 
#'  data(marsSampleBaro)
#' 
#'  marsInterpolateBaro(
#'    baro_psi = marsSampleBaro[[1]]$baro_psi, 
#'    smp_id = marsSampleBaro[[1]]$smp_id, 
#'    weight = marsSampleBaro[[1]]$weight, 
#'    target_id = marsSampleBaro[[2]]
#'    )
#' 


marsInterpolateBaro <- function(baro_psi, smp_id, weight, target_id){

 if(length(baro_psi) >= 1){
   return(sum(baro_psi*weight)/sum(weight))
 }else{
   return(NA)
 }
}


# yday_decimal ------------------------------------------------------------

#' Fetch a decimal day from a datetime
#'
#' Return the day of the year, with hours and seconds as the decimal
#'
#' @param dtime POSIXct, format: "POSIXct, format: "YYYY-MM-DD HH:MM:SS""
#' 
#' @return Output with be a day with a decimal
#' 
#' @export

yday_decimal <- function(dtime){
  lubridate::yday(dtime) + lubridate::hour(dtime)/24 + lubridate::minute(dtime)/(24*60) + lubridate::second(dtime)/(24*60*60)
  #### Does not include POSIXct types
}



# marsFetchBaroData --------------------------------

#' Fetch barometric pressure data for a target SMP, date range, and interval
#'
#' Returns a data frame with datetime, barometric pressure, smp id, and number of neighbors
#'   interpolated from to collect the data. 
#'   
#' @param con An ODBC connection to the MARS Analysis database returned by odbc::dbConnect
#' @param target_id chr, single SMP ID where the user has requested data
#' @param start_date string or POSIXct, format: "YYYY-MM-DD", start of data request range
#' @param end_date string or POSIXct, format: "YYYY-MM-DD", end of data request range
#' @param data_interval chr, \code{"5 mins"} or \code{"15 mins"}, interval at which baro data will be returned.
#'
#' @return Output will be a dataframe with four columns: 
#'   
#'     \item{dtime}{POSIXct, format: "YYYY-MM-DD HH:MM:SS" in America/New_York}
#'     \item{baro_psi}{num, barometric pressure in psi}
#'     \item{smp_id}{chr, SMP ID for each baro}
#'     
#'     If there are fewer than five baros to interprolate from, based on \code{\link{marsInterpolateBaro}}
#' 
#' @export
#'
#' @seealso \code{\link{marsInterpolateBaro}}
#'
#'

marsFetchBaroData <- function(con, target_id, start_date, end_date, data_interval = c("5 mins", "15 mins")){
  if(!odbc::dbIsValid(con)){
    stop("Argument 'con' is not an open ODBC channel")
  }
  
  
  #Handle date Conversion
  start_date %<>% as.POSIXct(format = '%Y-%m-%d')
  end_date %<>% as.POSIXct(format = '%Y-%m-%d')
  
  #Get SMP locations, and the locations of the baro sensors
  smp_loc <- DBI::dbGetQuery(con, "SELECT * FROM admin.tbl_smp_loc")
  locus_loc <- dplyr::filter(smp_loc, smp_id == target_id)
  baro_smp <- DBI::dbGetQuery(con, "SELECT DISTINCT smp_id FROM admin.tbl_baro_rawfile;") %>% dplyr::pull(smp_id)
  
  # #Collect baro data
  # #Get all baro data for the specified time period
  baro <- DBI::dbGetQuery(con, paste0("SELECT * FROM data.viw_barodata_smp WHERE dtime >= '", start_date, "'", " AND dtime <= '", end_date + lubridate::days(1), "' order by dtime"))
  #
  baro_latest_dtime <- DBI::dbGetQuery(con, paste0("SELECT max(dtime) FROM data.tbl_baro WHERE dtime < '", end_date + lubridate::days(1), "'")) %>% dplyr::pull()
  baro_latest_valid <- DBI::dbGetQuery(con, paste0("SELECT max(dtime) FROM data.viw_barodata_neighbors WHERE neighbors >= 4 and dtime < '", end_date + lubridate::days(1), "'")) %>% dplyr::pull()

  if(length(baro$dtime) == 0){
    stop (paste0("No data available in the reqested interval. The latest available baro data is from ", baro_latest_dtime, "."))
  }

  #this is a seperate pipe so that it could be stopped before the error
  needs_thickening <- baro$dtime %>% lubridate::second() %>% {. > 0} %>% any() == TRUE
  if(needs_thickening == TRUE){
    baro %<>% padr::thicken(interval = "5 mins", rounding = "down") %>%
      dplyr::group_by(dtime_5_min, smp_id) %>%
      dplyr::summarize(baro_psi = max(baro_psi, na.rm = TRUE)) %>%
      dplyr::select(dtime = dtime_5_min, smp_id, baro_psi) %>%
      dplyr::ungroup()
  }else{
    baro %<>% dplyr::group_by(dtime, smp_id) %>%
      dplyr::summarize(baro_psi = max(baro_psi, na.rm = TRUE)) %>%
      dplyr::select(dtime, smp_id, baro_psi) %>%
      dplyr::ungroup()
  }

  baro$dtime %<>% lubridate::with_tz(tz = "America/New_York")

  #initialize countNAs_t in case the loop doesn't run. It is passed as a param to markdown so it needs to exist.
  countNAs_t <- 0

  #When the user requests data at a 5-minute resolution, we need to stretch our 15-minute data into 5-minute data
  #We can use tidyr::spread and padr::pad to generate the full 5 minute time series,
  #And then use zoo::na.locf (last observation carried forward) to fill the NAs with the most recent value
  if(data_interval == "5 mins"){

    #Spread data to have all baro measurements use the same dtime_est column
    #So we can pad every 15-minute time series at once
    baro <- tidyr::spread(baro, "smp_id", "baro_psi")

    #Pad installs 5 minute intervals in our 15 minute dtime_est column. All other columns become NA
    #End value is 10 minutes after the final period because that 15 minute data point is good for 10 more minutes
    baro_pad <- padr::pad(baro, start_val = min(baro$dtime), end_val = max(baro$dtime) + lubridate::minutes(10), interval = "5 mins")

    #To count the LOCF operations, we count the NAs in the data frame before and after the LOCF
    countNAs <- baro_pad[1,]
    for(i in 2:ncol(baro_pad)){
      countNAs[,i] <- sum(is.na(baro_pad[,i])) #count NAs before they are filled
      baro_pad[,i] <- zoo::na.locf(baro_pad[,i], maxgap = 2, na.rm = FALSE) #maxgap = 2 means only fill NAs created by the pad
      countNAs[,i] <- countNAs[,i]- sum(is.na(baro_pad[,i])) #subtract remaining NAs to get number of NAs filled
    }
    countNAs %<>% dplyr::select(-dtime)
    countNAs_t <- countNAs %>% t() %>% data.frame() %>% tibble::rownames_to_column() %>%  magrittr::set_colnames(c("Location", "No. of LOCFs"))

    #Return baro data to long data format
    baro <- tidyr::gather(baro_pad, "smp_id", "baro_psi", -dtime) %>%
      dplyr::filter(!is.na(baro_psi))
  }
  #Calculate the distance between every baro location and the target SMP, then add weight
  baro_weights <- dplyr::filter(smp_loc, smp_id %in% baro_smp) %>%
    dplyr::mutate(lon_dist = lon_wgs84 - locus_loc$lon_wgs84,
                  lat_dist = lat_wgs84 - locus_loc$lat_wgs84,
                  dist_total = sqrt(lon_dist**2 + lat_dist**2)) %>%
    dplyr::mutate(weight = 1/dist_total) %>% #inverse distance weight with power = 1
    dplyr::select(smp_id, weight) %>%
    dplyr::arrange(smp_id)

  #Cap weight at 1000
  baro_weights$weight <- replace(baro_weights$weight, baro_weights$weight > 1000, 1000)
  # Interpolate and return the baro data
  interpolated_baro <- dplyr::left_join(baro, baro_weights, by = "smp_id") %>% #join baro and weights
    dplyr::group_by(dtime) %>% #group datetimes, then calculate weighting effect for each datetime
    dplyr::summarize(baro_psi = marsInterpolateBaro(baro_psi, smp_id, weight, target_id),
                     smp_id =  "Interpolated",
                     neighbors = dplyr::n()) %>%
    zoo::na.trim(sides = "right") #trim trailing NAs
  
}

# marsCheckSMPSnapshot --------------------------------
#' Check if a data snapshot for an SMP exists
#'
#' Returns a vector of boolean outcomes, TRUE if a snapshot exists or is written, FALSE if one does not exist
#'   
#' @param con An ODBC connection to the MARS Analysis database returned by odbc::dbConnect
#' @param smp_id vector of chr, SMP ID, where the user has requested data
#' @param ow_suffix vector of chr, OW Suffix, where the user has requested data
#' @param request_date single date or vector the length of SMP ID, either "YYYY-MM-DD" or "today", of the request data
#'
#' @return vector of boolean outcomes,
#'   
#'     \item{snapshot_present}{bool, TRUE or FALSE value based on whether a snapshot can exist or is made}
#'     
#' @export
#'   

marsCheckSMPSnapshot <- function(con, smp_id, ow_suffix, request_date){
  
  #1 Argument Validation
  #1.1 Check database connection
  if(!odbc::dbIsValid(con)){
    stop("Argument 'con' is not an open ODBC channel")
  }
  
  #1.2 check argument lengths
  if(length(smp_id) != length(ow_suffix)){
    stop("smp_id and ow_suffix must be of equal length")
  }
  
  if(length(smp_id) != length(request_date) & length(request_date) != 1){
    stop("request_date must be a single date, or equal length to smp_id and ow_suffix")
  }
  
  #1.3 Assign today() to 'today'
  request_date <- stringr::str_replace(request_date, "today", as.character(lubridate::today()))
  
  #1.4 Check if smp_id and ow_suffix combination are valid
  #1.4.1 Create dataframe
  request_df <- data.frame(smp_id, ow_suffix, request_date, stringsAsFactors = FALSE)
  
  #1.4.2 Query fieldwork.tbl_ow and check if each smp id and observation well are there
  # Initialize dataframe
  ow_validity <- data.frame(ow_uid = numeric(), 
                            smp_id =  character(),  
                            ow_suffix = character(), 
                            stringsAsFactors = FALSE)
  
  # Check if smp_id and ow_suffix are in the MARS table "fieldwork.tbl_ow"
  # Return matches
  for(i in 1:length(request_df$smp_id)){
    ow_valid_check <- odbc::dbGetQuery(con, "SELECT * FROM fieldwork.tbl_ow") %>% dplyr::select(-facility_id) %>%  dplyr::filter(smp_id == request_df$smp_id[i] & ow_suffix == request_df$ow_suffix[i])
    ow_validity <- dplyr::bind_rows(ow_validity, ow_valid_check)
  }
  
  
  # Join dates back to observation wells and ow_uids back to request criteria
  ow_validity %<>% dplyr::left_join(request_df, by = c("smp_id", "ow_suffix")) 
  request_df_validity <- dplyr::left_join(request_df  %>% dplyr::select(-request_date), ow_validity, by = c("smp_id", "ow_suffix"))
  
  #2 Query
  #2.1 initialize dataframe
  result <- data.frame("snapshot_uid" = numeric(),
                       "ow_uid" = numeric(),
                       "dcia_ft2" = numeric(),
                       "storage_footprint_ft2" = numeric(), 
                       "orifice_diam_in" = numeric(),
                       "infil_footprint_ft2" = numeric(),
                       "storage_depth_ft" = numeric(),
                       "lined" = character(), # make logical later 
                       "surface" = character(),  # make logical later 
                       "storage_volume_ft3" = numeric(),
                       "infil_dsg_rate_inhr" = numeric(),
                       stringsAsFactors = FALSE)
  
  
  #2.2 Run get_arbitrary_snapshot in a loop and bind results
  for(i in 1:length(ow_validity$smp_id)){
    snapshot_query <- paste0("SELECT * FROM metrics.fun_get_arbitrary_snapshot('", ow_validity$smp_id[i], "','", ow_validity$ow_suffix[i], "','", ow_validity$request_date[i], "') ORDER BY snapshot_uid DESC LIMIT 1")
    new_result <- odbc::dbGetQuery(con, snapshot_query)
    result <- dplyr::bind_rows(result, new_result)
  }
  
  #3 Are the snapshots we have equal to the current GreenIT/PlanReview values!?
  
  #3.0.5 Where are we looking?
  table_query <- paste0('select smp_id, \'viw_planreview_crosstab_snapshot\' as location from external.tbl_planreview_crosstab
                         where smp_id IN (\'',paste(ow_validity$smp_id, collapse = "', '"),'\')  
                         UNION
                         select smp_id, \'viw_greenit_unified\' as location from external.viw_greenit_unified
                         where  smp_id IN (\'',paste(ow_validity$smp_id, collapse = "', '"),'\')')
  
  check_table <- odbc::dbGetQuery(con, table_query)
  
  
  #3.1 Let's look at the view greenit unified/planreview crosstab
  current_values <- data.frame("ow_uid" = numeric(),
                               "dcia_ft2" = numeric(),
                               "storage_footprint_ft2" = numeric(), 
                               "orifice_diam_in" = numeric(),
                               "infil_footprint_ft2" = numeric(),
                               "storage_depth_ft" = numeric(),
                               "lined" = character(), # make logical later 
                               "surface" = character(),  # make logical later 
                               "storage_volume_ft3" = numeric(),
                               "infil_dsg_rate_inhr" = numeric(),
                               stringsAsFactors = FALSE)
  
  # change the query based on the smp chosen
  for(i in 1:nrow(check_table)){
    current_values_query <- paste0("SELECT * FROM external.", check_table$location[i]," where smp_id IN ('",check_table$smp_id[i],"')")
    current_value <- odbc::dbGetQuery(con, current_values_query)
    current_values <- dplyr::bind_rows(current_values, current_value)
  }

  
  #3.2 Now let's compare between the two for differences
  comp_fields <- c("ow_uid", "dcia_ft2", "storage_footprint_ft2",
                   "orifice_diam_in", "infil_footprint_ft2", "storage_depth_ft",
                   "lined", "surface", "storage_volume_ft3", "infil_dsg_rate_inhr")

  comp_values <- current_values %>% dplyr::select(all_of(comp_fields)) %>% arrange(ow_uid)
  comp_result <- result %>% dplyr::select(all_of(comp_fields)) %>% arrange(ow_uid)
  
  # let's compare
  for(i in length(comp_result$ow_uid)){
  result_hash <- digest::digest(comp_result[i,], algo = "md5")
  value_hash <- digest::digest(comp_values[i,], algo = "md5")  
  
  if(result_hash != value_hash){
    
    smp_id_x <- ow_validity$smp_id[ow_validity$ow_uid == comp_result$ow_uid[i]]
    suffix_x <- ow_validity$ow_suffix[ow_validity$ow_uid == comp_result$ow_uid[i]]
    
    #new snapshot entry dbSendQuery, complete tonight/tomorrow
    update_query <- paste0("select * from metrics.fun_insert_snapshot('",smp_id_x,"', '",suffix_x,"')")
    dbGetQuery(con, update_query)
    }
  
  }
  
  
  #4 Join results to original request df
  #4.1 not.na function
    not.na <- function(x){
    !is.na(x)
  }
  
  #4.2 turn it into a bool
  result_bool <- request_df_validity %>% dplyr::left_join(result, by = "ow_uid") %>%
                 dplyr::select(snapshot_uid) %>% not.na() %>% as.vector
  
  return(result_bool)
  
}



# marsFetchSMPSnapshot --------------------------------

#' Fetch data snapshot for an SMP
#'
#' Returns a data frame with requested snapshot date, SMP ID, OW suffix, and SMP snapshot
#'   
#' @param con An ODBC connection to the MARS Analysis database returned by odbc::dbConnect
#' @param smp_id vector of chr, SMP ID, where the user has requested data
#' @param ow_suffix vector of chr, OW Suffix, where the user has requested data
#' @param request_date single date or vector the length of SMP ID, either "YYYY-MM-DD" or "today", of the request data
#'
#' @return Output will be a dataframe with the following columns: 
#'   
#'     \item{smp_id}{chr, requested SMP ID}
#'     \item{ow_suffix}{chr, request OW suffix}
#'     \item{ow_uid}{num, ow_uid derived from smp_id and ow_suffix} 
#'     \item{request_date}{chr, date of requested snapshot, format: "YYYY-MM-DD}
#'     \item{snapshot_uid}{num}
#'     \item{dcia_ft2}{num}
#'     \item{storage_footprint_ft2}{num}
#'     \item{orifice_diam_in}{num}
#'     \item{infil_footprint_ft2}{num}
#'     \item{assumption_orificeheight_ft}{num}
#'     \item{storage_depth_ft}{num}
#'     \item{sumpdepth_ft}{num}
#'     \item{lined}{chr, 0 or 1 for unlined or lined, respectively}
#'     \item{surface}{chr, 0 or 1 for subsurface or surface, respectively}
#'     
#'     If a requested smp_id and ow_suffix combination is not valid, the function will return NAs for that snapshot.
#'     Function queries the SQL function "insert_snapshot", which inserts rows into the MARS table "snapshot", and, in turn, "snapshot_metadata".
#'     
#' @export
#'

marsFetchSMPSnapshot <- function(con, smp_id, ow_suffix, request_date){
  
  # Check database connection
  if(!DBI::dbIsValid(con)){
    stop("Argument 'con' is not an open ODBC channel")
  }
  
  # Check that arguemnt lengths match
  if(length(smp_id) != length(ow_suffix)){
    stop("smp_id and ow_suffix must be of equal length")
  }
  # Check that there is a single date or equal to the number of smp_ids and ow_suffixes
  #### I don't think this works as intended/described
  if(length(smp_id) != length(request_date) & length(request_date) != 1){
    stop("request_date must be a single date, or equal length to smp_id and ow_suffix")
  }
  
  # Assign today() to 'today'
  request_date <- stringr::str_replace(request_date, "today", as.character(lubridate::today()))
  
  # Check if smp_id and ow_suffix combination are valid
  #### There is no check other than the script crashes
  request_df <- data.frame(smp_id, ow_suffix, request_date, stringsAsFactors = FALSE)

  #1.4.2 Query fieldwork.tbl_ow and check if each smp id and observation well are there
  # Initialize dataframe
  ow_validity <- data.frame(ow_uid = numeric(),
                            smp_id =  character(),
                            ow_suffix = character(),
                            stringsAsFactors = FALSE)

  # Add ow_data for each one requested
  for(i in 1:length(request_df$smp_id)){
    ow_valid_check <- DBI::dbGetQuery(con, "SELECT * FROM fieldwork.tbl_ow") %>% dplyr::select(-facility_id) %>%  dplyr::filter(smp_id == request_df$smp_id[i] & ow_suffix == request_df$ow_suffix[i])
    ow_validity <- dplyr::bind_rows(ow_validity, ow_valid_check)
  }

  # Join dates back to observation wells and ow_uids back to request criteria
  ow_validity %<>% dplyr::left_join(request_df, by = c("smp_id", "ow_suffix"))
  request_df_validity <- dplyr::left_join(request_df  %>% dplyr::select(-request_date), ow_validity, by = c("smp_id", "ow_suffix"))
  request_df_validity

  # Create dataframe for storing snapshot data
  result <- data.frame("snapshot_uid" = numeric(),
                       "ow_uid" = numeric(),
                       "well_measurements_uid" = integer(),
                       "smp_id" = character(),
                       "ow_suffix" = character(),
                       "dcia_ft2" = numeric(),
                       "storage_footprint_ft2" = numeric(),
                       "orifice_diam_in" = numeric(),
                       "infil_footprint_ft2" = numeric(),
                       "storage_depth_ft" = numeric(),
                       "lined" = logical(),
                       "surface" = logical(),
                       "storage_volume_ft3" = numeric(),
                       "infil_dsg_rate_inhr" = numeric(),
                       "orifice_lookup_uid" = integer(),
                       "orificedepth_ft" = numeric(),
                       "sumpdepth_lookup_uid" = integer(),
                       "sumpdepth_ft" = numeric(),
                       stringsAsFactors = FALSE)


  # Get snapshot for each ow
  for(i in 1:length(ow_validity$smp_id)){
    snapshot_query <- paste0("SELECT * FROM metrics.viw_snapshot_well_measurements where ow_uid in (",
                             ow_validity$ow_uid[i],") ORDER BY snapshot_uid DESC LIMIT 1")
    new_result <- DBI::dbGetQuery(con, snapshot_query)
    new_result$lined <- new_result$lined %>% as.logical()
    new_result$surface <- new_result$surface %>% as.logical()
    result <- dplyr::bind_rows(result, new_result)
  }
  
  result
}

# marsFetchLevelData --------------------------------

#' Fetch water level data for an SMP
#'
#' Returns a data frame with requested SMP water level data
#'   
#' @param con An connection to the MARS Analysis database returned
#' @param target_id vector of chr, SMP ID, where the user has requested data
#' @param ow_suffix vector of chr, SMP ID, where the user has requested data
#' @param start_date string, format: "YYYY-MM-DD", start of data request range
#' @param end_date string, format: "YYYY-MM-DD", end of data request range
#' @param sump_correct logical, TRUE if water level should be corrected for to account for sump depth
#'
#' @return Output will be a dataframe with the following columns: 
#' 
#'     \item{ow_leveldata_uid}{int}
#'     \item{dtime}{POSIXct datetime in America/New_York}
#'     \item{level_ft}{num, recorded water level in feet}
#'     \item{ow_uid}{num, ow_uid derived from smp_id and ow_suffix} 
#'     
#' @export
#' 
#' @seealso \code{\link{marsFetchRainfallData}}, \code{\link{marsFetchRainEventData}}, \code{\link{marsFetchMonitoringData}}
#' 
marsFetchLevelData <- function(con, target_id, ow_suffix, start_date, end_date, sump_correct){
  # Check DB connection
  if(!DBI::dbIsValid(con)){
    stop("Argument 'con' is not an open ODBC channel")
  }
  
  
  #Check if smp_id and ow_suffix are in the MARS table "ow_validity"
  # Return match
  validity_query <- paste0("select * from fieldwork.fun_get_ow_uid('",target_id,"','",ow_suffix,"', NULL)")
  ow_uid <- DBI::dbGetQuery(con, validity_query)
  
  # Select query table
  #### This needs to be reworked/simplified with str_detect and only one else if
  #### Can we get rid of PZ because it doesnt' seem to exist in ow_prefixes
  #### This replace gets rid of the number at the end. It should be just letters before number?
  if(stringr::str_replace(ow_suffix, ".$", "") %in% c("CW", "GW", "PZ")){
    level_table <- "data.tbl_gw_depthdata_raw"
  }else if(!(stringr::str_replace(ow_suffix, ".$", "") %in% c("CW", "GW", "PZ")) & sump_correct == TRUE){
    level_table <- "data.viw_ow_leveldata_sumpcorrected"
  }else if(!(stringr::str_replace(ow_suffix, ".$", "") %in% c("CW", "GW", "PZ")) & sump_correct == FALSE){
    level_table <- "data.tbl_ow_leveldata_raw"
  }
  # Make sure dates are POSIXct types
  start_date %<>% as.POSIXct(format = '%Y-%m-%d')
  end_date %<>% as.POSIXct(format = '%Y-%m-%d')

  # Add buffer to requested dates based on DB time zone
  start_date <- lubridate::round_date(start_date) - lubridate::days(1)
  end_date <- lubridate::round_date(end_date) + lubridate::days(1)

  # Query database for level data
  leveldata_query <- paste0("SELECT * FROM ", level_table, " WHERE ow_uid = '", ow_uid,
  "' AND dtime BETWEEN '", start_date,"' AND '", end_date, "'")

  leveldata <- DBI::dbGetQuery(con, leveldata_query) %>% dplyr::arrange(dtime) 
  leveldata <- leveldata |> 
    dplyr::mutate(dtime = lubridate::round_date(dtime, "minute"))
}

# marsFetchRainEventData --------------------------------

#' Fetch rain event data for an SMP
#'
#' Returns a data frame with rain event data from the rain gage closest to the request SMP
#'   
#' @param con An ODBC connection to the MARS Analysis database returned by odbc::dbConnect
#' @param target_id vector of chr, SMP ID, where the user has requested data
#' @param source string, one of "gage" or "radar" to select rain gage events or radar rainfall events
#' @param start_date string, format: "YYYY-MM-DD", start of data request range
#' @param end_date string, format: "YYYY-MM-DD", end of data request range
#'
#' @return Output will be a dataframe with the following columns: 
#' 
#'     \item{rainfall_gage_event_uid}{int}
#'     \item{gage_uid OR radar_uid}{int, unique identifier for rain source, depending on value of source argument}
#'     \item{eventdatastart_edt}{POSIXct datetime}
#'     \item{eventdataend_edt}{POSIXct datetime}
#'     \item{eventduration_hr}{num, duration of event}
#'     \item{eventpeakintensity_inhr}{num, peak intensity of rain event} 
#'     \item{eventavgintensity_inhr}{num, average intensity of rain event} 
#'     \item{eventdepth_in}{num, average intensity of rain event} 
#'     
#' @export
#' 
#' @seealso \code{\link{marsFetchRainfallData}}, \code{\link{marsFetchLevelData}}, \code{\link{marsFetchMonitoringData}}
#' 
marsFetchRainEventData <- function(con, target_id, source = c("gage", "radar"), start_date, end_date){
  # Check database connection
  if(!DBI::dbIsValid(con)){
    stop("Argument 'con' is not an open database connection")
  }
  
  #Sanitize start and end date
  start_date %<>% as.POSIXct(format = '%Y-%m-%d')
  end_date %<>% as.POSIXct(format = '%Y-%m-%d')
  
  # Was a string supplied to source?
  if(isTRUE(all.equal(source, c("gage","radar")))){
    stop("No argument supplied for 'source'. Provide a string of either 'gage' or 'radar'")
  }
  
  # Are we working with gages or radarcells?
  if(source == "gage" | source == "gauge"){
    rainparams <- data.frame(smptable = "admin.tbl_smp_gage", eventtable = "data.tbl_gage_event", uidvar = "gage_uid", loctable = "admin.tbl_gage", eventuidvar = "gage_event_uid", stringsAsFactors=FALSE)
  } else if(source == "radar"){
    rainparams <- data.frame(smptable = "admin.tbl_smp_radar", eventtable = "data.tbl_radar_event", uidvar = "radar_uid", loctable = "admin.tbl_radar", eventuidvar = "radar_event_uid", stringsAsFactors=FALSE)
  } else { #Parameter is somehow invalid
    stop("Argument 'source' is not one of 'gage' or 'radar'")
  }
  
  
  # Get closest rain source
  rainsource <- odbc::dbGetQuery(con, paste0("SELECT * FROM ", rainparams$smptable)) %>% 
    dplyr::filter(smp_id == target_id) %>%
    dplyr::pull(rainparams$uidvar)
  
  # Query event data
  event_query <- paste(paste0("SELECT * FROM ", rainparams$eventtable),
                       "WHERE", rainparams$uidvar, "= CAST('", rainsource, "' as int)",
                       "AND eventdatastart >= Date('", start_date, "')",
                       "AND eventdataend <= Date('", end_date + lubridate::days(1), "');")
  
  events <- DBI::dbGetQuery(con, event_query) 
}

# marsFetchMonitoringData --------------------------------

#' Fetch monitoring data for an SMP
#'
#' Returns a list with data frames: Rain Event Data, Rain Gage Data, and Level Data. All data is filtered based on available water level data.
#'   
#' @param con An ODBC connection to the MARS Analysis database returned by odbc::dbConnect
#' @param target_id vector of chr, SMP ID, where the user has requested data
#' @param ow_suffix vector of chr, OW suffixes corresponding to SMP IDs, where the user has requested data
#' @param source string, one of "gage" or "radar" to select rain gage events or radar rainfall events
#' @param start_date string, format: "YYYY-MM-DD", start of data request range
#' @param end_date string, format: "YYYY-MM-DD", end of data request range
#' @param sump_correct logical, TRUE if water level should be corrected for to account for sump depth
#' @param rain_events logical, TRUE if rain event data should be included in result
#' @param rainfall logical, TRUE if rainfall data should be included in result
#' @param level logical, TRUE if water level should be included in result
#' @param daylight_savings logical, WILL NOT WORK in current version 
#' @param debug logical, whether to print lookup times and outputs
#'
#' @return Output will be a list consisting of the follow elements filtered by level data:
#' 
#'     \item{Rain Event Data}{dataframe, output from \code{\link{marsFetchRainEventData}}}
#'     \item{Rain Gage Data}{dataframe, output from \code{\link{marsFetchRainfallData}}}
#'     \item{Level Data}{dataframe, output from \code{\link{marsFetchLevelData}}, plus rainfall_gage_event_uids}
#'     
#' @export
#' 
#' @seealso \code{\link{marsFetchRainfallData}}, \code{\link{marsFetchLevelData}}, \code{\link{marsFetchRainEventData}}
#' 


marsFetchMonitoringData <- function(con, target_id, ow_suffix, source = c("gage", "radar"), start_date, end_date,
                                    sump_correct = TRUE, rain_events = TRUE, rainfall = TRUE, level = TRUE, daylight_savings = FALSE,
                                    debug = FALSE){
  # Check database connection
  if(!DBI::dbIsValid(con)){
    stop("Argument 'con' is not an open database connection")
  }
  
  # Was a string supplied to source?
  if(isTRUE(all.equal(source, c("gage","radar")))){
    stop("No argument supplied for 'source'. Provide a string of either 'gage/gauge' or 'radar'")
  }
  
  #Are we working with gages or radarcells?
  if(source == "gage" | source == "gauge"){
    rainparams <- data.frame(smptable = "admin.tbl_smp_gage", eventtable = "data.tbl_gage_event", uidvar = "gage_uid", loctable = "admin.tbl_gage", eventuidvar = "gage_event_uid", stringsAsFactors=FALSE)
  } else if(source == "radar"){
    rainparams <- data.frame(smptable = "admin.tbl_smp_radar", eventtable = "data.tbl_radar_event", uidvar = "radar_uid", loctable = "admin.tbl_radar", eventuidvar = "radar_event_uid", stringsAsFactors=FALSE)
  } else { #Parameter is somehow invalid
    stop("Argument 'source' is not one of 'gage' or 'radar'")
  }
  
  # Initialize list
  results <- list()
  
  #Get closest gage
  #### We shoudl get rid fo these debug check points.
  if(debug){
    ptm <- proc.time()
  }
  # Find smp_gage_uid, smp_id, and gage_uid 
  smp_rain <- DBI::dbGetQuery(con, paste0("SELECT * FROM ", rainparams$smptable)) %>% dplyr::filter(smp_id %in% target_id)
  # Pull all OWs that are in the fieldwork app
  ow_validity <- DBI::dbGetQuery(con, "SELECT * FROM fieldwork.tbl_ow")
  # Join smp_rain with fieldwork data 
  #### This seems like a lot of additional information we don't need.
  ow_uid_gage <- ow_validity %>% dplyr::right_join(smp_rain, by = "smp_id")

  if(debug){
  print(paste0(source, "_lookup_time: ", (proc.time()-ptm)[3]))
  }

  #Set datetime date types
  start_date %<>% as.POSIXct(format = '%Y-%m-%d')
  end_date %<>% as.POSIXct(format = '%Y-%m-%d')

  if(debug){
    ptm <- print(paste("date_time conversion time:", (proc.time()-ptm)[3]))
  }

  # Add rain events
  if(rain_events == TRUE){
    for(i in 1:length(target_id)){
      results[["Rain Event Data step"]] <- marsFetchRainEventData(con, target_id[i], source, start_date[i], end_date[i])
      #### are start_date[i] and end_date[i] doing anything?
      start_date[i] <- ifelse(nrow(results$`Rain Event Data step`) > 1,
                              min(results[["Rain Event Data step"]]$eventdatastart),
                              start_date[i])
      end_date[i] <-  ifelse(nrow(results$`Rain Event Data step`) > 1,
                           max(results[["Rain Event Data step"]]$eventdatastart),
                           end_date[i])
      results[["Rain Event Data"]] <- dplyr::bind_rows(results[["Rain Event Data"]], results[["Rain Event Data step"]])
      results[["Rain Event Data step"]] <- NULL
    }
  }

  if(debug){
    ptm <- proc.time()
  }

  # 4 Add Rainfall
  if(rainfall == TRUE){
    for(i in 1:length(target_id)){
      results[["Rainfall Data step"]] <- marsFetchRainfallData(con, target_id[i], source, start_date[i], end_date[i], daylight_savings)
      #### The following start_date and end_date lines don't seem to be used/saved anywhere
      start_date[i] <- min(results[["Rainfall Data step"]]$dtime - lubridate::days(1), na.rm = TRUE)
      end_date[i] <- max(results[["Rainfall Data step"]]$dtime + lubridate::days(1), na.rm = TRUE)
      results[["Rainfall Data"]] <- dplyr::bind_rows(results[["Rainfall Data"]], results[["Rainfall Data step"]])
      results[["Rainfall Data step"]] <- NULL
    }
  }

  if(debug){
    print(paste("rainfall_lookup_time:", (proc.time()-ptm)[3]))
  }

  if(debug){
    ptm <- proc.time()
  }
  # Add level data
  if(level == TRUE){

    for(i in 1:length(target_id)){
      results[["Level Data step"]] <- marsFetchLevelData(con, target_id[i], ow_suffix[i], start_date[i], end_date[i], sump_correct) %>%
        dplyr::left_join(ow_uid_gage, by = "ow_uid") %>%  #join rain gage uid
        dplyr::select(dtime, level_ft, ow_uid, rainparams$uidvar) #remove extra columns
      if(rain_events == TRUE){
        level_data_step <- results[["Level Data step"]] #coerce to data frame

        results_event_data <- results[["Rain Event Data"]]

        level_data_step <- level_data_step[(!is.na(level_data_step$dtime)),]

        #select relevant columns from the results
        results_event_data %<>% dplyr::select(rainparams$eventuidvar, rainparams$uidvar, eventdatastart)

        #join by gage uid and by start time, to give a rainfall gage event uid at the start of each event
        level_data_step %<>% dplyr::left_join(results_event_data,
                                              by = c(rainparams$uidvar, "dtime" = "eventdatastart"))

        #carry event uids forward from event start to start of next event
        level_data_step[[rainparams$eventuidvar]] %<>% zoo::na.locf(na.rm = FALSE)

        #isolate event data needed for assuring that the rainfall gage event uid isn't assigned too far past the event end
        event_data <- results[["Rain Event Data"]]  %>%
          dplyr::select(rainparams$eventuidvar, eventdataend)

        #browser()

        #join event end times to level data by event uid
        #check that any dtime that has that event uid does not exceed the end time by greater than three days
        #if it does, reassign NA to event uid
        level_data_step %<>% dplyr::left_join(event_data, by = rainparams$eventuidvar) %>%
          dplyr::mutate(new_event_uid = dplyr::case_when(dtime >= (eventdataend + lubridate::days(4)) ~ NA_integer_,
                                                         TRUE ~ level_data_step[[rainparams$eventuidvar]])) %>%
          dplyr::select(-!!rainparams$eventuidvar, -eventdataend) %>%
          dplyr::rename(!!rainparams$eventuidvar := new_event_uid)


        results[["Level Data step"]] <- NULL
        results[["Level Data step"]] <- level_data_step
      }

      results[["Level Data"]] <- dplyr::bind_rows(results[["Level Data"]], results[["Level Data step"]])
      results[["Level Data step"]] <- NULL
    }
  }

  if(debug){
    print(paste("level_lookup_time:", (proc.time()-ptm)[3]))
  }

  if(debug){
    ptm <- proc.time()
  }

  # Filter all dataframes by available water level data if all datafames are requested
  if(rain_events == TRUE & rainfall == TRUE & level == TRUE){
  test_df_id <- dplyr::full_join(results[["Rainfall Data"]], results[["Level Data"]], by = c("dtime", rainparams$eventuidvar, rainparams$uidvar)) %>%
    dplyr::arrange(dtime) %>%
    dplyr::filter(!is.na(rainparams$eventuidvar) & is.na(level_ft)) %>%  #filter events that are not NA and and water level that is not NA
    dplyr::pull(rainparams$eventuidvar)
  results[["Level Data"]] %<>% dplyr::filter(!(rainparams$eventuidvar %in% test_df_id))
  # !!sym syntax comes from here: https://stackoverflow.com/questions/49786597/r-dplyr-filter-with-a-dynamic-variable-name
  # Because our variable name is a string, we need to make it evaluate as an R symbol instead of the string
  # choked during recent deployment, going to attempt to use rlang::sym() instead
  # filter out rain data for events that do have corresponding water level data
  results[["Rainfall Data"]] %<>% dplyr::filter((!!rlang::sym(rainparams$eventuidvar) %in% results[["Level Data"]][, rainparams$eventuidvar]))
  #fiter out rain events that no longer have corresponding rainfall data
  results[["Rain Event Data"]] %<>% dplyr::filter((!!rlang::sym(rainparams$eventuidvar) %in% results[["Rainfall Data"]][, rainparams$eventuidvar]))

  if(debug){
    print(paste("filtering_time:", (proc.time()-ptm)[3]))
    }
  }
  return(results)
}

# marsWriteInfiltrationData ------------------------------------------
#' Write Infiltration Performance Data to Database 
#' 
#' Receive vectors of infiltration rate and infiltration baseline data calculated with \code{\link{marsInfiltrationRate_inhr}},
#' gather data, and write to MARS Analysis Database performance_infiltration table. Replaces error codes with NAs and moves
#' them to a separate column, \code{error_lookup_uid}.
#' 
#' @param con Formal class PostgreSQL, a connection to the MARS Analysis database
#' @param baseline_ft vector, numeric, point that water level returns to at the end of an event (default ??)
#' @param infiltration_rate_inhr vector, numeric, infiltration rate (in/hr)
#' @param ow_uid vector, numeric observation well UID
#' @param radar_event_uid vector, numeric event UIDs for rain events from radar data
#' @param snapshot_uid vector, numeric
#' @param observed_simulated_lookup_uid vector, numeric, 1 if observed, 2 if simulated
#' 
#' @return \code{TRUE} if the write is succesful, or an error message if unsuccessful
#' 
#' @seealso \code{\link[pwdgsi]{marsWriteOvertoppingData}}, \code{\link{marsWritePercentStorageData}}
#' 
#' @export
#' 
#' @examples 
#' 
#' marsWriteInfiltrationData(con = mars, 
#'   infiltration_rate_inhr = summary_250$infiltration_rate_inhr,
#'   ow_uid = summary_250$ow_uid,
#'   radar_event_uid = summary_250$rainfall_gage_event_uid,
#'   snapshot_uid = summary_250$snapshot_uid,
#'   observed_simulated_lookup_uid = summary_250$observed_simulated_lookup_uid)
#' 
marsWriteInfiltrationData <- function(con, 
                                   infiltration_rate_inhr,
                                   baseline_ft = NA,
                                   ow_uid,
                                   radar_event_uid,
                                   snapshot_uid,
                                   observed_simulated_lookup_uid){

  #check that vectors are the same length
  if(!(length(infiltration_rate_inhr) == length(ow_uid) &
       length(infiltration_rate_inhr) == length(radar_event_uid) &
       length(infiltration_rate_inhr) == length(snapshot_uid))){
    stop("Vectors must be the same length")
  }
    
  #add vectors to dataframe
  summary_df <- data.frame(infiltration_rate_inhr,
                           baseline_ft,
                           ow_uid,
                           radar_event_uid,
                           snapshot_uid) %>% 
    dplyr::filter(observed_simulated_lookup_uid == 1)
  
  #select columns for dataframe
  saturatedperformance_df <- summary_df %>% 
    dplyr::mutate("error_lookup_uid" = ifelse(infiltration_rate_inhr <0,
                                              infiltration_rate_inhr, NA),
                  infiltration_rate_inhr = ifelse(!is.na(error_lookup_uid),
                                                     NA, infiltration_rate_inhr))
    
  #write to table, and return either TRUE (for a succesful write) or the error (upon failure)
  result <- tryCatch(odbc::dbWriteTable(con, DBI::SQL("metrics.tbl_infiltration"), saturatedperformance_df, overwrite = FALSE, append = TRUE), 
                     error = function(error_message){
                       return(error_message$message)
                     }
  )
  
  return(result)
}


# marsWritePercentStorageData ------------------------------------------
#' Write Percent of Storaged Used Data to Database 
#' 
#' Receive vectors of raw and relative percent storage data, calculated with \code{\link{marsPeakStorage_percent}},
#' gather data, and write to MARS Analysis performance_percentstorage table
#' 
#' @param con Formal class PostgreSQL, a connection to the MARS Analysis database
#' @param percentstorageused_peak vector, numeric, peak percent of storage (\%)
#' @param percentstorageused_relative vector, numeric, relative percent storage (\%) 
#' @param ow_uid vector, numeric observation well UID
#' @param radar_event_uid vector, numeric event UIDs for rain events from radar data
#' @param snapshot_uid vector, numeric
#' @param observed_simulated_lookup_uid vector, numeric, 1 if observed, 2 if simulated
#' 
#' @seealso \code{\link{marsWriteOvertoppingData}},  \code{\link{marsWriteDraindownData}}
#' 
#' @return \code{TRUE} if the write is successful, or an error message if unsuccessful
#' 
#' @export
#' 
#' @examples
#' 
#' marsWritePercentStorageData(con = mars, 
#'    percentstorageused_peak = summary_250$percentstorageused_peak,
#'    percentstorageused_relative = summary_250$percentstorageused_relative,
#'    ow_uid = summary_250$ow_uid,
#'    radar_event_uid = summary_250$rainfall_gage_event_uid,
#'    snapshot_uid = summary_250$snapshot_uid,
#'    observed_simulated_lookup_uid = summary_250$observed_simulated_lookup_uid)
#' 


marsWritePercentStorageData <- function(con, 
                                        percentstorageused_peak,
                                        percentstorageused_relative,
                                        ow_uid,
                                        radar_event_uid,
                                        snapshot_uid,
                                        observed_simulated_lookup_uid){
  
  #check that vectors are the same length
  if(!(length(percentstorageused_peak) == length(ow_uid) &
       length(percentstorageused_peak) == length(radar_event_uid) &
       length(percentstorageused_peak) == length(snapshot_uid) &
       length(percentstorageused_peak) == length(observed_simulated_lookup_uid) &
       length(percentstorageused_peak) == length(percentstorageused_relative))){
    stop("Vectors must be the same length")
  }
    
  #add vectors to dataframe
  summary_df <- data.frame(percentstorageused_peak,
                           percentstorageused_relative,
                           ow_uid,
                           radar_event_uid,
                           observed_simulated_lookup_uid,
                           snapshot_uid)
  
  #gather percent storage types in one column
  percentstorage_table <- tidyr::gather(summary_df, key = "relative", value = "percentstorage", percentstorageused_peak, percentstorageused_relative)
  
  #reassign raw and relative percent storage to FALSE and TRUE 
  percentstorage_table[percentstorage_table$relative == "percentstorageused_peak", "relative"] <- FALSE
  percentstorage_table[percentstorage_table$relative == "percentstorageused_relative", "relative"] <- TRUE
  
  #select columns for dataframe
  percentstorage_df <- percentstorage_table %>% 
    dplyr::select(percentstorage,
                  relative,
                  observed_simulated_lookup_uid,
                  ow_uid,
                  radar_event_uid,
                  snapshot_uid)

  
  #write to table, and return either TRUE (for a succesful write) or the error (upon failure)
  result <- tryCatch(odbc::dbWriteTable(con, DBI::SQL("metrics.tbl_percentstorage"), percentstorage_df, overwrite = FALSE, append = TRUE), 
                     error = function(error_message){
                       return(error_message$message)
                     }
  )
  
  return(result)
}


# marsWriteOvertoppingData ------------------------------------------
#' Write Overtopping Data to Database 
#' 
#' Receive vector of overtopping data, calculated with \code{\link{marsOvertoppingCheck_bool}},
#' and write to MARS Analysis Database performance_overtopping table
#' 
#' @param con Formal class PostgreSQL, a connection to the MARS Analysis database
#' @param overtopping vector, logical, TRUE if water level reaches max storage depth
#' @param ow_uid vector, numeric observation well UID
#' @param radar_event_uid vector, numeric event UIDs for rain events from radar data
#' @param snapshot_uid vector, numeric
#' @param observed_simulated_lookup_uid vector, numeric, 1 if observed, 2 if simulated
#' 
#' @return \code{TRUE} if the write is succesful, or an error message if unsuccessful
#' 
#' @seealso \code{\link{marsWritePercentStorageData}},  \code{\link{marsWriteDraindownData}}
#' 
#' @export
#' 
#' @examples
#' 
#' marsWriteOvertoppingData(con = mars, 
#'   overtopping = summary_250$overtop, 
#'   observed_simulated_lookup_uid = summary_250$observed_simulated_lookup_uid, 
#'   ow_uid = summary_250$ow_uid, 
#'   radar_event_uid = summary_250$rainfall_gage_event_uid,
#'   snapshot_uid = summary_250$snapshot_uid)
#' 
#' 
marsWriteOvertoppingData <- function(con, 
                                     overtopping, 
                                     observed_simulated_lookup_uid, 
                                     ow_uid, 
                                     radar_event_uid,
                                     snapshot_uid){
  
  #check that vectors are the same length
  if(!(length(overtopping) == length(ow_uid) &
       length(overtopping) == length(radar_event_uid) &
       length(overtopping) == length(snapshot_uid) &
       length(overtopping) == length(observed_simulated_lookup_uid))){
    stop("Vectors must be the same length")
  }
  
  #add vectors to dataframe
  overtopping_df <- data.frame(overtopping,
                               observed_simulated_lookup_uid,
                               ow_uid,
                               radar_event_uid,
                               snapshot_uid)
  
  #write to table, and return either TRUE (for a succesful write) or the error (upon failure)
  result <- tryCatch(odbc::dbWriteTable(con, DBI::SQL("metrics.tbl_overtopping"), overtopping_df, overwrite = FALSE, append = TRUE), 
                     error = function(error_message){
                       return(error_message$message)
                     }
  )
  
  return(result)
  
}

# marsWriteDraindownData ------------------------------------------
#' Write Draindown Data to Database 
#' 
#' Receive vector of draindown data, calculated with \code{\link{marsDraindown_hr}},
#' and write to MARS Analysis Database performance_draindwown table
#' 
#' @param con Formal class PostgreSQL, a connection to the MARS Analysis database
#' @param draindown_hr vector, numeric, draindown time (hr)
#' @param draindown_assessment_lookup_uid vector, int, assessment of draindown duration from \code{\link{marsDraindownAssessment}} 
#' @param ow_uid vector, numeric observation well UID
#' @param radar_event_uid vector, numeric
#' @param snapshot_uid vector, numeric
#' @param observed_simulated_lookup_uid vector, numeric, 1 if observed, 2 if simulated
#' 
#' @return \code{TRUE} if the write is succesful, or an error message if unsuccessful
#' 
#' @seealso \code{\link{marsWritePercentStorageData}}, \code{\link{marsWriteOvertoppingData}}, 
#' 
#' @export
#' 
#' @examples 
#' 
#' marsWriteDraindownData(con,
#'   draindown_hr = summary_250$draindown_hr,
#'   draindown_assessment_lookup_uid = summary_250$draindown_assessment_lookup_uid,
#'   ow_uid = summary_250$ow_uid,
#'   radar_event_uid = summary_250$rainfall_gage_event_uid, 
#'   snapshot_uid = summary_250$snapshot_uid,
#'   observed_simulated_lookup_uid = summary_250$observed_simulated_lookup_uid)
#' 
marsWriteDraindownData <- function(con,
                                   draindown_hr,
                                   draindown_assessment_lookup_uid,
                                   ow_uid,
                                   radar_event_uid, 
                                   snapshot_uid,
                                   observed_simulated_lookup_uid){
  
  #check that vectors are the same length
  if(!(length(draindown_hr) == length(ow_uid) &
       length(draindown_hr) == length(draindown_assessment_lookup_uid) &
       length(draindown_hr) == length(radar_event_uid) &
       length(draindown_hr) == length(snapshot_uid) &
       length(draindown_hr) == length(observed_simulated_lookup_uid))){
    stop("Vectors must be the same length")
  }  
  
  #add vectors to dataframe
  draindown_df <- data.frame(draindown_hr,
                             observed_simulated_lookup_uid,
                             ow_uid,
                             radar_event_uid,
                             snapshot_uid, 
                             draindown_assessment_lookup_uid)
  
  #select columns for dataframe
  draindown_df <- draindown_df %>% 
    dplyr::mutate("error_lookup_uid" = ifelse(draindown_hr <0,
                                              draindown_hr, NA),
                  draindown_hr = ifelse(!is.na(error_lookup_uid),
                                                  NA, draindown_hr))
  
  #write to table, and return either TRUE (for a succesful write) or the error (upon failure)
  result <- tryCatch(odbc::dbWriteTable(con, DBI::SQL("metrics.tbl_draindown"), draindown_df, overwrite = FALSE, append = TRUE), 
                     error = function(error_message){
                       return(error_message$message)
                     }
  )
  
  return(result)
  
}  
taywater/pwdgsi documentation built on June 14, 2025, 9 p.m.