R/check_yield.R

Defines functions pa_check_yield

Documented in pa_check_yield

#'
#' @title Check the yield data before processing with the
#'   pa_yield function
#' @description  This function will check for red flags so
#'   the user can know of potential problems before using
#'   the pa_yield functions
#' @name pa_check_yield
#' @rdname pa_check_yield
#' @param input an sf object containing the input data from
#'   a yield monitor
#' @param algorithm for which algorithm should the function
#'   check the data. Different algorithms require different
#'   information to be present in the input data set.
#' @details This function will check the input yield data
#'   for any potential problems before the user runs the
#'   `pa_yield()` function. Ideally, this function warn the user
#'   of potential problems.
#' @return object of class check.yield
#' @author Caio dos Santos and Fernando Miguez
#' @export
#' @examples
#' \donttest{
#' extd.dir <- system.file("extdata", package = "pacu")
#' raw.yield <- sf::read_sf(file.path(extd.dir, '2012-basswood.shp'),
#'                          quiet = TRUE)
#' chk <- pa_check_yield(raw.yield)
#' chk
#' }
#'
pa_check_yield <- function(input,
                           algorithm = c('all', 'simple', 'ritas')){

  column <- NULL

  s.wrns <-  get("suppress.warnings", envir = pacu.options)
  s.msgs <-  get("suppress.messages", envir = pacu.options)
  res <- list()
  
  algorithm <- match.arg(algorithm)
   ## checking for columns and units
  checks <- list(simple = c('yield', 'moisture', 'interval'),
                 ritas =  c('mass', 'flow', 'moisture', 'interval', 'angle', 'width', 'distance'))

  if (algorithm != 'all')
    checks <- checks[algorithm]

  crt.crs <- sf::st_crs(input)
  if(is.na(crt.crs)) {
    sf::st_crs(input) <- 'epsg:4326'
  }

  center <- suppressWarnings(sf::st_coordinates(sf::st_centroid(sf::st_union(input))))
  lat <- center[2]
  lon <- center[1]
  area <- sf::st_area(.pa_field_boundary(sf::st_geometry(input)))
  area <- as.numeric(area)


  res[[1]] <- data.frame(category = c('# of points', 'Approx. area (ha)', 'Approx. area (ac)', 'Latitude', 'Longitude', 'CRS'),
                                              values = c(nrow(input), area / 1e4, area / 4046, lat, lon, crt.crs$input))

  names(res)[1] <- 'field.info'
  
  if (sf::st_is_longlat(input)){
    if(lon < -180 || lon > 180) warning("longitude should be between -180 and 180")
    if(lat < -90 || lat > 90) warning("latitude should be between -90 and 90")
  }

  message.list <- list()

  res[[2]] <- NA
  names(res)[2] <- c('check.simple')
  
  if ('simple' %in% names(checks)){

    alg <-  'simple'


    length.ans <- length(checks[[alg]])
    ans <- data.frame(variable = rep(NA, length.ans),
                      column = rep(NA, length.ans),
                      mean = rep(NA, length.ans),
                      units = rep(NA, length.ans))


    ans2 <- data.frame(variable = rep(NA, length.ans),
                       min = rep(NA, length.ans),
                       max = rep(NA, length.ans),
                       median = rep(NA, length.ans),
                       NAs = rep(NA, length.ans),
                       extreme = rep(NA, length.ans))

    for (i in 1:length.ans){
      var <- checks[[alg]][i]
      tgt.col <- .pa_get_variable_names(var)
      tgt.index <- which(tolower(names(input)) %in% tgt.col)

      if (var == 'interval' && length(tgt.index) < 1) {
        time.col <- .pa_get_variable_columns(input, 'time', TRUE)
        if (!is.null(time.col)){
          time <- input[[time.col]]
          interval <- try(.pa_time2interval(time), silent = TRUE)
          if (!inherits(interval, 'try-error')){
            interval <- .pa_enforce_units(interval, 'time')
            input$interval <- interval
            tgt.index <- dim(input)[2]
            if (!s.wrns)
              warning('The variable interval was not found, thus, it was estimated from the col ', time.col)
          }else{
            if (!s.wrns)
              warning('The variable interval was not found and the function could not estimate it from ', time.col)
          }
        }

      }

      if(length(tgt.index) < 1) {
        warnings.list <- paste('Could not find column for', var)
        ans[i, 'variable'] <- var
        ans[i, -1] <- '-'

        ans2[i, 'variable'] <- var
        ans2[i, -1] <- '-'

      }else{
        variable.column <- names(input)[tgt.index]
        variable.data <- input[[variable.column]]
        variable.units <-  suppressMessages(.pa_guess_units(variable.data, var, verbose = FALSE))
        ul <- units(variable.units)
        ul <- paste(ul$numerator, ul$denominator,
                    sep = ifelse(length(ul$denominator) > 0, '/', ''))

        ans[i, 'variable'] <- var
        ans[i, 'column'] <- variable.column
        ans[i, 'mean'] <- round(mean(variable.data, na.rm = TRUE), 1)
        ans[i, 'units'] <- ul


        ans2[i, 'variable'] <- var
        ans2[i, 'min'] <- min(variable.data, na.rm = TRUE)
        ans2[i, 'max'] <- max(variable.data, na.rm = TRUE)
        ans2[i, 'median'] <- stats::median(variable.data, na.rm = TRUE)
        ans2[i, 'NAs'] <- sum(is.na(variable.data))
        pq <- 3 * stats::sd(variable.data, na.rm = TRUE)
        extreme.values <- variable.data[variable.data < mean(variable.data, na.rm = TRUE) - pq |
                                          variable.data > mean(variable.data, na.rm = TRUE) + pq]
        ans2[i, 'extreme'] <- length(extreme.values)
      }

      ans$units <- gsub('bushel/acre', 'bu/ac', ans$units)
    }
    
    res[[2]] <- list(ans, ans2)
    names(res[[2]]) <- c('data', 'values')



    
  }

  res[[3]] <- NA
  names(res)[3] <- 'check.ritas'
  
  if ('ritas' %in% names(checks)){

    alg <- 'ritas'

    length.ans <- length(checks[[alg]])
    ans <- data.frame(variable = rep(NA, length.ans),
                      column = rep(NA, length.ans),
                      mean = rep(NA, length.ans),
                      units = rep(NA, length.ans))

    ans2 <- data.frame(variable = rep(NA, length.ans),
                       min = rep(NA, length.ans),
                       max = rep(NA, length.ans),
                       median = rep(NA, length.ans),
                       NAs = rep(NA, length.ans),
                       extreme = rep(NA, length.ans))

    for (i in 1:length.ans){
      var <- checks[[alg]][i]
      tgt.col <- .pa_get_variable_columns(input, var, verbose = FALSE)

      if (var == 'interval' && is.null(tgt.col)) {
        time.col <- .pa_get_variable_columns(input, 'time', verbose = FALSE)
        if (!is.null(time.col)){
          time <- input[[time.col]]
          interval <- try(.pa_time2interval(time), silent = TRUE)
          if (!inherits(interval, 'try-error')){
            interval <- .pa_enforce_units(interval, 'time')
            input$interval <- interval
            tgt.index <- dim(input)[2]
            if (!s.wrns)
              warning('The variable interval was not found, thus, it was estimated from the col ', time.col)
          }else{
            if (!s.wrns)
              warning('The variable interval was not found and the function could not estimate it from ', time.col)
          }
        }
        
      }

      if(is.null(tgt.col)) {
        ans[i, 'variable'] <- var
        ans[i, -1] <- '-'

        ans2[i, 'variable'] <- var
        ans2[i, -1] <- '-'
      }else{
        variable.column <- tgt.col
        variable.data <- input[[variable.column]]
        comparison.vector <- .pa_get_comparison_vector(input, var)
        variable.units <-  suppressMessages(.pa_guess_units(variable.data, var, comparison.vector, verbose = FALSE))
        ul <- units(variable.units)
        ul <- paste(ul$numerator, ul$denominator,
                    sep = ifelse(length(ul$denominator) > 0, '/', ''))

        ans[i, 'variable'] <- var
        ans[i, 'column'] <- variable.column
        ans[i, 'mean'] <- round(mean(variable.data, na.rm = TRUE), 1)
        ans[i, 'units'] <- ul

        ans2[i, 'variable'] <- var
        ans2[i, 'min'] <- min(variable.data, na.rm = TRUE)
        ans2[i, 'max'] <- max(variable.data, na.rm = TRUE)
        ans2[i, 'median'] <- stats::median(variable.data, na.rm = TRUE)
        ans2[i, 'NAs'] <- sum(is.na(variable.data))
        pq <- 3 * stats::sd(variable.data, na.rm = TRUE)
        extreme.values <- variable.data[variable.data < mean(variable.data, na.rm = TRUE) - pq |
                                          variable.data > mean(variable.data, na.rm = TRUE) + pq]
        ans2[i, 'extreme'] <- length(extreme.values)
      }
      ans$units <- gsub('bushel/acre', 'bu/ac', ans$units)
    }

    res[[3]] <- list(ans, ans2, NA)
    names(res[[3]]) <- c('data', 'values', 'overlap')
    
    median.overlap <- try(.pa_check_polygon_overlap(input), silent = TRUE)
    if (!inherits(median.overlap, 'try-error')){
      median.overlap <- 100 * (1 - median.overlap)
      median.overlap <- round(median.overlap, 1)
      res$check.ritas$overlap <- median.overlap
    }
  }
  
  class(res) <- 'check.yield'
  invisible(res)
}

Try the pacu package in your browser

Any scripts or data that you put into this service are public.

pacu documentation built on June 8, 2025, 10:44 a.m.