R/outside_checker.R

outside_checker <- function(df){
  library(tidyverse)

  canal_list <- c(28, 29, 132, 137, 147, 148, 263, 266,
                  277, 278, 282, 287, 288, 290, 293, 458,
                  459, 472, 476, 114, 118, 120, 145, 150, 152)

  cat("\n Checking data quality \n")

  ##Dataset
  print(paste("Dataset column is ... ",
              ifelse(
                all(df$dataset %in% c("EPA/Monroe County Demonstration Canal", "Islamorada Canal Monitoring")),
                yes = "All good", no = "There's a problem")))

  #sampling_trip
  print(paste("sampling_trip column is ... ",
              ifelse(
                all(str_length(df$sampling_trip) <= 7 & grepl(pattern = "_",df$sampling_trip)),
                yes = "All good", no = "There's a problem")))

  #Month
  print(paste("Month column is ... ",
              ifelse(
                !anyNA(df$month),
                yes = "All good", no = "There's a problem")))

  #canal
  print(paste("canal number column is ... ",
              ifelse(
                all(df$canal %in% canal_list),
                yes = "All good", no = "There's a problem")))

  #quadrat
  print(paste("quadrat column is ... ",
              ifelse(
                all(df$quadrat %in% c(0, 10, 50, 100, 250)),
                yes = "All good", no = "There's a problem")))

  #substrate
  print(paste("substrate column is ... ",
              ifelse(
                all(df$substrate %in% c("Muck", NA, "M", "MS", "Rub", "S", "Rock", "Wrack", "SM", "HH")),
                yes = "All good", no = "There's a problem")))


  #canopy_height_cm
  print(paste("canopy height column is ... ",
              ifelse(
                all((!is.na(df$canopy_height_cm) & df$canopy_height_cm < 120) | (is.na(df$canopy_height_cm))),
                yes = "All good", no = "There's a problem")))


  #BB's
  cat("\n Check all species for realistic BB scores \n")

  species <- df %>%
    select(-c(names(df)[1:9])) %>%
    names


  for (i in seq_along(species)){
    results = 0
    tested_species = species[i]
    results[i] = ifelse(all(df[[tested_species]] %in% c(0, 0.1, 0.5, 1, 2, 3, 4, 5, NA)),
                           yes = "All good", no = "There's a problem")
    print(paste(species[i], "column is ... ", results[i]))
      }

#row checks
cat("\n Check if all the required canals have been entered \n")

# all canals are entered

print(paste("These canals are not included in the data table:",
              ifelse(all(canal_list %in% df$canal),
                     "All canals entered",
                     paste(canal_list[!canal_list %in% df$canal], collapse = ", ")
                         )))

}
Halophila/CanalProjectQAQC documentation built on May 7, 2019, 2:58 p.m.