# R/clean_theta_data.R
#
# Defines functions: clean_theta_data
#
# Documented in: clean_theta_data

#' @title Automated Cleaning of ThetaProbe Data
#'
#' @description This function checks for and filters outliers in the
#' University of Southern Queensland National Centre for Engineering in
#' Agriculture's (NCEA) Theta Probe data of soil moisture for the USQ CCH Summer
#' Crops Pathology group, returning the results aggregated to daily values.
#'
#' @param csv_in A CSV file generated by \code{\link{get_soil_moisture}}, which
#' will be cleaned of outliers and aggregated to daily values
#'
#' @details The input file is read as four columns (date, time, moisture and
#' probe) and rows with missing values are dropped. The moisture readings are
#' passed through a Hampel filter (\code{\link[pracma]{hampel}} with
#' \code{k = 4} and \code{t0 = 3}) and the filtered readings are then averaged
#' by date and probe. Nothing is written to disk by this function; see the
#' example for one way to save the result.
#'
#' @return A \code{data.frame} with the columns \code{Date}, \code{Probe} and
#' \code{Moisture} (mean of the filtered readings for that date and probe,
#' rounded to two decimal places), sorted by probe and then by date.
#'
#' @examples
#' \dontrun{
#' cleaned <- clean_theta_data(csv_in = "~/Soil_Moisture.csv")
#'
#' write.csv(cleaned, "~/Documents/cleaned_data.csv")
#' }
#' @export
clean_theta_data <- function(csv_in = NULL) {
  # dummy bindings so that the bare column names used below are not reported
  # as undefined global variables by R CMD check
  Date <- Probe <- Moisture <- Filtered_Moisture <- NULL

  if (is.null(csv_in)) {
    stop("`csv_in` must be the path to a soil moisture CSV file.",
         call. = FALSE)
  }

  # import data ----------------------------------------------------------------
  observations <- stats::na.omit(as.data.frame(readr::read_csv(
    csv_in,
    col_names = c("Date", "Time", "Moisture", "Probe"),
    col_types = c("ctdc")
  )))

  if (nrow(observations) == 0) {
    stop("No complete observations were found in `csv_in`.", call. = FALSE)
  }

  # reformat date column -------------------------------------------------------
  # dmy() accepts separators itself; stripping "/" beforehand would make
  # unpadded dates such as 1/11/2016 and 11/1/2016 indistinguishable
  observations$Date <- lubridate::dmy(observations$Date)

  # filter outliers ------------------------------------------------------------
  # NOTE(review): the filter runs over all rows in file order, so readings from
  # different probes can share a window -- confirm whether it should be applied
  # per probe instead.
  half_window <- 4
  if (nrow(observations) >= 2 * half_window + 1) {
    observations$Filtered_Moisture <-
      pracma::hampel(observations$Moisture, half_window, t0 = 3)$y
  } else {
    # too few readings to form a single complete window of 2k + 1 values, so
    # the readings are passed through unfiltered
    observations$Filtered_Moisture <- observations$Moisture
  }

  # aggregate to daily values --------------------------------------------------
  # average the *filtered* readings; averaging the raw Moisture column would
  # discard the outlier filtering done above
  aggregated <- doBy::summaryBy(
    Filtered_Moisture ~ Date + Probe,
    data = observations,
    FUN = mean
  )
  names(aggregated) <- c("Date", "Probe", "Moisture")
  aggregated$Moisture <- round(aggregated$Moisture, 2)

  # arrange by Probe then Date and return the data.frame -----------------------
  dplyr::arrange(aggregated, Probe, Date)
}
# adamhsparks/ThetaProbe documentation built on Sept. 19, 2019, 3:20 a.m.