R/compute_monthly_mean.R

Defines functions compute_monthly_mean

Documented in compute_monthly_mean

#' @title Function to compute monthly means for a given month of NEON data.

#' @author
#' John Zobitz \email{zobitz@augsburg.edu}

#' @description
#' Given a NEON measurement data frame calculate the monthly mean values across all horizontal and vertical locations. Based off code from Zoey Werbin.

#' @param NEON_data Required. Input vector of neon measurements for a month
#' @param position_columns Optional. Do we group by horizontalPosition, verticalPosition, and? Default is both. Added this option in case we just want to average across a given dimension.

#' @return A data frame that reports for each horiztonal and vertical position the computed mean and standard deviation from sampling (similar to a bootstrap method) as well as the sample mean and sample standard deviation


#' @keywords Currently none

#' @examples
#' \donttest{
#' # Download the NEON data directly - here this would be soil moisture
#' NEON_moist_30m_orig <- neonUtilities::loadByProduct(
#'   dpID = "DP1.00094.001",
#'   site = "WREF",
#'   startdate = "2022-06",
#'   enddate = "2022-06",
#'   timeIndex = "30",
#'   package = "expanded",
#'   check.size = FALSE,
#'   include.provisional = TRUE
#' )
#'
#'
#' # Then correct the swc
#' site_swc <- swc_correct(NEON_moist_30m_orig, "WREF","2022-06")
#'
#' # Select the columns and the time frequency
#' time_frequency <- "30_minute"
#' column_selectors <- c("Mean", "Minimum", "Maximum", "ExpUncert", "StdErMean")
#'
#'     swc <- site_swc |>
#'     purrr::pluck(paste0("SWS_",time_frequency)) |>
#'     dplyr::select(tidyselect::all_of(c("domainID","siteID",
#'     "horizontalPosition","verticalPosition","startDateTime","VSWCFinalQF")),
#'     tidyselect::matches(stringr::str_c("VSWC",column_selectors)))
#'
#'   # Determine a data frame of the different horizontal and vertical positions
#'   swc_positions <- site_swc |>
#'   purrr::pluck(paste0("sensor_positions_","00094"))
#'
#'   # Add on the positions for swc
#'   swc <- determine_position(swc_positions,swc)
#'   }



#' @export

#' @references
#' Zoey Werbin (@zoey-rw): original author https://github.com/zoey-rw/microbialForecasts/blob/caa7b1a8aa8a131a5ff9340f1562cd3a3cb6667b/data_construction/covariate_prep/soil_moisture/clean_NEON_sensor_moisture_data.r

compute_monthly_mean <- function(NEON_data, position_columns = c("horizontalPosition", "verticalPosition")) {

  # changelog and author contributions / copyrights
  #   Zoey Werbin (@zoey-rw): original author https://github.com/zoey-rw/microbialForecasts/blob/caa7b1a8aa8a131a5ff9340f1562cd3a3cb6667b/data_construction/covariate_prep/soil_moisture/clean_NEON_sensor_moisture_data.r
  #   John Zobitz (2024-01-17)
  #     modified to also compute the depth (zOffset) from the max of all zOffsets that month
  #   John Zobitz (2024-01-10)
  #     original creation
  #     2024-04-08: update to get namespaces correct


  .data = NULL  # Appease R CMD Check

  out_mean <- NEON_data |>
    dplyr::filter(dplyr::if_any(tidyselect::ends_with("FinalQF"), ~ (.x == 0))) |>
    tidyr::drop_na() |>
    dplyr::mutate(day = lubridate::day(.data[["startDateTime"]])) |>
    dplyr::group_by(dplyr::across(tidyr::any_of(position_columns))) |>
    tidyr::nest() |>
    dplyr::mutate(month_stats = purrr::map(.x = .data[["data"]], .f = function(input_data) {
      zOffset <- input_data |>
        dplyr::count(zOffset) |>
        dplyr::slice_max(.data[["n"]]) |>
        dplyr::pull(zOffset) # Take the max tally of the different depths (if there are any)



      if (length(unique(input_data$day)) >= 15) {
        col_names <- names(input_data)

        tsY <- dplyr::pull(input_data, var = which(stringr::str_detect(col_names, "[^StdEr]Mean$"))) # 30-min means
        uc <- dplyr::pull(input_data, var = which(stringr::str_detect(col_names, "ExpUncert$"))) / 2 # expanded measurement uncertainty at 95% confidence
        uNAT <- dplyr::pull(input_data, var = which(stringr::str_detect(col_names, "StdErMean$")))

        ub.sq <- (uc^2) - (uNAT^2)
        ub.sq[which(ub.sq < 0)] <- .0001 # negative values from rounded uncertainties...
        ub <- sqrt(ub.sq)


        nm <- 5000
        tsamp <- rep(NA, nm)
        tsamp_sd <- rep(NA, nm)
        for (i in 1:nm) {
          ub.samp <- sample(ub, 1) # sample 1 bias per month
          bias <- stats::rnorm(1, 0, 1) * ub.samp # assume that the bias is perfectly correlated from obs to obs

          uNAT.samp <- sample(uNAT, 1)
          tsim <- stats::rnorm(nm, tsY, uNAT.samp) ## sample random error per half hour - uses R recycling since nm < tsY

          tsamp[i] <- mean(tsim, na.rm = TRUE) + bias # output monthly mean
          tsamp_sd[i] <- stats::sd(tsim, na.rm = TRUE) # output monthly sd
        }
        comp_mean <- mean(tsamp)
        comp_sd <- mean(tsamp_sd)

        # just for sanity checking.
        simple_mean <- mean(tsY, na.rm = T)
        simple_sd <- stats::sd(tsY, na.rm = T)


        out_tibble <- tibble::tibble(comp_mean, comp_sd, simple_mean, simple_sd, zOffset)
      } else {
        out_tibble <- tibble::tibble(comp_mean = NA, comp_sd = NA, simple_mean = NA, simple_sd = NA, zOffset)
      }

      return(out_tibble)
    })) |>
    dplyr::select(-.data[["data"]]) |>
    tidyr::unnest(cols = c(.data[["month_stats"]])) |>
    dplyr::ungroup()

  return(out_mean)
}

Try the neonSoilFlux package in your browser

Any scripts or data that you put into this service are public.

neonSoilFlux documentation built on May 29, 2024, 4:06 a.m.