R/compute_monthly_mean.R
In neonSoilFlux: Compute Soil Carbon Fluxes for the National Ecological Observatory Network Sites

Documented in compute_monthly_mean

#' @title Function to compute monthly means for a given month of NEON data.

#' @author
#' John Zobitz \email{zobitz@augsburg.edu}

#' @description
#' Given a NEON measurement data frame calculate the monthly mean values across all horizontal and vertical locations. Based off code from Zoey Werbin.

#' @param NEON_data Required. Input vector of neon measurements for a month
#' @param time_horizon Required. How many unique days do we need each month?
#' @param position_columns Optional. Do we group by horizontalPosition, verticalPosition, and? Default is both. Added this option in case we just want to average across a given dimension.

#' @return A data frame that reports for each horiztonal and vertical position the computed mean and standard deviation from sampling (similar to a bootstrap method) as well as the sample mean and sample standard deviation


#' @keywords Currently none

#' @examples
#' \donttest{
#' # Download the NEON data directly - here this would be soil moisture
#' NEON_moist_30m_orig <- neonUtilities::loadByProduct(
#'   dpID = "DP1.00094.001",
#'   site = "WREF",
#'   startdate = "2022-06",
#'   enddate = "2022-06",
#'   timeIndex = "30",
#'   package = "expanded",
#'   check.size = FALSE,
#'   include.provisional = TRUE
#' )
#'
#'
#' # Then correct the swc
#' site_swc <- swc_correct(NEON_moist_30m_orig, "WREF","2022-06")
#'
#' # Select the columns and the time frequency
#' time_frequency <- "30_minute"
#' column_selectors <- c("Mean", "Minimum", "Maximum", "ExpUncert", "StdErMean")
#'
#'     swc <- site_swc |>
#'     purrr::pluck(paste0("SWS_",time_frequency)) |>
#'     dplyr::select(tidyselect::all_of(c("domainID","siteID",
#'     "horizontalPosition","verticalPosition","startDateTime","VSWCFinalQF")),
#'     tidyselect::matches(stringr::str_c("VSWC",column_selectors)))
#'
#'   # Determine a data frame of the different horizontal and vertical positions
#'   swc_positions <- site_swc |>
#'   purrr::pluck(paste0("sensor_positions_","00094"))
#'
#'   # Add on the positions for swc
#'   swc <- determine_position(swc_positions,swc)
#'   }



#' @export

#' @references
#' Zoey Werbin (@zoey-rw): original author https://github.com/zoey-rw/microbialForecasts/blob/caa7b1a8aa8a131a5ff9340f1562cd3a3cb6667b/data_construction/covariate_prep/soil_moisture/clean_NEON_sensor_moisture_data.r

compute_monthly_mean <- function(NEON_data,
                                 time_horizon = 15,
                                 position_columns = c("horizontalPosition", "verticalPosition")) {

  # changelog and author contributions / copyrights
  #   Zoey Werbin (@zoey-rw): original author https://github.com/zoey-rw/microbialForecasts/blob/caa7b1a8aa8a131a5ff9340f1562cd3a3cb6667b/data_construction/covariate_prep/soil_moisture/clean_NEON_sensor_moisture_data.r
  #   John Zobitz (2024-01-17)
  #     modified to also compute the depth (zOffset) from the max of all zOffsets that month
  #   John Zobitz (2024-01-10)
  #     original creation
  #     2024-04-08: update to get namespaces correct


  .data = NULL  # Appease R CMD Check

  out_mean <- NEON_data |>
    dplyr::filter(dplyr::if_any(tidyselect::ends_with("FinalQF"), ~ (.x == 0))) |>
    tidyr::drop_na() |>
    dplyr::mutate(day = lubridate::day(.data[["startDateTime"]])) |>
    dplyr::group_by(dplyr::across(tidyr::any_of(position_columns))) |>
    tidyr::nest() |>
    dplyr::mutate(month_stats = purrr::map(.x = .data[["data"]], .f = function(input_data) {
      zOffset <- input_data |>
        dplyr::count(zOffset) |>
        dplyr::slice_max(.data[["n"]]) |>
        dplyr::pull(zOffset) # Take the max tally of the different depths (if there are any)



      if (length(unique(input_data$day)) >= time_horizon) {
        col_names <- names(input_data)

        tsY <- dplyr::pull(input_data, var = which(stringr::str_detect(col_names, "[^StdEr]Mean$"))) # 30-min means
        uc <- dplyr::pull(input_data, var = which(stringr::str_detect(col_names, "ExpUncert$"))) / 2 # expanded measurement uncertainty at 95% confidence
        uNAT <- dplyr::pull(input_data, var = which(stringr::str_detect(col_names, "StdErMean$")))

        ub.sq <- (uc^2) - (uNAT^2)
        ub.sq[which(ub.sq < 0)] <- .0001 # negative values from rounded uncertainties...
        ub <- sqrt(ub.sq)


        nm <- 5000
        tsamp <- rep(NA, nm)
        tsamp_sd <- rep(NA, nm)
        for (i in 1:nm) {
          ub.samp <- sample(ub, 1) # sample 1 bias per month
          bias <- stats::rnorm(1, 0, 1) * ub.samp # assume that the bias is perfectly correlated from obs to obs

          uNAT.samp <- sample(uNAT, 1)
          tsim <- stats::rnorm(nm, tsY, uNAT.samp) ## sample random error per half hour - uses R recycling since nm < tsY

          tsamp[i] <- mean(tsim, na.rm = TRUE) + bias # output monthly mean
          tsamp_sd[i] <- stats::sd(tsim, na.rm = TRUE) # output monthly sd
        }
        comp_mean <- mean(tsamp)
        comp_sd <- mean(tsamp_sd)

        # just for sanity checking.
        simple_mean <- mean(tsY, na.rm = T)
        simple_sd <- stats::sd(tsY, na.rm = T)


        out_tibble <- tibble::tibble(comp_mean, comp_sd, simple_mean, simple_sd, zOffset)
      } else {
        out_tibble <- tibble::tibble(comp_mean = NA, comp_sd = NA, simple_mean = NA, simple_sd = NA, zOffset)
      }

      return(out_tibble)
    })) |>
    dplyr::select(-.data[["data"]]) |>
    tidyr::unnest(cols = c(.data[["month_stats"]])) |>
    dplyr::ungroup()

  return(out_mean)
}

Any scripts or data that you put into this service are public.

neonSoilFlux documentation built on Nov. 23, 2025, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

neonSoilFlux
Compute Soil Carbon Fluxes for the National Ecological Observatory Network Sites

R/compute_monthly_mean.R
In neonSoilFlux: Compute Soil Carbon Fluxes for the National Ecological Observatory Network Sites

Defines functions compute_monthly_mean

Documented in compute_monthly_mean

Try the neonSoilFlux package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

neonSoilFlux Compute Soil Carbon Fluxes for the National Ecological Observatory Network Sites

R/compute_monthly_mean.R In neonSoilFlux: Compute Soil Carbon Fluxes for the National Ecological Observatory Network Sites

Defines functions compute_monthly_mean

Documented in compute_monthly_mean

Try the neonSoilFlux package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

neonSoilFlux
Compute Soil Carbon Fluxes for the National Ecological Observatory Network Sites

R/compute_monthly_mean.R
In neonSoilFlux: Compute Soil Carbon Fluxes for the National Ecological Observatory Network Sites