R/build-navigator.R

Defines functions navigator_datetime build_navigator_meta navigator_attrs_qc navigator_var_meta navigator_var_qc build_navigator_vars build_navigator_dims build_navigator_ctd bs_check_navigator bs_build_navigator

Documented in bs_build_navigator bs_check_navigator

#' Build data for Navigator
#'
#' For the purposes of [Navigator](https://navigator.oceansdata.ca/),
#' the BSRTO is treated as a single station at 74.605N/91.251W.
#' There is a single WMO code assigned to the station that is used for
#' the moorings that represent all three depths.
#'
#' @param built_dir The output directory used for
#'   [bs_build_realtime()] that contains the built data files.
#' @param out_dir Where the NetCDF files should be created.
#' @param out_file The location of the NetCDF generated by
#'   [bs_build_navigator()].
#'
#' @return A vector of files that were written.
#' @export
#'
#' @examples
#' \dontrun{
#' bs_build_realtime()
#' out_file <- bs_build_navigator()
#' bs_check_navigator(out_file)
#' }
#'
bs_build_navigator <- function(built_dir = ".", out_dir = built_dir) {

  ctd_aligned <- build_navigator_ctd(built_dir)

  dims <- build_navigator_dims(ctd_aligned)
  vars <- build_navigator_vars(dims)
  meta <- build_navigator_meta(
    date_start = min(ctd_aligned$date_time),
    date_end = max(ctd_aligned$date_time),
    date_update = Sys.time()
  )
  var_meta <- lapply(vars, navigator_var_meta)

  out_file <- file.path(out_dir, glue("{ meta$id }.nc"))
  cli::cat_line(glue("Writing '{ out_file }'"))
  if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)

  nc <- ncdf4::nc_create(out_file, vars)
  on.exit(ncdf4::nc_close(nc))

  # write global meta
  for (att_name in names(meta)) {
    ncdf4::ncatt_put(nc, 0, att_name, meta[[att_name]])
  }

  # write variable meta
  for (var_name in names(vars)) {
    for (att_name in names(var_meta[[var_name]])) {
      ncdf4::ncatt_put(nc, var_name, att_name, var_meta[[c(var_name, att_name)]])
    }
  }

  # write variables
  for (var_name in names(vars)) {
    ncdf4::ncvar_put(nc, var_name, ctd_aligned[[var_name]])
  }

  out_file
}

#' @rdname bs_build_navigator
#' @export
bs_check_navigator <- function(out_file) {
  out_nc <- ncdf4::nc_open(out_file)
  on.exit(ncdf4::nc_close(out_nc))

  # check that the CTDs were assigned the right depth
  ctd_vars <- c("PRES", "TEMP", "DOXY", "PSAL")
  qc_vars <- paste0(ctd_vars, "_QC")

  df <- expand.grid(
    DEPTH = out_nc$dim$DEPTH$vals,
    TIME = out_nc$dim$TIME$vals
  )
  df$date_time <- as.POSIXct("1950-01-01 00:00:00", tz = "UTC") +
    as.difftime(df$TIME, units = "days")

  df[ctd_vars] <- lapply(
    ctd_vars,
    function(var) as.numeric(ncdf4::ncvar_get(out_nc, var))
  )
  df[qc_vars] <- lapply(
    qc_vars,
    function(var) as.integer(ncdf4::ncvar_get(out_nc, var))
  )

  if (requireNamespace("testthat", quietly = TRUE)) {
    testthat::expect_setequal(names(out_nc$dim), c("DEPTH", "TIME"))
    testthat::expect_setequal(out_nc$dim$DEPTH$vals, c(40, 60, 160))
    testthat::expect_true(
      all(round(diff(out_nc$dim$TIME$vals), 5) == round(2 / 24, 5))
    )
    testthat::expect_setequal(names(out_nc$var), c(ctd_vars, qc_vars))

    # check for DEPTH dimension values that are obviously wrong
    pres_depth_diff <- df$PRES - df$DEPTH
    testthat::expect_true(all(abs(pres_depth_diff) < 20, na.rm = TRUE))

    # check that stated time range matches the date/time of the file
    testthat::expect_identical(
      ncdf4::ncatt_get(out_nc, 0, "last_date_observation")$value,
      navigator_datetime(max(df$date_time))
    )
    testthat::expect_identical(
      ncdf4::ncatt_get(out_nc, 0, "time_coverage_start")$value,
      navigator_datetime(min(df$date_time))
    )
    testthat::expect_identical(
      ncdf4::ncatt_get(out_nc, 0, "time_coverage_end")$value,
      navigator_datetime(max(df$date_time))
    )

    # check that file doesn't claim to be generated in the future
    creation <- ncdf4::ncatt_get(out_nc, 0, "history")$value
    creation_date <- stringr::str_match(creation, "^(.*?)\\s*:\\s*Creation")
    testthat::expect_true(readr::parse_datetime(creation_date[, 2]) < Sys.time())
  } else {
    warning(
      "Can't run explicit tests on Navigator output without 'testthat' installed.",
      call. = FALSE, immediate. = TRUE
    )
  }

  tibble::as_tibble(df)
}

build_navigator_ctd <- function(built_dir = ".") {
  ctd_file <- file.path(built_dir, "ctd.csv")
  cli::cat_line(glue("Loading '{ ctd_file }'"))
  stopifnot(file.exists(ctd_file))

  ctd <- readr::read_csv(
    ctd_file,
    col_types = readr::cols(
      file = readr::col_character(),
      date_time = readr::col_datetime(format = ""),
      .default = readr::col_double()
    )
  )

  # The date_time reported is not aligned between moorings,
  # but is usually within a few seconds. To make a useful
  # multidimensional array, these are rounded by <30 seconds
  # (and checked to make sure this is the case).
  date_time_round <- round.POSIXt(ctd$date_time, "hours")
  date_time_diff <- as.numeric(date_time_round - ctd$date_time, units = "secs")
  stopifnot(
    max(abs(date_time_diff)) < 30
  )
  ctd$date_time <- as.POSIXct(date_time_round)

  # The below code makes a sequence of dates and times every
  # two hours covering the span of the input. Check to make
  # sure all of the observations fit this requirement.
  stopifnot(
    all(as.POSIXlt(ctd$date_time)$hour %in% seq(0, 22, by = 2)),
    all(as.POSIXlt(ctd$date_time)$minute == 0),
    all(as.POSIXlt(ctd$date_time)$sec == 0)
  )

  # Use moored depth label as the DEPTH dimension
  # The order of depth_label and date_time in expand.grid()
  # must match the order of the dimensions in the NetCDF
  # or the values will be misaligned
  date_time_range <- range(ctd$date_time)
  ctd_aligned <- expand.grid(
    depth_label = sort(unique(ctd$depth_label)),
    date_time = seq(date_time_range[1], date_time_range[2], by = "2 hours")
  ) %>%
    dplyr::left_join(ctd, by = c("date_time", "depth_label")) %>%
    dplyr::transmute(
      date_time = .data$date_time,
      DEPTH = .data$depth_label,
      TIME = as.numeric(
        .data$date_time - as.POSIXct("1950-01-01 00:00:00", tz = "UTC"),
        units = "days"
      ),
      PRES = .data$pressure,
      TEMP = .data$temperature,
      DOXY = .data$oxygen,
      PSAL = .data$salinity_calc,

      PRES_QC = dplyr::coalesce(.data$pressure_flag, bs_flag("not measured")),
      TEMP_QC = dplyr::coalesce(.data$temperature_flag, bs_flag("not measured")),
      DOXY_QC = dplyr::coalesce(.data$oxygen_flag, bs_flag("not measured")),
      PSAL_QC = dplyr::coalesce(.data$salinity_calc_flag, bs_flag("not measured"))
    )

  ctd_aligned
}

build_navigator_dims <- function(ctd_aligned) {
  dims <- list(
    ncdf4::ncdim_def(
      "DEPTH", units = "meters",
      vals = unique(sort(ctd_aligned$DEPTH))
    ),
    ncdf4::ncdim_def(
      "TIME", units = "meters",
      vals = unique(sort(ctd_aligned$TIME))
    )
  )

  names(dims) <- vapply(dims, "[[", "name", FUN.VALUE = character(1))
  dims
}

build_navigator_vars <- function(dims) {
  ctd_vars <- c("PRES", "TEMP", "DOXY", "PSAL")

  vars <- lapply(
    ctd_vars,
    ncdf4::ncvar_def,
    units = "",
    dim = list(dims$DEPTH, dims$TIME)
  )
  names(vars) <- ctd_vars

  vars_qc <- lapply(vars, navigator_var_qc)
  names(vars_qc) <- paste0(ctd_vars, "_QC")

  c(vars, vars_qc)
}

navigator_var_qc <- function(var) {
  ncdf4::ncvar_def(
    paste0(var$name, "_QC"),
    units = "",
    dim = var$dim,
    longname = paste(var$longname, "quality flag"),
    prec = "byte"
  )
}

navigator_var_meta <- function(var) {
  if (endsWith(var$name, "_QC")) {
    return(navigator_attrs_qc())
  }

  switch(
    var$name,
    PRES = list(
      units = "dbar",
      long_name = "Sea pressure",
      standard_name = "sea_water_pressure",
      axis = "Z",
      positive = "down"
    ),
    TEMP = list(
      units = "degrees_C",
      long_name = "Sea temperature",
      standard_name = "sea_water_temperature"
    ),
    PSAL = list(
      units = "0.001",
      long_name = "Practical salinity",
      standard_name = "sea_water_practical_salinity"
    ),
    DOXY = list(
      units = "micromole/kg",
      long_name = "Dissolved oxygen",
      standard_name = "moles_of_oxygen_per_unit_mass_in_sea_water"
    )
  )
}

navigator_attrs_qc <- function() {
  flags <- bs_flag_scheme()
  list(
    conventions = "BSRTO Internal Flag Scheme",
    flag_values = paste(flags$flag, collapse = " "),
    flag_meanings = paste(flags$flag_label, collapse = " | ")
  )
}

build_navigator_meta <- function(date_start, date_end, date_update = Sys.time()) {
  date_update <- navigator_datetime(date_update)
  station_lat <- "74.605"
  station_lon <- "-91.251"
  min_depth <- "40"
  max_depth <- "160"

  list(
    title = "Barrow Strait Real Time Observatory - In-situ moored observations",
    id = "BSRTO_12345",
    data_type = "Moored instrument",
    date_update = date_update,
    institution = "Bedford Institute of Oceanography (BIO)",
    institution_edmo_code = "1811",
    history = glue("{ date_update } : Creation"),
    references = "https://doi.org/10.1145/3148675.3152195",
    comment = bs_version_info(),
    netcdf_version = ncdf4::nc_version(),
    geospatial_lat_min = station_lat,
    geospatial_lat_max = station_lat,
    geospatial_lon_min = station_lon,
    geospatial_lon_max = station_lon,
    geospatial_vertical_min = min_depth,
    geospatial_vertical_max = max_depth,
    time_coverage_start = navigator_datetime(date_start),
    time_coverage_end = navigator_datetime(date_end),
    contact = "Clark.Richards@dfo-mpo.gc.ca",
    author = "Fisheries and Oceans Canada",
    data_assembly_center = "Bedford Institute of Oceanography",
    pi_name = "Clark Richards",
    distribution_statement = paste(
      "These data are public and free of charge. User assumes all risk for use of",
      "data. User must display citation in any publication or product using data.",
      "User must contact PI prior to any commercial use of data."
    ),
    citation = paste(
      "These data were collected and made freely available by Fisheries and",
      "Oceans Canada and its partner organizations."
    ),
    update_interval = "daily",
    last_date_observation = navigator_datetime(date_end),
    last_latitude_observation = station_lat,
    last_longitude_observation = station_lon
  )
}

navigator_datetime <- function(value) {
  format(
    as.POSIXct(value),
    "%Y-%m-%dT%H:%M:%OSZ",
    tz = "UTC",
    justify = "none"
  )
}
paleolimbot/bsrto documentation built on Dec. 12, 2021, 5:44 a.m.