Nothing
# metadata.R
#' Match metadata info with a vector of data timestamps
#'
#' @param data_timestamps Data timestamps, either character (YYYY-MM-DD HH:MM:SS) or \code{\link{POSIXct}}
#' @param start_dates Metadata measurement dates, either character
#' (YYYY-MM-DD) or \code{\link{POSIXct}}
#' @param start_times Metadata measurement start time entries, either character (HH:MM:SS) or \code{\link[lubridate]{period}}
#' @param obs_lengths Observation lengths in seconds, numeric; must be same
#' length as \code{start_dates}. This should include both the intended
#' measurement period as well as any dead band time at the beginning
#' @importFrom lubridate ymd_hms mdy_hms ymd mdy hms tz is.POSIXct is.period
#' @importFrom utils head tail
#' @importFrom stats na.omit
#' @return A numeric vector equal in length to \code{data_timestamps}, with
#' each entry indicating the metadata entry that should be used for that
#' observation. \code{NA} is returned if a timestamp has no match in the metadata (i.e., does not
#' fall within any window defined by the \code{start_dates}, \code{start_times},
#' and observation length parameters).
#' @note If \code{data_timestamps} or \code{start_dates} cannot be parsed as
#' YYYY-MM-DD, the preferred format, then MM/DD/YYYY (used by U.S. versions of
#' Microsoft Excel when saving CSV files, for example) will be tried.
#' @export
#' @examples
#' # Data timestamps
#' d_t <- c("2024-01-01 13:00:05", "2024-01-01 13:00:10",
#' "2024-01-01 13:05:05", "2024-01-01 13:10:00")
#' # Metadata start dates and times: two measurements, starting 5 minutes apart
#' s_d <- c("2024-01-01", "2024-01-01")
#' s_t <- c("13:00:00", "13:05:00")
#' ol <- c(60, 60) # Observation lengths
#' ffi_metadata_match(d_t, s_d, s_t, ol)
#' # Returns {1, 1, 2, NA} indicating that the first and second data timestamps
#' # correspond to metadata entry 1, the third to entry 2, and the fourth
#' # has no match
#'
#' # This generates an error because of overlapping timestamps:
#' try({
#' s_t <- c("13:00:00", "13:01:00")
#' ffi_metadata_match(d_t, s_d, s_t, ol)
#' })
ffi_metadata_match <- function(data_timestamps,
start_dates, start_times,
obs_lengths) {
# Input checks and convert to dates/timestamps if needed
stopifnot(length(start_dates) == length(start_times))
stopifnot(length(start_dates) == length(obs_lengths))
# The metadata dates and times shouldn't be empty
if(any(is.na(start_dates))) {
warning("One or more metadata dates are missing")
}
if(any(is.na(start_times))) {
warning("One or more metadata times are missing")
}
if(any(is.na(obs_lengths))) {
warning("One or more observation lengths are missing")
}
# Convert data timestasmps to POSIXct and check validity
d_ts <- data_timestamps
if(is.character(data_timestamps)) {
suppressWarnings({
data_timestamps <- ymd_hms(data_timestamps)
})
}
if(all(is.na(data_timestamps))) {
ffi_message("YYYY-MM-DD HH:MM:SS format failed for data_timestamps; trying MM/DD/YYYY HH:MM:SS")
data_timestamps <- mdy_hms(d_ts)
}
stopifnot(is.POSIXct(data_timestamps))
# Convert metadata dates and times to POSIXct and check validity
s_d <- start_dates
if(is.character(start_dates)) {
suppressWarnings({
start_dates <- ymd(start_dates, tz = tz(data_timestamps))
})
}
if(all(is.na(start_dates))) {
ffi_message("YYYY-MM-DD format failed for start_dates; trying MM/DD/YYYY")
start_dates <- mdy(s_d, tz = tz(data_timestamps))
}
stopifnot(is.POSIXct(start_dates))
# Convert metadata times to periods and check validity
if(is.character(start_times)) start_times <- hms(start_times)
stopifnot(is.period(start_times))
stopifnot(is.numeric(obs_lengths))
# Compute the metadata start and stop timestamps
start_timestamps <- start_dates + start_times
stopifnot(is.POSIXct((start_timestamps))) # should always be true!
stop_timestamps <- start_timestamps + obs_lengths
# Metadata records shouldn't overlap; we assume that metadata is from a
# single machine that generated the data
ord <- order(start_timestamps)
overlaps <- head(stop_timestamps[ord], -1) >= tail(start_timestamps[ord], -1)
if(any(overlaps, na.rm = TRUE)) {
stop("start_timestamps overlaps: ",
paste(which(overlaps) + 1, collapse = ", "))
}
# Compute data-metadata matches
matches <- rep(NA_real_, length(data_timestamps))
entries <- seq_along(start_timestamps)
for(i in entries) {
mtch <- data_timestamps >= start_timestamps[i] & data_timestamps < stop_timestamps[i]
matches[mtch] <- i
}
# Metadata rows with zero matches seems unexpected
no_matches <- base::setdiff(entries, unique(matches, na.omit(matches)))
lnm <- length(no_matches)
if(lnm) {
ffi_message(lnm, ifelse(lnm == 1, " entry", " entries"),
" had no timestamp matches!")
}
return(matches)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.