##' Subtracts two time series by matching irregular time indexes
##'
##' Subtracts two time series by matching irregular time indexes. Can also be
##' used to align the indexes of a time series to a set of standard
##' time indexes.
##'
##' The format for the timestamps (in the vector names) can be virtually any
##' reasonable format, which will be converted to POSIXct by
##' \code{\link{formatDT}}.
##'
##' Suppose that \code{v1} is shorter than \code{v2}. For each index of the
##' \code{v1}, the \code{n.ind} indices of \code{v2} that are closest in time
##' to the \code{v1} index are identified and "matched" (by averaging them) to
##' the \code{v1} index. In the case of ties, for example, the nearest
##' \code{v2} indexes are both 5 seconds before and 5 seconds after the
##' \code{v1} index of interest, then the closest \code{v2} index in the past
##' (5 seconds before) is matched to the \code{v1} index.
##'
##' Hence, the average of the \code{n.ind} elements of \code{v2} that best
##' "match" (are closest in time to) the element of \code{v1} are subtracted
##' from \code{v1}, which creates a series of differences with the same length
##' (and timestamps) as \code{v1}.
##'
##' If instead, \code{v2} is shorter than \code{v1}, then the time stamps of
##' \code{v2} become the 'standard' to which the times of \code{v1} are
##' matched.
##'
##' If \code{v1} and \code{v2} are the same length, then the timestamps of
##' \code{v2} are matched to \code{v1} and the resulting vector of differences
##' has the same timestamps as \code{v1}.
##'
##' @export
##' @param v1 A time series vector: vector with dates or datetimes for names
##' which are non repeating and in chronological order
##'
##' @param v2 Another time series vector
##'
##' @param n.ind An integer >= 1 that indicates how many elements of the longer
##' of the two vectors will be averaged and matched to the closest timestamp of
##' the shorter vector. See details below.
##'
##' @param full \code{=TRUE} returns a data frame that shows in detail how the
##' two vectors were matched and the difference calculated
##'
##' @return if \code{full = FALSE} then the difference (after matching) of
##' \code{(v1 - v2)} is returned. Otherwise, a data frame is returned that
##' shows how the vectors were matched and the resulting difference vector.
##'
##' @author Landon Sego
##'
##' @keywords misc
##'
##' @examples
##' data(timeDiff.eg)
##'
##' # Show the objects
##' print(timeDiff.eg)
##'
##' # Extract the objects from the list for easier use in the example
##' sepList(timeDiff.eg)
##'
##' # Print warnings as they occur
##' op <- options(warn = 1)
##'
##' # Show various differences
##' timeDiff(x1, x2, full = TRUE)
##' timeDiff(x2.d, x1.d, full = TRUE)
##' timeDiff(x1, x1)
##'
##' options(op)
##'
##' ### If we need to average a time-series at 30 second invervals:
##'
##' # Create the vector that will be averaged, with time stamps occuring
##' # about every 10 seconds
##' v1.names <- seq(formatDT("2009-09-12 3:20:31")$dt.posix,
##' formatDT("2009-09-12 3:29:15")$dt.posix, by = 10)
##'
##' # Now jitter the times a bit and look at the time spacing
##' v1.names <- v1.names + round(rnorm(length(v1.names), sd = 1.5))
##' diff(v1.names)
##'
##' # Create the vector
##' v1 <- abs(rnorm(length(v1.names), mean = 7, sd = 3))
##' names(v1) <- v1.names
##'
##' # Now create a standard vector with values of 0 with time stamps every 30 seconds
##' standard.names <- seq(formatDT("2009-09-12 3:21:30")$dt.posix,
##' formatDT("2009-09-12 3:28:30")$dt.posix, by = 30)
##' standard <- double(length(standard.names))
##' names(standard) <- standard.names
##'
##' # Now average the v1 values by matching the 3 closest values to each standard time:
##' timeDiff(v1, standard, n.ind = 3, full = TRUE)
##' v1.avg <- timeDiff(v1, standard, n.ind = 3)
##'
##' # Check that every 3 obs were averaged
##' v1.avg.check <- tapply(v1[6:50], rep(1:15, each = 3), mean)
##' max(abs(v1.avg.check - v1.avg))
##'
##'
timeDiff <- function(v1, v2, n.ind = 1, full = FALSE) {
if ((n.ind < 1) | (n.ind%%1 != 0))
stop("'n.ind' must be an integer >= 1")
# Identify the shorter and longer of the sequences
shorter <- ifelse(length(v1) <= length(v2), "v1", "v2")
longer <- ifelse(length(v1) > length(v2), "v1", "v2")
# Sanity check
if (any(sort(c(shorter,longer)) != c("v1","v2")))
stop("timeDiff(), Failure 1: 'shorter' and 'longer' mapping failed\n")
# Get the full vectors
v.long <- get(longer)
v.short <- get(shorter)
# v1 and v2 must be time series (named vectors in chronological order, the names being the time stamps)
v.long.n <- names(v.long)
v.short.n <- names(v.short)
# Verify there are names
if (is.null(v.long.n) | is.null(v.short.n))
stop("timeDiff(), Failure 2: v1 and v2 must be named vectors, with date or datetimes as the names\n")
# Convert dates or datetimes to posix (this will verify the names are date times)
v.long.fdt <- formatDT(v.long.n)
v.short.fdt <- formatDT(v.short.n)
# If both are not date times, there should be dates only
ldtp <- is.null(v.long.fdt$dt.posix)
sdtp <- is.null(v.short.fdt$dt.posix)
if (ldtp & sdtp)
var <- "date.posix"
# Otherwise, datetimes should exist in both
else if (!ldtp & !sdtp)
var <- "dt.posix"
# Otherwise they are both not the same type
else
stop("timeDiff(), Failure 3: names of v1 and v2 must both be datetimes or both be dates only\n")
# Select out the datetime or date element from the list
v.long.t <- v.long.fdt[[var]]
v.short.t <- v.short.fdt[[var]]
# Dates or dateimes must be increasing and non-repeating
if (any(diff(v.long.t) < 0) | any(diff(v.short.t) < 0))
stop("timeDiff(), Failure 4: Names of v1 and v2 must be chronological (increasing) date times\n")
# Environment for recording selected indexes
subEnv <- new.env()
assign("index.vec", NULL, envir = subEnv)
assign("t.index.vec", NULL, envir = subEnv)
# Function to identify the index that is closest to time stamp
# As a convention, if there is a tie, then choose the closest one in the past (which would be positive)
id.index <- function(x) {
# Calculate the vector of differences, in seconds (if a datetime) or in days (if a date)
ddiff <- data.frame(names = 1:length(v.long.t),
v.diff = as.numeric(x - v.long.t),
a.v.diff = abs(as.numeric(x - v.long.t)))
# Sort first by the absolute difference, then by descending difference (so that postive values come first on ties)
ddiff <- ddiff[order(ddiff$a.v.diff, -ddiff$v.diff), ]
# ddiff <- sortDF(ddiff, ~ a.v.diff - v.diff)
# Will select 1:n.ind of the min.indexes--but we need to make sure that n.ind is not tied with others below it
if (any(ties <- ddiff[(n.ind+1):NROW(ddiff), "a.v.diff"] == ddiff[n.ind, "a.v.diff"])) {
if (sum(ties) > 1)
stop("id.index(): Only 1 tie is expected")
if (any(ties[-1]))
stop("id.index(): Only expecting a tie to occur on the n.ind+1 index")
# Tie values: One should be negative, one should be positive (should sum to near 0)
if (abs(sum(ddiff[c(n.ind, n.ind+1), "v.diff"])) > 1e-08)
stop("id.index(): Tie values did not sum to 0")
# Verify the n.ind value is positive
if (ddiff[n.ind, "v.diff"] < 0)
stop("id.index(): ddiff[n.ind, 'v.diff'] < 0")
} # if any ties
# Get numerical indexes of the date-times that will be returned (they should be consecutive)
indexes <- sort(ddiff[1:n.ind, "names"])
if (any(diff(indexes) != 1))
stop("id.index(): Selected indexes are not contiguous")
# For checking:
# pvar(x, indexes)
# pvar(names(v.long)[indexes])
# Accumulate the indexes
assign("index.vec", c(get("index.vec", envir = subEnv), indexes), envir = subEnv)
assign("t.index.vec", c(get("t.index.vec", envir = subEnv), paste(indexes, collapse = ",")), envir = subEnv)
# Return the mean of the times
return(mean(v.long[indexes]))
} # id.index
# Vector of matched names
matched.means <- unlist(tapply(v.short.t, v.short.n, id.index))
# Look for overlapping windows from n.ind being too large
selected.indexes <- get("index.vec", envir = subEnv)
text.selected.indexes <- get("t.index.vec", envir = subEnv)
if ((n.ind > 1) & any(diff(selected.indexes) <= 0))
warning(pvar(n.ind, verbose = FALSE), " may be too large. Averaging windows may be overlapping or values\n",
"of the longer vector may be matched (averaged) to more than one index of the shorter vector")
## cat("\n")
## pvar(check.indexes)
## pvar(diff(check.indexes))
## cat("\n")
# The names and lengths of the two vectors should be the same
if (length(matched.means) != length(v.short))
stop("timeDiff() Failure 5: length(matched.means) != length(v.short)\n")
# Sanity check
if (!all(names(matched.means) == v.short.n))
stop("id.index() in timeDiff(), Failure 3: names aren't matching\n")
# Now calculate the difference v1 - v2 (this works if v.short is v1)
tdiff <- v.short - matched.means
names(tdiff) <- v.short.n
# Change the sign if v.short is v2
if (shorter == "v2")
tdiff <- -1 * tdiff
# Create a dataframe which illustrates the matching
if (full) {
# Create the dataframe
show.match <- data.frame(v.short.t = v.short.t,
v.long.t = NA,
original.index = text.selected.indexes,
v.short = v.short,
new.v.long = matched.means,
diff = tdiff)
if (n.ind == 1)
show.match[,"v.long.t"] <- v.long.t[as.numeric(text.selected.indexes)]
# Rename the columns with names that match the original input objects
dv1 <- deparse(substitute(v1))
dv2 <- deparse(substitute(v2))
s.name <- ifelse(shorter == "v1", dv1, dv2)
l.name <- ifelse(longer == "v1", dv1, dv2)
i.name <- paste("original", l.name, "index", sep = ".")
t.name <- paste(stripExtension(i.name), "time", sep = ".")
if (n.ind > 1)
l.name <- paste(l.name, "average", sep = ".")
colnames(show.match) <- c(paste(s.name, "timestamps", sep = "."),
t.name,
i.name,
s.name,
l.name,
paste("diff", dv1, dv2, sep = "."))
rownames(show.match) <- 1:NROW(show.match)
return(show.match)
}
else
return(tdiff)
} # timeDiff
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.