Nothing
##' Subtracts two time series by matching irregular time indexes
##'
##' Subtracts two time series by matching irregular time indexes. Can also be
##' used to align the indexes of a time series to a set of standard
##' time indexes.
##'
##' The format for the timestamps (in the vector names) can be virtually any
##' reasonable format, which will be converted to POSIXct by
##' \code{\link{formatDT}}.
##'
##' Suppose that \code{v1} is shorter than \code{v2}. For each index of the
##' \code{v1}, the \code{n.ind} indices of \code{v2} that are closest in time
##' to the \code{v1} index are identified and "matched" (by averaging them) to
##' the \code{v1} index. In the case of ties, for example, the nearest
##' \code{v2} indexes are both 5 seconds before and 5 seconds after the
##' \code{v1} index of interest, then the closest \code{v2} index in the past
##' (5 seconds before) is matched to the \code{v1} index.
##'
##' Hence, the average of the \code{n.ind} elements of \code{v2} that best
##' "match" (are closest in time to) the element of \code{v1} are subtracted
##' from \code{v1}, which creates a series of differences with the same length
##' (and timestamps) as \code{v1}.
##'
##' If instead, \code{v2} is shorter than \code{v1}, then the time stamps of
##' \code{v2} become the 'standard' to which the times of \code{v1} are
##' matched.
##'
##' If \code{v1} and \code{v2} are the same length, then the timestamps of
##' \code{v2} are matched to \code{v1} and the resulting vector of differences
##' has the same timestamps as \code{v1}.
##'
##' @export
##' @param v1 A time series vector: vector with dates or datetimes for names
##' which are non repeating and in chronological order
##'
##' @param v2 Another time series vector
##'
##' @param n.ind An integer >= 1 that indicates how many elements of the longer
##' of the two vectors will be averaged and matched to the closest timestamp of
##' the shorter vector. See details below.
##'
##' @param full \code{=TRUE} returns a data frame that shows in detail how the
##' two vectors were matched and the difference calculated
##'
##' @return if \code{full = FALSE} then the difference (after matching) of
##' \code{(v1 - v2)} is returned. Otherwise, a data frame is returned that
##' shows how the vectors were matched and the resulting difference vector.
##'
##' @author Landon Sego
##'
##' @keywords misc
##'
##' @examples
##' data(timeDiff.eg)
##'
##' # Show the objects
##' print(timeDiff.eg)
##'
##' # Extract the objects from the list for easier use in the example
##' sepList(timeDiff.eg)
##'
##' # Print warnings as they occur
##' op <- options(warn = 1)
##'
##' # Show various differences
##' timeDiff(x1, x2, full = TRUE)
##' timeDiff(x2.d, x1.d, full = TRUE)
##' timeDiff(x1, x1)
##'
##' options(op)
##'
##' ### If we need to average a time-series at 30 second invervals:
##'
##' # Create the vector that will be averaged, with time stamps occuring
##' # about every 10 seconds
##' v1.names <- seq(formatDT("2009-09-12 3:20:31")$dt.posix,
##' formatDT("2009-09-12 3:29:15")$dt.posix, by = 10)
##'
##' # Now jitter the times a bit and look at the time spacing
##' v1.names <- v1.names + round(rnorm(length(v1.names), sd = 1.5))
##' diff(v1.names)
##'
##' # Create the vector
##' v1 <- abs(rnorm(length(v1.names), mean = 7, sd = 3))
##' names(v1) <- v1.names
##'
##' # Now create a standard vector with values of 0 with time stamps every 30 seconds
##' standard.names <- seq(formatDT("2009-09-12 3:21:30")$dt.posix,
##' formatDT("2009-09-12 3:28:30")$dt.posix, by = 30)
##' standard <- double(length(standard.names))
##' names(standard) <- standard.names
##'
##' # Now average the v1 values by matching the 3 closest values to each standard time:
##' timeDiff(v1, standard, n.ind = 3, full = TRUE)
##' v1.avg <- timeDiff(v1, standard, n.ind = 3)
##'
##' # Check that every 3 obs were averaged
##' v1.avg.check <- tapply(v1[6:50], rep(1:15, each = 3), mean)
##' max(abs(v1.avg.check - v1.avg))
##'
##'
timeDiff <- function(v1, v2, n.ind = 1, full = FALSE) {
if ((n.ind < 1) | (n.ind%%1 != 0))
stop("'n.ind' must be an integer >= 1")
# Identify the shorter and longer of the sequences
shorter <- ifelse(length(v1) <= length(v2), "v1", "v2")
longer <- ifelse(length(v1) > length(v2), "v1", "v2")
# Sanity check
if (any(sort(c(shorter,longer)) != c("v1","v2")))
stop("timeDiff(), Failure 1: 'shorter' and 'longer' mapping failed\n")
# Get the full vectors
v.long <- get(longer)
v.short <- get(shorter)
# v1 and v2 must be time series (named vectors in chronological order, the names being the time stamps)
v.long.n <- names(v.long)
v.short.n <- names(v.short)
# Verify there are names
if (is.null(v.long.n) | is.null(v.short.n))
stop("timeDiff(), Failure 2: v1 and v2 must be named vectors, with date or datetimes as the names\n")
# Convert dates or datetimes to posix (this will verify the names are date times)
v.long.fdt <- formatDT(v.long.n)
v.short.fdt <- formatDT(v.short.n)
# If both are not date times, there should be dates only
ldtp <- is.null(v.long.fdt$dt.posix)
sdtp <- is.null(v.short.fdt$dt.posix)
if (ldtp & sdtp)
var <- "date.posix"
# Otherwise, datetimes should exist in both
else if (!ldtp & !sdtp)
var <- "dt.posix"
# Otherwise they are both not the same type
else
stop("timeDiff(), Failure 3: names of v1 and v2 must both be datetimes or both be dates only\n")
# Select out the datetime or date element from the list
v.long.t <- v.long.fdt[[var]]
v.short.t <- v.short.fdt[[var]]
# Dates or dateimes must be increasing and non-repeating
if (any(diff(v.long.t) < 0) | any(diff(v.short.t) < 0))
stop("timeDiff(), Failure 4: Names of v1 and v2 must be chronological (increasing) date times\n")
# Environment for recording selected indexes
subEnv <- new.env()
assign("index.vec", NULL, envir = subEnv)
assign("t.index.vec", NULL, envir = subEnv)
# Function to identify the index that is closest to time stamp
# As a convention, if there is a tie, then choose the closest one in the past (which would be positive)
id.index <- function(x) {
# Calculate the vector of differences, in seconds (if a datetime) or in days (if a date)
ddiff <- data.frame(names = 1:length(v.long.t),
v.diff = as.numeric(x - v.long.t),
a.v.diff = abs(as.numeric(x - v.long.t)))
# Sort first by the absolute difference, then by descending difference (so that postive values come first on ties)
ddiff <- ddiff[order(ddiff$a.v.diff, -ddiff$v.diff), ]
# ddiff <- sortDF(ddiff, ~ a.v.diff - v.diff)
# Will select 1:n.ind of the min.indexes--but we need to make sure that n.ind is not tied with others below it
if (any(ties <- ddiff[(n.ind+1):NROW(ddiff), "a.v.diff"] == ddiff[n.ind, "a.v.diff"])) {
if (sum(ties) > 1)
stop("id.index(): Only 1 tie is expected")
if (any(ties[-1]))
stop("id.index(): Only expecting a tie to occur on the n.ind+1 index")
# Tie values: One should be negative, one should be positive (should sum to near 0)
if (abs(sum(ddiff[c(n.ind, n.ind+1), "v.diff"])) > 1e-08)
stop("id.index(): Tie values did not sum to 0")
# Verify the n.ind value is positive
if (ddiff[n.ind, "v.diff"] < 0)
stop("id.index(): ddiff[n.ind, 'v.diff'] < 0")
} # if any ties
# Get numerical indexes of the date-times that will be returned (they should be consecutive)
indexes <- sort(ddiff[1:n.ind, "names"])
if (any(diff(indexes) != 1))
stop("id.index(): Selected indexes are not contiguous")
# For checking:
# pvar(x, indexes)
# pvar(names(v.long)[indexes])
# Accumulate the indexes
assign("index.vec", c(get("index.vec", envir = subEnv), indexes), envir = subEnv)
assign("t.index.vec", c(get("t.index.vec", envir = subEnv), paste(indexes, collapse = ",")), envir = subEnv)
# Return the mean of the times
return(mean(v.long[indexes]))
} # id.index
# Vector of matched names
matched.means <- unlist(tapply(v.short.t, v.short.n, id.index))
# Look for overlapping windows from n.ind being too large
selected.indexes <- get("index.vec", envir = subEnv)
text.selected.indexes <- get("t.index.vec", envir = subEnv)
if ((n.ind > 1) & any(diff(selected.indexes) <= 0))
warning(pvar(n.ind, verbose = FALSE), " may be too large. Averaging windows may be overlapping or values\n",
"of the longer vector may be matched (averaged) to more than one index of the shorter vector")
## cat("\n")
## pvar(check.indexes)
## pvar(diff(check.indexes))
## cat("\n")
# The names and lengths of the two vectors should be the same
if (length(matched.means) != length(v.short))
stop("timeDiff() Failure 5: length(matched.means) != length(v.short)\n")
# Sanity check
if (!all(names(matched.means) == v.short.n))
stop("id.index() in timeDiff(), Failure 3: names aren't matching\n")
# Now calculate the difference v1 - v2 (this works if v.short is v1)
tdiff <- v.short - matched.means
names(tdiff) <- v.short.n
# Change the sign if v.short is v2
if (shorter == "v2")
tdiff <- -1 * tdiff
# Create a dataframe which illustrates the matching
if (full) {
# Create the dataframe
show.match <- data.frame(v.short.t = v.short.t,
v.long.t = NA,
original.index = text.selected.indexes,
v.short = v.short,
new.v.long = matched.means,
diff = tdiff)
if (n.ind == 1)
show.match[,"v.long.t"] <- v.long.t[as.numeric(text.selected.indexes)]
# Rename the columns with names that match the original input objects
dv1 <- deparse(substitute(v1))
dv2 <- deparse(substitute(v2))
s.name <- ifelse(shorter == "v1", dv1, dv2)
l.name <- ifelse(longer == "v1", dv1, dv2)
i.name <- paste("original", l.name, "index", sep = ".")
t.name <- paste(stripExtension(i.name), "time", sep = ".")
if (n.ind > 1)
l.name <- paste(l.name, "average", sep = ".")
colnames(show.match) <- c(paste(s.name, "timestamps", sep = "."),
t.name,
i.name,
s.name,
l.name,
paste("diff", dv1, dv2, sep = "."))
rownames(show.match) <- 1:NROW(show.match)
return(show.match)
}
else
return(tdiff)
} # timeDiff
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.