Nothing
#' Delete locations to harmonize the sampling frequency and time duration
#'
#' Standardize the sampling frequency and duration across individuals in a \link{LoCoH-xy} object by deleting points
#'
#' @param lxy A \link{LoCoH-xy} object
#' @param id The id value(s) to be harmonized
#' @param trim.ends Truncate points from either end of the timeline to achieve a common time window, T/F
#' @param dt.start The starting date-time that all individual trajectories will be truncated to. Should be a POSIXct or POSIXlt object. If \code{NULL}, the first date-time that
#' all points have in common will be used.
#' @param dt.end The end date-time that all individual trajectories will be truncated to. If \code{NULL}, the last date-time that
#' all points have in common will be used.
#' @param byfreq Delete points to achieve a common sampling frequency (\code{samp.freq}), T/F
#' @param samp.freq The common time step for the output (in seconds). Can also be set to \code{"lcm"}, in which case the least common multiple
#' of the median time step of each individual will be computed
#' @param lcm.round When \code{samp.freq="lcm"}, the median time step for each individual will be rounded to the nearest interval of \code{lcm.round}
#' (in seconds)
#' @param lcm.max.iter The maximum number of iterations to be used in the algorithm that finds the least common multiple of the median time steps
#' @param dt.int.round.to The proportion of the median sampling frequency that time intervals will be rounded
#' to when computing the frequency table of sampling intervals (no change is made to the time stamps)
#' @param tau.diff.max The maximum deviation from tau (the median delta.t of the entire dataset), expressed
#' as a proportion of tau, that time difference between two points must fall for the distance between those
#' two points to be included in the calculation of the median step length
#' @param status Show messages, T/F
#'
#' @note This function processes a \link{LoCoH-xy} object that contains movement data (for one for several individuals), and removes points
#' such that the output contains a fixed start and end date for each individual, as well as an approximately uniform sampling frequency (time step).
#'
#' Before using this function, you should clean your data of all abnormally short time intervals (e.g., bursts). See \code{\link{lxy.thin.bursts}}.
#'
#' If you know the time interval the data was *supposed* to be sampled (e.g., every 20 minutes), you should
#' pass that value for \code{samp.freq} (expressed in seconds). If \code{samp.freq="lcm"}, the function will
#' automatically compute the common time step for the individuals by taking the least common multiple
#' of the median time steps of each individual. You can deal with noise by rounding the median sampling interval to
#' the value of \code{lcm.round} (expressed in seconds).
#'
#' The function \code{\link{lxy.plot.freq}} can help you see the 'actual' sampling intervals in the data (set \code{by.date=TRUE}).
#'
#' Because this function deletes points, the nearest-neighbors lookup table of the \link{LoCoH-xy} object (if any) will be deleted.
#'
#' @return a \link{LoCoH-xy} object
#' @seealso \code{\link{xyt.lxy}}, \code{\link{lxy.plot.freq}}, \code{\link{lxy.thin.bursts}}
#'
#' @export
#' @import sp
# You can thin by dt.start / end, deltat, or both
lxy.thin.byfreq <- function (lxy, id=NULL, trim.ends=FALSE, dt.start=NULL, dt.end=NULL,
byfreq=FALSE, samp.freq="lcm", lcm.round=120, lcm.max.iter=300,
status=TRUE, dt.int.round.to=0.1, tau.diff.max=0.02) {
if (!inherits(lxy, "locoh.lxy")) stop("lxy should be of class \"locoh.lxy\"")
if (is.null(lxy[["pts"]][["dt"]])) stop("Can't harmonize the temporal frequency without date-time values")
if (!trim.ends && !byfreq) stop("Don't know what to do! Set trim.ends and/or byfreq to TRUE")
if (!is.null(dt.start)) {
if (!inherits(dt.start, "POSIXt")) stop("dt.start must be class POSIXct or POSIXlt")
}
if (!is.null(dt.end)) {
if (!inherits(dt.end, "POSIXt")) stop("dt.end must be class POSIXct or POSIXlt")
}
if (is.null(id)) {
id <- levels(lxy[["pts"]][["id"]])
} else {
if (FALSE %in% (id %in% levels(lxy[["pts"]][["id"]]))) stop("id value(s) not found")
}
idx.comb <- which(lxy[["pts"]][["id"]] %in% id)
idx.not.analyzed <- (1:nrow(lxy[["pts"]]))[-idx.comb]
## Take note of the original time zone so we can restore it later
tz.lxy <- attr(lxy[["pts"]][["dt"]], "tzone")
## Find the dt.start and dt.end
if (trim.ends) {
idx.comb.orig.len <- length(idx.comb)
## Make a list of the indices of each id
ids.idx.lst <- lapply(id, function(x) which(lxy[["pts"]][["id"]] == x))
if (is.null(dt.start)) {
dt.start.use <- as.POSIXct(max(as.numeric(sapply(ids.idx.lst, function(x) min(lxy[["pts"]][["dt"]][x])))), origin="1970-01-01", tz="UTC")
if (!identical(tz.lxy, "UTC")) dt.start.use <- as.POSIXct(format(dt.start.use, tz=tz.lxy), tz=tz.lxy)
} else {
dt.start.use <- dt.start
}
if (is.null(dt.end)) {
dt.end.use <- as.POSIXct(min(as.numeric(sapply(ids.idx.lst, function(x) max(lxy[["pts"]][["dt"]][x])))), origin="1970-01-01", tz="UTC")
if (!identical(tz.lxy, "UTC")) dt.end.use <- as.POSIXct(format(dt.end.use, tz=tz.lxy), tz=tz.lxy)
} else {
dt.end.use <- dt.end
}
## Error check that dt.start <- dt.end
if (dt.start.use >= dt.end.use) {
if (is.null(dt.start) || is.null(dt.end)) {
stop("No temporal overlap detected")
} else {
stop("dt.start must be earlier than dt.end")
}
}
## Keep only points that fall within the common date range
idx.comb <- idx.comb[lxy[["pts"]][["dt"]][idx.comb] >= dt.start.use & lxy[["pts"]][["dt"]][idx.comb] <= dt.end.use]
if (length(idx.comb)==0) stop("No points fall within those date ranges")
if (length(idx.comb) == idx.comb.orig.len) {
if (status) cat(" - no points removed from the start or end due because the start and end dates passed include everything \n")
} else {
if (status) cat(" - trimming date-times to ", format(dt.start.use, format="%Y-%m-%d"), " to ", format(dt.end.use, format="%Y-%m-%d"), "\n", sep="")
}
}
if (byfreq) {
## Error check samp.freq
if (is.null(samp.freq)) stop("Missing required parameter: samp.freq")
## Start a vector of the indices of points that will be kept in the lxy object returned
idx.keepers <- NULL
num.dups.lst <- list()
if (trim.ends) {
## Since we truncated some points, need to recompute rw.params (to get new taus)
rwp.df <- xyt.rw.params.dt.int(id=lxy[["pts"]][["id"]][idx.comb], xy=coordinates(lxy[["pts"]])[idx.comb,], dt=lxy[["pts"]][["dt"]][idx.comb], dt.int.round.to=dt.int.round.to, tau.diff.max=tau.diff.max)[["rw.params"]]
} else {
rwp.df <- lxy[["rw.params"]]
}
taus <- rwp.df[rwp.df[["id"]] %in% id, "time.step.median"]
## Compute the common samp.freq (if needed)
if (identical(samp.freq, "lcm")) {
if (lcm.round != 1) taus <- lcm.round * round(taus / lcm.round)
samp.freq <- least.common.multiple(taus, max.iter=lcm.max.iter, show.err=FALSE)
if (is.null(samp.freq)) {
stop(cw("Unable to find the least common multiple of the sampling frequencies. Make sure you've removed all extremely short-interval points (bursts), then either provide a numeric value for samp.freq, or increase lcm.round and/or lcm.max.iter \n", final.cr=F))
}
if (status) cat(" - thinning to achieve a sampling frequency of ", secs.fmt(samp.freq), "\n", sep="")
} else {
if (samp.freq < min(taus)) stop("samp.freq can not be less than the median time step")
}
## Loop through idVals
for (idVal in id) {
idx.idVal <- idx.comb[lxy[["pts"]][["id"]][idx.comb] == idVal]
dts.int <- as.numeric(lxy[["pts"]][["dt"]][idx.idVal])
## Define a series of target times based on samp.freq
dts.target <- seq(from=dts.int[1], to=dts.int[length(dts.int)], by=samp.freq)
if (dts.target[length(dts.target)] < dts.int[length(dts.int)]) {
dts.target <- c(dts.target, dts.target[length(dts.target)] + samp.freq)
}
## For each element of dts.target, identify the nearest element of dts.int
dts.target.nn.idx <- FNN::get.knnx(data=dts.int, query=dts.target, k=1)[["nn.index"]]
## Record the number of duplicates, which are most likely due to sampling gaps > samp.freq
num.dups.lst[[idVal]] <- sum(duplicated(dts.target.nn.idx))
## Add the indices of the closest points to the 'keepers' list
idx.keepers <- c(idx.keepers, idx.idVal[unique(dts.target.nn.idx)])
}
idx.comb <- idx.keepers
}
if (status && byfreq) {
cat(" - time periods without points (probably due to gaps in the sampling): \n")
print(formatdf4print(as.data.frame(num.dups.lst)), row.names=FALSE)
}
idx.return <- sort(c(idx.not.analyzed, idx.comb))
return(lxy.subset(lxy, idx=idx.return, dt.int.round.to=dt.int.round.to, tau.diff.max=tau.diff.max))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.