Nothing
#' Compute s vs. proportion-time-selected hulls table
#'
#' Identify the values of s that result in proportion p of hulls being time-selected
#'
#' @param lxy A \link{LoCoH-xy} object
#' @param id The name(s) of individuals to analyze
#' @param k Value for the k-method that will be used to create hulls for the sample points (and then used to find the proportion of time-selected hulls)
#' @param n Number of randomly selected points to use as samples (ignored if \code{samp.idx} is passed)
#' @param samp.idx A vector of indices of the points to use as samples
#' @param sinit The initial value of \emph{s} that will be used to start the iteration to find that value of \emph{s} that produces a proportion of time selected hulls >= ptsh.max
#' @param ptsh.target A vector of values 0..1 that will serve as the target \emph{ptsh} values. For each ptsh target, script will
#' try to find a value of \emph{s} that generates the targeted proportion of time selected hulls (within \code{ptsh.buf})
#' @param ptsh.max The highest value of ptsh for which a value of \emph{s} will be computed. It is generally recommended that this be less than 1.
#' @param ptsh.buf The level of accuracy or precison to which the found values of 's' return the targetted ptsh.
#' @param max.iter The maximum number of times the script will double sinit in an effort to find the value of s that produces ptsh.max
#' @param max.loops The maximum number of intermediate values of s the script will try when 'zooming' in on the target ptsh levels
#' @param max.reached What to do if the maximum number of iterations is reached, \code{warning} or \code{stop}
#' @param time.term The space-time transformation to use in the TSD distance metric: \code{'vmax'} for the maximum velocity
#' transformation (default) or \code{'dif'} for the diffusion transformation.
#' @param FNN.algorithm The algorithm to be used in the get.knnx() function in the package FNN
#' @param use.nn Whether to use saved nearest neighbor sets for the ptsh computations. T/F
#' @param plotme Whether to plot the ptsh vs. s curve. T/F
#' @param save Whether to save the ptsh-s table in the LoCoH-xy object. T/F
#' @param nn.add Whether to also identify and save nearest neighbor sets for all points for \emph{k}=\code{k} and the values of \emph{s} identified. T/F
#' @param use.pb.n The sample size (i.e., \code{n}) above which a progress bar will be displayed
#' @param ptsh.exists What to do if there is already a ptsh table in the LoCoH-xy object: \code{'replace'} or \code{'append'}
#' @param beep Whether to beep when done. T/F
#' @param status Display status messages. T/F
#'
#' @details When \emph{s}=0, nearest neighbors are space-selected (i.e., point-to-point distance is merely
#' Euclidean distance). For large values of \emph{s}, nearest neighbors are effectively time-selected (e.g.,
#' temporally contiguous). One approach to selecting a value of \emph{s} between these two ends of
#' the spectrum is to pick a \emph{s} value that returns an intermediate proportion of
#' time-selected-hulls representing the desired balance between space and time selection
#' (which in turn is shaped by the research question / objective). This function will find the
#' value(s) of \emph{s} that produce a desired proportion of time-selected-hulls using a randomly-selected
#' subset of points. A general rule of thumb is to pick a value of \emph{s} that results in a proportion of time-selected hulls
#' from 0.4 to 0.6.
#'
#' The script iteratively tries a variety of \emph{s} values until it finds one that produces the desired
#' proportion of time-selected hulls within a tolerance of \code{ptsh.buf}. If for example the vector of target
#' values \code{ptsh.target} includes 0.5, and \code{ptsh.buf = 0.01}, the script will try to find a
#' value of \emph{s} that results in a proportion of time-selected hulls between 0.49 and 0.51.
#'
#' @seealso \code{\link{lxy.nn.add}}
#'
#' @export
#' @import pbapply sp
lxy.ptsh.add <- function(lxy, id=NULL, k=10, n=200, samp.idx=NULL, sinit=0.005, ptsh.target=1:9/10, ptsh.max=0.98, ptsh.buf=0.01, max.iter=15,
max.loops=15, max.reached=c("warning", "stop")[1], time.term=c("vmax", "dif")[1], FNN.algorithm = c("cover_tree", "kd_tree", "VR", "CR")[2], use.nn=FALSE,
plotme=TRUE, save=TRUE, nn.add=FALSE, use.pb.n=200, ptsh.exists=c("replace", "append")[2], beep=FALSE, status=TRUE) {
## Future enhancements
## 1) Option to compute ptsh from saved nearest neighbor sets *** top priority ***
## 2) Option to compute ptsh for r and a methods
## 3) Option to compute ptsh using the diffusion TSD method
if (!inherits(lxy, "locoh.lxy")) stop("lxy should be of class \"locoh.lxy\"")
if (!is.null(lxy[["xys"]])) stop("Old data structure detected. Fix with lxy.repair()")
if (is.null(lxy[["pts"]][["dt"]])) stop("No time-stamps found in lxy")
if (use.nn && is.null(lxy[["nn"]])) stop("No nearest-neighbor sets found")
if (ptsh.max > 1) stop("ptsh.max should not be greater than 1")
if (min(ptsh.target) < 0 || max(ptsh.target) > ptsh.max) stop("ptsh.target should be between 0 and ptsh.max")
if (!FNN.algorithm %in% c("cover_tree", "kd_tree", "VR", "CR")) stop(paste(FNN.algorithm, " is not a valid value", sep=""))
if (!max.reached %in% c("warning", "stop")) stop("invalivd value for max.reached")
if (time.term != "vmax") stop("Sorry, that value for time.term not yet supported")
if (!ptsh.exists %in% c("replace", "append")) stop("Unknown value for ptsh.exists")
if (!plotme && !save && !nn.add) stop("Nothing to do. Set plotme, save, or nn.add to TRUE")
## Error check parameter values
if (is.null(id)) {
id <- levels(lxy[["pts"]][["id"]])
} else {
if (FALSE %in% (id %in% levels(lxy[["pts"]][["id"]]))) stop(paste("Can't find the following ID(s) in lxy: ", paste(id[!(id %in% levels(lxy[["pts"]][["id"]]))], collapse=", ", sep=""), sep=""))
}
#max.tries.getting.close.to.ptsh.target <- 5
lxy.dt.int <- as.numeric(lxy[["pts"]][["dt"]])
res <- list()
## Starting the first of the mother of all nested loops
for (idVal in id) {
if (status) cat("id:", idVal, "\n")
idVal.idx <- which(lxy[["pts"]][["id"]] == idVal)
if (save) {
if (is.null(lxy[["ptsh"]])) lxy[["ptsh"]] <- list()
if (is.null(lxy[["ptsh"]][[idVal]])) lxy[["ptsh"]][[idVal]] <- list()
}
start.time = Sys.time()
if (use.nn) {
## Get the indices of the nearest neighbor sets affiliated with this idVal
nn.idVal.idx <- which(sapply(lxy[["nn"]], function(x) x[["id"]]==idVal))
s.ptsh <- NULL
## Loop through these nn sets and construct a matrix of s and ptsh
if (status) {
cat("Computing ptsh for each saved nearest neighbor set \n")
pb <- txtProgressBar(min=0, max=length(nn.idVal.idx), style = 3)
}
for (i in 1:length(nn.idVal.idx)) {
if (status) setTxtProgressBar(pb, i)
idx <- nn.idVal.idx[[i]]
if (lxy[["nn"]][[idx]][["kmax"]] < k) stop("Insufficient number of nearest neighbors")
good.rows.idx <- lxy[["nn"]][[idx]][["nn.df"]][["nn.rank"]] <= k
nn.lst <- with(lxy[["nn"]][[idx]][["nn.df"]][good.rows.idx, c("pp.idx","nn.idx")], split(nn.idx, pp.idx))
ptsh.cur <- sum(sapply(nn.lst, function(x) max(diff(sort(match(x, idVal.idx))))) == 1) / length(nn.lst)
s.ptsh <- rbind(s.ptsh, c(lxy[["nn"]][[idx]][["s"]], ptsh.cur))
}
if (status) close(pb)
s.ptsh <- s.ptsh[order(s.ptsh[,2]), , drop=FALSE]
res.idVal <- list(id=idVal, samp.idx=idVal.idx, n=length(idVal.idx), k=k, target.ptsh=signif(s.ptsh[,2],2), target.s=s.ptsh[,1], s.ptsh=s.ptsh, time.taken=difftime(Sys.time(), start.time, units="secs"))
} else {
## We will find the ptsh using a random sample of the points
## Identify the indices of the points for this animal
xys.idVal <- coordinates(lxy[["pts"]])[idVal.idx,]
## Get the movement parameters
vmax <- lxy[["rw.params"]][lxy[["rw.params"]][["id"]]==idVal, "vmax"]
if (is.null(vmax)) stop("vmax not found, please run lxy.repair")
## Extract info about nearest neighbor sets
nn.info <- do.call(rbind, lapply(lxy[["nn"]], function(x) data.frame(id=x$id, s=x$s, kmax=x$kmax)))
## See if any of the existing nearest neighbor sets are for this id, s=0, and have enough k
nn.idx <- which(nn.info[["s"]]==0 & nn.info[["id"]]==idVal & nn.info[["kmax"]] >= k)
## If such a nearest neighbor set doesn't exist, create it now (fast)
if (length(nn.idx)==0) {
## Find nearest neighbors s=0 for the whole dataset (we'll need to do this anyway)
if (status) cat(" Finding ", k, " nearest neighbors for s=0...", sep=""); flush.console()
lxy <- lxy.nn.add(lxy, id=idVal, k=k, s=0, status=FALSE)
if (status) cat("Done\n")
nn.info <- do.call(rbind, lapply(lxy[["nn"]], function(x) data.frame(id=x$id, s=x$s, kmax=x$kmax)))
nn.idx <- which(nn.info[["s"]]==0 & nn.info[["id"]]==idVal & nn.info[["kmax"]] >= k)
if (length(nn.idx)==0) stop("Can't find the nearest neighbor set for s=0")
}
## Compute the ptsh for s=0
nn.df <- lxy[["nn"]][[nn.idx]][["nn.df"]][ lxy[["nn"]][[nn.idx]][["nn.df"]][["nn.rank"]] <=k, c("pp.idx","nn.idx", "nn.rank") ]
nn.lst <- split(nn.df[["nn.idx"]], nn.df[["pp.idx"]])
ptsh.cur <- sum(sapply(nn.lst, function(x) max(diff(sort(match(x, idVal.idx))))) == 1) / length(nn.lst)
## Create a matrix to hold the results
s.ptsh <- matrix(c(0, ptsh.cur), ncol=2)
colnames(s.ptsh) <- c("s", "ptsh")
## Pick a sample of points
if (is.null(samp.idx)) {
samp.idx.use <- sample(idVal.idx, min(n, length(idVal.idx)))
if (status) {
if (n < length(idVal.idx)) {
cat(" Randomly selected ", n, " of ", length(idVal.idx), " points \n", sep=""); flush.console()
} else {
cat(" Using all ", length(samp.idx.use), " points \n", sep=""); flush.console()
}
}
} else {
samp.idx.use <- samp.idx
if (FALSE %in% (lxy[["pts"]][["id"]][samp.idx.use] %in% idVal)) {
stop(paste("Invalid values for samp.idx for id=", idVal, sep=""))
}
#n <- length(samp.idx.use)
if (status) cat(" Using ", n, " samples from passed value of samp.idx \n", sep=""); flush.console()
}
if (status) cat(" Finding ", k, " nearest neighbors for ", n, " sample points \n", sep=""); flush.console()
use.pb <- (n > use.pb.n)
post.sequals.str <- if (use.pb) "\n" else ", "
if (status) {
pbo.orig <- pboptions(type = if (use.pb) "txt" else "none")
on.exit(pboptions(pbo.orig))
con.width <- getOption("width")
}
## First thing to do is to increase s until we ptsh >= ptsh.max
if (status) {
cat(" Finding s for ptsh=", ptsh.max, "\n", sep=""); flush.console()
if (!use.pb) cat(" ")
}
count.int <- 0
sVal.cur <- sinit / 2
sequals.len <- 2
while (ptsh.cur < ptsh.max) {
count.int <- count.int + 1
if (count.int > max.iter) {
if (status) {
cat(" - reached the maximum iterations and still haven't reached ptsh.max=", ptsh.max, "\n")
cat(" - resetting ptsh.max to ", ptsh.cur, "\n")
}
ptsh.max <- ptsh.cur
ptsh.target <- ptsh.target[ptsh.target <= ptsh.max]
break
}
sVal.cur <- sVal.cur * 2
## Prepare the feedback for the console
if (status) {
sequals.str <- paste("s=", sVal.cur, post.sequals.str, sep="")
if (!use.pb) {
sequals.len <- sequals.len + nchar(sequals.str)
if (sequals.len > con.width) {
cat("\n ")
sequals.len <- 2
}
}
cat(sequals.str); flush.console()
}
## Find nn for sVal.cur
nn.lst <- pblapply(samp.idx.use, function(i) idVal.idx[as.numeric(FNN::get.knnx(data=data.frame(x=xys.idVal[,1], y=xys.idVal[,2], z=tsd.zvals(delta.t=lxy.dt.int[idVal.idx] - lxy.dt.int[i], sVal=sVal.cur, type="vmax", d.bar=NULL, tau=NULL, vmax=vmax)),
query=cbind(coordinates(lxy[["pts"]])[i, , drop=F], 0), k=k+1, algorithm=FNN.algorithm)[["nn.index"]])])
ptsh.cur <- sum(sapply(nn.lst, function(x) max(diff(sort(match(x, idVal.idx))))) == 1) / length(nn.lst)
s.ptsh <- rbind(s.ptsh, c(sVal.cur, ptsh.cur))
}
if (!use.pb && status) cat("\n ")
for (ptshVal in sort(ptsh.target, decreasing=FALSE)) {
if (status) {
cat("Finding s for ptsh=", ptshVal, " (+/- ", ptsh.buf, ")\n", sep=""); flush.console()
if (!use.pb) cat(" ")
}
sequals.len <- 2
count.int <- 0
while (count.int <= max.loops && sum(s.ptsh[,"ptsh"] >= (ptshVal - ptsh.buf) & s.ptsh[,"ptsh"] <= (ptshVal + ptsh.buf))==0) {
## Increment counter, we're only going to do this so many times
count.int <- count.int + 1
## Want to trap if we get stuck
sVal.cur.old <- sVal.cur
## Sort s.ptsh by ptsh and then s, because we want to find the sVal above and below the target ptsh
s.ptsh <- s.ptsh[order(s.ptsh[,2], s.ptsh[,1]),]
base.idx <- findInterval(ptshVal, s.ptsh[,2])
if (base.idx == 0) {
## ptshVal is less than ptsh when s=0, so no chance of getting that level
break
} else if (base.idx == nrow(s.ptsh)) {
## ptshVal is more than ptsh.max, we won't get closer
break
} else {
sVal.cur <- mean(s.ptsh[0:1+base.idx,1])
if (isTRUE(all.equal(sVal.cur, sVal.cur.old))) {
int.converge.counter <- 0
## We have to use the isTRUE function because s is not an integer
while (TRUE %in% sapply(s.ptsh[,1], function(x) isTRUE(all.equal(sVal.cur, x)))) {
int.converge.counter <- int.converge.counter + 1
if (int.converge.counter > 6) {
if (status) cat("Failing to find a value of s which ptsh=", ptshVal, "\n")
break
}
sVal.dup.idx <- which(sapply(s.ptsh[,1], function(x) isTRUE(all.equal(sVal.cur, x))))
if (sVal.dup.idx==0 || sVal.dup.idx==nrow(s.ptsh)) {
break
} else if (s.ptsh[sVal.dup.idx, 2] > ptshVal) {
sVals.to.avg.idx <- c(base.idx, sVal.dup.idx)
} else {
sVals.to.avg.idx <- c(base.idx + 1, sVal.dup.idx)
}
sVal.cur <- mean(s.ptsh[sVals.to.avg.idx,1])
}
}
## Prepare the feedback for the console
if (status) {
sequals.str <- paste("s=", sVal.cur, post.sequals.str, sep="")
if (!use.pb) {
sequals.len <- sequals.len + nchar(sequals.str)
if (sequals.len > con.width) {
cat("\n ")
sequals.len <- 2
}
}
cat(sequals.str); flush.console()
}
## Find nn for sVal.cur
nn.lst <- pblapply(samp.idx.use, function(i) idVal.idx[as.numeric(FNN::get.knnx(data=data.frame(x=xys.idVal[,1], y=xys.idVal[,2], z=tsd.zvals(delta.t=lxy.dt.int[idVal.idx] - lxy.dt.int[i], sVal=sVal.cur, type="vmax", d.bar=NULL, tau=NULL, vmax=vmax)),
query=cbind(coordinates(lxy[["pts"]])[i, , drop=F], 0), k=k+1, algorithm=FNN.algorithm)[["nn.index"]])])
ptsh.cur <- sum(sapply(nn.lst, function(x) max(diff(sort(match(x, idVal.idx))))) == 1) / length(nn.lst)
s.ptsh <- rbind(s.ptsh, c(sVal.cur, ptsh.cur))
}
}
if (!use.pb && count.int > 0 && status) cat("\n ")
if (count.int > max.loops && sum(s.ptsh[,"ptsh"] >= (ptshVal - ptsh.buf) & s.ptsh[,"ptsh"] <= (ptshVal + ptsh.buf))==0) {
if (max.reached == "warning") {
warning(paste(idVal, ": count.int reached ", count.int, " but still could not find a value of s that worked for ptsh=", ptshVal, sep=""))
if (status) cat("count.int reached ", count.int, " but could not a value of s \n")
} else if (max.reached == "stop") {
stop(cw(paste(idVal, ": count.int=", count.int, " but still could not find a value of s that worked for ptsh=", ptshVal, sep="")))
}
}
}
if (!use.pb && status) cat("\n")
## Sort matrix by s
s.ptsh <- s.ptsh[order(s.ptsh[,1]),]
ptsh.target.all <- sort(unique(c(0, ptsh.target, ptsh.max)))
magic.s <- s.ptsh[FNN::get.knnx(s.ptsh[,2], query=ptsh.target.all, k=1)[["nn.index"]],1]
res.idVal <- list(id=idVal, samp.idx=samp.idx.use, n=n, k=k, target.ptsh=ptsh.target.all, target.s=magic.s, s.ptsh=s.ptsh, time.taken=difftime(Sys.time(), start.time, units="secs"))
}
if (status) cat("Done with ", idVal, "\n\n", sep="")
if (plotme) {
plot(s.ptsh, type="l", main=paste("s vs. ptsh\n", idVal, ", n=", length(samp.idx.use), sep=""))
points(s.ptsh, pch=20, cex=1)
abline(v=pretty(c(0,max(s.ptsh[,1])), n=15), lty=3, col="gray", lwd=0.1)
}
if (save) lxy[["ptsh"]][[idVal]][[length(lxy[["ptsh"]][[idVal]]) + (ptsh.exists == "append" || length(lxy[["ptsh"]][[idVal]])==0)]] <- res.idVal
if (nn.add) lxy <- lxy.nn.add(lxy, id=idVal, k=k, s=magic.s)
}
if (beep) {
flush.console()
for (i in 1:3) {
alarm()
Sys.sleep(0.8)
}
}
return(lxy)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.