R/vis-interpolate_path.r

Defines functions interpolate_path

Documented in interpolate_path

#' Interpolate new positions within a spatiotemporal path data 
#'
#' Interpolate new positions within a spatiotemporal path data set 
#'   (e.g., detections of tagged fish) at regularly-spaced time intervals   
#' 	 using linear or non-linear interpolation.
#' 
#' @param det An object of class \code{glatos_detections} or data frame
#'   containing spatiotemporal data with at least 4 columns containing
#'   'animal_id', 'detection_timestamp_utc', 'deploy_long', and
#'   'deploy_lat' columns.
#'
#' @param start_time specify the first time bin for interpolated data.
#'     If not supplied, default is first timestamp in the input data
#'     set.  Must be a character string that can be coerced to
#'     'POSIXct' or an object of class 'POSIXct'.  If character string
#'     is supplied, timezone is automatically set to UTC.
#'
#' @param out_class Return results as a data.table or tibble.  Default
#'   returns results as data.frame.  Accepts `data.table` or `tibble`.
#' 
#' @param int_time_stamp The time step size (in seconds) of interpolated 
#'   positions. Default is 86400 (one day).
#'   
#' @param trans An optional transition matrix with the "cost" of
#'   moving across each cell within the map extent. Must be of class
#'   \code{TransitionLayer}. A transition layer may be
#'   created from a polygon shapefile using \link{make_transition}.
#'   
#' @param lnl_thresh A numeric threshold for determining if linear or
#'   non-linear interpolation shortest path will be used.
#'   
#' @param show_progress Logical. Progress bar and status messages will be 
#'  shown if TRUE (default) and not shown if FALSE.
#'
#' @details Non-linear interpolation uses the \code{gdistance} package
#'   to find the shortest pathway between two locations (i.e.,
#'   receivers) that avoid 'impossible' movements (e.g., over land for
#'   fish). The shortest non-linear path between two locations is
#'   calculated using a transition matrix layer that represents the
#'   'cost' of an animal moving between adjacent grid cells.  The
#'   transition matrix layer (see \link{gdistance}) is created from
#'   a polygon shapefile using \link{make_transition} or from a
#'   \code{RasterLayer} object using \link[gdistance]{transition}. In
#'   \code{make_transition}, each cell in the output transition layer
#'   is coded as water (1) or land (0) to represent possible (1) and
#'   impossible (0) movement paths.
#' 
#' @details Linear interpolation is used for all points when
#'   \code{trans} is not supplied.  When \code{trans} is supplied,
#'   then interpolation method is determined for each pair of
#'   sequential observed detections. For example, linear interpolation
#'   will be used if the two geographical positions are exactly the
#'   same and when the ratio (linear distance:non-linear distance)
#'   between two positions is less than \code{lnl_thresh}.  Non-linear
#'   interpolation will be used when ratio is greater than
#'   \code{lnl_thresh}.  When the ratio of linear distance to
#'   non-linear distance is greater than \code{lnl_thresh}, then the
#'   distance of the non-linear path needed to avoid land is greater
#'   than the linear path that crosses land.  \code{lnl_thresh} can be
#'   used to control whether non-linear or linear interpolation is
#'   used for all points. For example, non-linear interpolation will
#'   be used for all points when \code{lnl_thresh} > 1 and linear
#'   interpolation will be used for all points when \code{lnl_thresh}
#'   = 0.
#'
#' @return A dataframe with animal_id, bin_timestamp,
#'   latitude, longitude, and record_type.
#'
#'
#' @author Todd Hayden, Tom Binder, Chris Holbrook
#' 
#' @examples
#' 
#' #--------------------------------------------------
#' # EXAMPLE #1 - simple interpolate among lakes
#'   
#' library(sp) #for loading greatLakesPoly because spatial object   
#'   
#' # get polygon of the Great Lakes 
#' data(greatLakesPoly) #glatos example data; a SpatialPolygonsDataFrame
#' plot(greatLakesPoly, xlim = c(-92, -76))
#'   
#' # make sample detections data frame
#' pos <- data.frame(
#'    animal_id=1,
#'    deploy_long=c(-87,-82.5, -78),
#'    deploy_lat=c(44, 44.5, 43.5),
#'    detection_timestamp_utc=as.POSIXct(c("2000-01-01 00:00",
#'      "2000-02-01 00:00", "2000-03-01 00:00"), tz = "UTC"))
#' 
#' #add to plot
#' points(deploy_lat ~ deploy_long, data = pos, pch = 20, cex = 2, col = 'red')
#' 
#' # interpolate path using linear method
#' path1 <- interpolate_path(pos)
#' nrow(path1) #now 61 points
#' sum(path1$record_type == "interpolated") #58 interpolated points
#'  
#' #add linear path to plot
#' points(latitude ~ longitude, data = path1, pch = 20, cex = 0.8, col = 'blue')
#' 
#' # load a transition matrix of Great Lakes
#' # NOTE: This is a LOW RESOLUTION TransitionLayer suitable only for 
#' #       coarse/large scale interpolation only. Most realistic uses
#' #       will need to create a TransitionLayer; see ?make_transition.
#' data(greatLakesTrLayer) #glatos example data; a TransitionLayer
#'  
#' # interpolate path using non-linear method (requires 'trans')
#' path2 <- interpolate_path(pos, trans = greatLakesTrLayer)
#' 
#' # add non-linear path to plot
#' points(latitude ~ longitude, data = path2, pch = 20, cex = 1, 
#'        col = 'green')
#'  
#' # can also force linear-interpolation with lnlThresh = 0
#' path3 <- interpolate_path(pos, trans = greatLakesTrLayer, lnl_thresh = 0)
#' 
#' # add new linear path to plot
#' points(latitude ~ longitude, data = path3, pch = 20, cex = 1, 
#'           col = 'magenta')
#'           
#' #--------------------------------------------------
#' # EXAMPLE #2 - walleye in western Lake Erie
#' \dontrun{
#'
#' library(sp) #for loading greatLakesPoly
#' library(raster) #for raster manipulation (e.g., crop)
#'
#' # get example walleye detection data
#' det_file <- system.file("extdata", "walleye_detections.csv",
#'                         package = "glatos")
#' det <- read_glatos_detections(det_file)
#' 
#' # take a look
#' head(det)
#' 
#' # extract one fish and subset date
#' det <- det[det$animal_id == 22 & 
#'            det$detection_timestamp_utc > as.POSIXct("2012-04-08") &
#'            det$detection_timestamp_utc < as.POSIXct("2013-04-15") , ]
#' 
#' # get polygon of the Great Lakes 
#' data(greatLakesPoly) #glatos example data; a SpatialPolygonsDataFrame
#' 
#' # crop polygon to western Lake Erie
#' maumee <-  crop(greatLakesPoly, extent(-83.7, -82.5, 41.3, 42.4))
#' plot(maumee, col = "grey")
#' points(deploy_lat ~ deploy_long, data = det, pch = 20, col = "red", 
#'   xlim = c(-83.7, -80))
#' 
#' #make transition layer object
#' # Note: using make_transition2 here for simplicity, but 
#' #       make_transition is generally preferred for real application  
#' #       if your system can run it see ?make_transition
#' tran <- make_transition(maumee, res = c(0.1, 0.1))
#' 
#' plot(tran$rast, xlim = c(-83.7, -82.0), ylim = c(41.3, 42.7))
#' plot(maumee, add = TRUE)
#' 
#' # not high enough resolution- bump up resolution
#' tran1 <- make_transition(maumee, res = c(0.001, 0.001))
#' 
#' # plot to check resolution- much better
#' plot(tran1$rast, xlim = c(-83.7, -82.0), ylim = c(41.3, 42.7))
#' plot(maumee, add = TRUE)
#' 
#' 
#' # add fish detections to make sure they are "on the map"
#' # plot unique values only for simplicity
#' foo <- unique(det[, c("deploy_lat", "deploy_long")]) 
#' points(foo$deploy_long, foo$deploy_lat, pch = 20, col = "red")
#' 
#' # call with "transition matrix" (non-linear interpolation), other options
#' # note that it is quite a bit slower due than linear interpolation
#' pos2 <- interpolate_path(det, trans = tran1$transition, out_class = "data.table")
#'
#' plot(maumee, col = "grey")
#' points(latitude ~ longitude, data = pos2, pch=20, col='red', cex=0.5)
#' 
#' } 
#' 
#' @export 
                         
interpolate_path <- function(det, trans = NULL, start_time = NULL,
                             int_time_stamp = 86400, lnl_thresh = 0.9,
                             out_class = NULL, show_progress = TRUE){

  # stop if out_class is not NULL, data.table, or tibble
  if(!is.null(out_class)){
    if( !(out_class %in% c("data.table", "tibble"))) {stop('out_class is not a "data.table" or "tibble"')}}
   
  # check to see that trans is a transition layer or transition stack
  if(!is.null(trans) & 
       inherits(trans, c("TransitionLayer", "TransitionStack")) == FALSE){
    stop(paste0("Supplied object for 'trans' argument is not class ", 
                "TransitionLayer or TransitionStack."),
         call. = FALSE)
  }
 
  # check start_time
  if(is.null(start_time)){
    start_time <- min(det$detection_timestamp_utc)
  }       
  if(is.na(start_time) & length(start_time) > 0){
    stop("start_time cannot be coerced to 'POSIXct' or 'POSIXt' class")
  }

  if(is.character(start_time)){
    start_time <- as.POSIXct(start_time, tz = "UTC")
  }

  # make sure start_time < largest timestamp in dataset
  if(start_time > max(det$detection_timestamp_utc)){
    stop("start_time is larger than last detection.  No data to interpolate!", call. = FALSE)}
  
  # make copy of detections for function
  dtc <- data.table::as.data.table(det)
    
  # subset only columns for function and rows >= start_time:
  dtc <- dtc[detection_timestamp_utc >= start_time, c("animal_id",
                                                      "detection_timestamp_utc",
                                                      "deploy_lat", 
                                                      "deploy_long")]
 
  dtc[, record_type := "detection"]
  
  # count number of rows- single observations are not interpolated
  dtc[, num_rows := nrow(.SD), by = animal_id]
  
  # Sort detections by transmitter id and then by detection timestamp
  data.table::setkey(dtc, animal_id, detection_timestamp_utc)

  # save original dataset to combine with interpolated data in the end
  det <- data.table::copy(dtc)
  data.table::setnames(det, c("animal_id", "bin_stamp", "i_lat", "i_lon", 
                              "record_type", "num_rows"))
  
  # remove any fish with only one detection
  dtc <- dtc[num_rows != 1]

  # error if only fish with one observation.
    if (nrow(dtc) == 0) stop("must have two observations to interpolate")

  # extract and determine start time
  t_seq <- seq(start_time, max(dtc$detection_timestamp_utc),
               int_time_stamp)

  # bin data by time interval and add bin to dtc
  dtc[, bin := t_seq[findInterval(detection_timestamp_utc, t_seq)] ]

  # make all combinations of animals and detection bins
  dtc <- dtc[data.table::CJ(bin = t_seq, animal_id = unique(animal_id)),
             on = c("bin", "animal_id")]

  data.table::setkey(dtc, animal_id, bin, detection_timestamp_utc)

  # if only need to do linear interpolation:
  if(is.null(trans) | lnl_thresh == 0){
    dtc[, bin_stamp := detection_timestamp_utc][is.na(detection_timestamp_utc), 
                                                bin_stamp := bin]

    dtc[, i_lat := approx(detection_timestamp_utc, deploy_lat,
                          xout = bin_stamp)$y, by = animal_id]
    dtc[, i_lon := approx(detection_timestamp_utc, deploy_long,
                          xout = bin_stamp)$y, by = animal_id]

    dtc[is.na(deploy_long), record_type := "interpolated"]
    dtc <- dtc[, c("animal_id", "bin_stamp", "i_lat", "i_lon", "record_type")]
    det <- det[num_rows == 1, c("animal_id", "bin_stamp", "i_lat", "i_lon",
                                "record_type")]
    out <- data.table::rbindlist(list(dtc, det), use.names = TRUE)
    data.table::setkey(out, animal_id, bin_stamp)
    out[, bin_stamp := t_seq[findInterval(bin_stamp, t_seq)] ]
    out <- na.omit(out, cols = "i_lat")
    data.table::setnames(out, c("animal_id", "bin_timestamp", "latitude", 
                                "longitude", "record_type"))
    out <- unique(out)
    out <- data.table::setorder(out, animal_id, bin_timestamp, -record_type)

    # If out_class == NULL, then return data as data.table
    if(is.null(out_class)){ out <- as.data.frame(out)
    return(out)
    }
    
    # if out_class == "tibble", then return tibble object
    if(out_class == "tibble"){ out <- tibble::as_tibble(out)
    return(out)}
    
    # if out_class == NULL, then return data.frame object
    return(out)
  }

  # routine for combined nln and ln interpolation
  
  # identify start and end rows for observations before and after NA
  ends <- dtc[!is.na(deploy_lat), .(start = .I[-nrow(.SD)], end = .I[-1]),
              by = animal_id][end - start > 1]

  # identify observations that are both start and ends
  dups <-  c(ends$start, ends$end)[ ends[, duplicated(c(start, end))]]

  # create and append duplicate rows for observations
  # that are both start and end.
  # This is so each observation can be in only one group

  # identifies rows and duplicate rows that need duplicated
  dtc[, c("rep", "num") := list(1L, 1:.N)][dups, rep := 2L]
  dtc <- dtc[rep(num, rep)]
  dtc[, rep := NULL]
  dtc[, num := NULL]

  # recalculate first and last rows- no duplicate rows this time...
  new_ends <- dtc[!is.na(deploy_lat), .(start = .I[-nrow(.SD)], end = .I[-1]),
                  by = animal_id][end - start > 1]

  # create row index
  dtc[, start_dtc := 1:.N]

  # extract rows that need interpolated
  dtc <- dtc[new_ends, .(animal_id = x.animal_id,
                         detection_timestamp_utc = x.detection_timestamp_utc,
                         deploy_lat = x.deploy_lat, deploy_long = x.deploy_long,
                         record_type = x.record_type, num_rows = x.num_rows,
                         bin = x.bin, i.start = start),
              on = .(start_dtc >= start, start_dtc <= end)]

  # calculate great circle distance between coords
  dtc[, gcd := geosphere::distHaversine(as.matrix(
    .SD[1, c("deploy_long", "deploy_lat")]),
    as.matrix(.SD[.N, c("deploy_long", "deploy_lat")])), by = i.start]

  # calculate least cost (non-linear) distance between points
  message("Calculating least-cost (non-linear) distances... (step 1 of 3)")

  grpn = data.table::uniqueN(dtc$i.start)

  if(show_progress) pb <- txtProgressBar(min = 0, max = grpn, style = 3) 
  
   dtc[, lcd := {if(show_progress) setTxtProgressBar(pb, value = .GRP);
     gdistance::costDistance(trans, fromCoords = as.matrix(
    .SD[1, c("deploy_long", "deploy_lat")]),
    toCoords = as.matrix(.SD[.N, c("deploy_long", "deploy_lat")]))},
    by = i.start]
  
  # calculate ratio of gcd:lcd
  dtc[, crit := gcd / lcd]

  # create keys for lookup
  dtc[!is.na(detection_timestamp_utc),
      t_lat := data.table::shift(deploy_lat, type = "lead"), by = i.start]
  dtc[!is.na(detection_timestamp_utc),
      t_lon := data.table::shift(deploy_long, type = "lead"), by = i.start]
  dtc[!is.na(detection_timestamp_utc),
      t_timestamp := data.table::shift(detection_timestamp_utc, type = "lead"),
      by = i.start]

  # extract rows that need non-linear interpolation
  # based on gcd:lcd distance
  nln <- dtc[crit < lnl_thresh ]

  land_chk <- dtc[is.infinite(lcd)][!is.na(deploy_lat),
                                    c("deploy_lat", "deploy_long")]

  # stop execution and display offending receivers if any receivers are on land.

  capture <- function(x)paste(capture.output(print(x)), collapse = "\n")
  
  if (nrow(land_chk) > 0) {stop("Some coordinates are on land or beyond extent.
    Interpolation impossible! Check receiver locations or extents of transition
    layer:\n", capture(as.data.table(land_chk)), call. = FALSE)
  }

  # extract data for linear interpolation
  # check to make sure that all points to be interpolated
  # are within the tranition layer is needed before any interpolation.

  ln <- dtc[crit >= lnl_thresh | is.nan(crit) ]
  if (nrow(ln) == 0){
    ln <- data.table::data.table(animal_id = character(), i_lat = numeric(),
                     i_lon = numeric(),
                     bin_stamp = as.POSIXct(character()),
                     record_type = character())
    } else {

    message("Starting linear interpolation... (step 2 of 3)")
    # linear interpolation
      grpn = uniqueN(ln$i.start)
      if(show_progress) pb <- txtProgressBar(min = 0, max = grpn, style = 3)
      ln[, bin_stamp := detection_timestamp_utc][is.na(detection_timestamp_utc),
                                                 bin_stamp := bin]
      ln[, i_lat := {if(show_progress) setTxtProgressBar(pb, .GRP);
                              tmp = .SD[c(1, .N),
                               c("detection_timestamp_utc", "deploy_lat")];
                               approx(c(tmp$detection_timestamp_utc),
                                      c(tmp$deploy_lat),
                                      xout = c(bin_stamp))$y}, by = i.start]
      ln[, i_lon := {tmp = .SD[c(1, .N),
                               c("detection_timestamp_utc", "deploy_long")];
                               approx(c(tmp$detection_timestamp_utc),
                                      c(tmp$deploy_long), 
                                      xout = c(bin_stamp))$y},
         by = i.start]
      ln[is.na(deploy_long), record_type := "interpolated"]
    }
  
  # extract records to lookup
  nln_small <- nln[ !is.na(detection_timestamp_utc)][!is.na(t_lat)]
  
  if(nrow(nln_small) == 0){
    nln <- data.table(animal_id = character(), i_lat = numeric(),
                      i_lon = numeric(),
                      bin_stamp = as.POSIXct(character()),
                      record_type = character())
  } else {
    # nln interpolation
    # create lookup table
    data.table::setkey(nln_small, deploy_lat, deploy_long, t_lat, t_lon)
    lookup <- unique(nln_small[, .(deploy_lat, deploy_long, t_lat, t_lon),
                               allow.cartesian = TRUE])

    message("\nStarting non-linear interpolation... (step 3 of 3)")
    grpn <- nrow(lookup)
    if(show_progress) pb <- txtProgressBar(min = 0, max = grpn, style = 3)
    # calculate non-linear interpolation for all unique movements in lookup
    lookup[, coord := { if(show_progress) setTxtProgressBar(pb, value = .GRP);
              sp::coordinates(
              gdistance::shortestPath(trans, as.matrix(
              .SD[1, c("deploy_long", "deploy_lat")]), as.matrix(
                .SD[1, c("t_lon", "t_lat")]), output = "SpatialLines"))},
      by = 1:nrow(lookup)]

    message("\nFinalizing results.")
    
    lookup[, grp := 1:.N]

    # extract interpolated points from coordinate lists...
    res <- lookup[, .(nln_longitude = lookup$coord[[.I]][, 1],
                      nln_latitude = lookup$coord[[.I]][, 2]), by = grp]

    # set keys, join interpolation and original data
    data.table::setkey(lookup, grp)
    data.table::setkey(res, grp)
    lookup <- lookup[res]
    lookup[, coord := NULL]

    # added first/last rows, number sequence for groups
    lookup[lookup[, .I[1], by = grp]$V1, nln_longitude := deploy_long]
    lookup[lookup[, .I[.N], by = grp]$V1, nln_longitude := t_lon]
    lookup[lookup[, .I[1], by = grp]$V1, nln_latitude := deploy_lat]
    lookup[lookup[, .I[.N], by = grp]$V1, nln_latitude := t_lat]
    lookup[,seq_count := 1:.N, by = grp]

    # lookup interpolated values for original dataset
    data.table::setkey(lookup, deploy_lat, deploy_long, t_lat, t_lon)
    nln_small <- lookup[nln_small, allow.cartesian = TRUE]
    data.table::setkey(nln_small, i.start, seq_count)

    # add timeseries for interpolating nln movements
    nln_small[nln_small[, .I[1], by = i.start]$V1,
              i_time := detection_timestamp_utc]
    nln_small[nln_small[, .I[.N], by = i.start]$V1, i_time := t_timestamp]

arch <- nln_small

# nln_small <- nln_small[i.start == 163]

      nln_small[, latitude_lead := data.table::shift(nln_latitude, type = "lag", fill = NA), by = i.start]
      nln_small[, longitude_lead := data.table::shift(nln_longitude, type = "lag", fill = NA), by = i.start]

      nln_small[, cumdist :=  geosphere::distGeo(.SD[, c("nln_longitude", "nln_latitude")],
                                                 .SD[,c("longitude_lead", "latitude_lead")]), by = i.start]

      nln_small[is.na(cumdist), cumdist := 0]
      nln_small[, cumdist := cumsum(cumdist), by = i.start]

      nln_small[, latitude_lead := NULL][, longitude_lead := NULL]
      
    # calculate cumdist
    ## nln_small[, cumdist := cumsum(c(0, sqrt(diff(nln_longitude) ^ 2 +
    ##                                         diff(nln_latitude) ^ 2))),
    ##           by = i.start]

    # interpolate missing timestamps for interpolated coordinates
    nln_small[, i_time := as.POSIXct(approx(cumdist, i_time, xout = cumdist)$y,
                                     origin = "1970-01-01 00:00:00",
                                     tz = attr(nln_small$i_time, "tzone")),
              by = i.start]

    # create timestamp vector to interpolate on.
    nln[, bin_stamp := detection_timestamp_utc]
    nln[is.na(detection_timestamp_utc), bin_stamp := bin]
    nln[, grp := i.start]

    # interpolate timestamps
    data.table::setkey(nln_small, i.start)
    data.table::setkey(nln, i.start)
    nln[, i_lat := {tmp = nln_small[.(.SD[1, "i.start"]),
                                    c("i_time", "nln_latitude")];
                                    approx(tmp$i_time, tmp$nln_latitude,
                                           xout = bin_stamp)$y}, by = grp]

    nln[, i_lon := {tmp = nln_small[.(.SD[1, "i.start"]),
                                    c("i_time", "nln_longitude")];
                                    approx(tmp$i_time, tmp$nln_longitude,
                                           xout = bin_stamp)$y}, by = grp]

    nln[is.na(deploy_long), record_type := "interpolated"]
  }

  # combine into a single data.table
  out <- data.table::rbindlist(list(ln[record_type == "interpolated",
                  c("animal_id", "bin_stamp", "i_lat", "i_lon", "record_type")],
               nln[record_type == "interpolated",
                   c("animal_id", "bin_stamp", "i_lat", "i_lon", 
                     "record_type")],
               det[, c("animal_id", "bin_stamp", "i_lat", "i_lon", 
                       "record_type")]), use.names = TRUE)

  out[, !c("animal_id")]
  data.table::setkey(out, animal_id, bin_stamp)
  out[, bin_stamp := t_seq[findInterval(bin_stamp, t_seq)] ]
  data.table::setnames(out, c("animal_id", "bin_timestamp", "latitude", 
                              "longitude", "record_type"))
  out <- na.omit(out, cols = "latitude")
  out <- unique(out)
  data.table::setorder(out, animal_id, bin_timestamp, -record_type)

  # If out_class == NULL, then return data as data.table
  if(is.null(out_class)){ out <- as.data.frame(out)
    return(out)
  }

  # if out_class == "tibble", then return tibble object
  if(out_class == "tibble"){ out <- tibble::as_tibble(out)
    return(out)}

  # if out_class == NULL, then return data.frame object
  return(out)
}
 
jsta/glatos documentation built on July 11, 2022, 7:01 a.m.