R/distfilter.R
In SDLfilter: Filtering and Assessing the Sample Size of Tracking Data

#' @aliases distfilter
#' @title Filter locations by distance 
#' @description This function removes locations that are located beyond a specified distance.
#' @param sdata A data frame containing columns with the following headers: "id", "DateTime", "lat", "lon". 
#' See the data \code{\link{turtle}} for an example.
#' The function filters the input data by a unique "id" (e.g. transmitter number, identifier for each animal). 
#' "DateTime" is the GMT date & time of each location in class \code{\link[base:DateTimeClasses]{POSIXct}} or \code{\link[base]{character}} with the following format "2012-06-03 01:33:46".
#' "lat" and "lon" are the latitude and longitude of each location in decimal degrees. 
#' @param max.dist A numeric value specifying a threshold of distance between successive locations. Default is 100 km. 
#' @param method An integer specifying how locations should be filtered with \emph{max.dist}. 
#' A location is removed if the distance from a previous and(1)/or(2) to a subsequent location exceeds \emph{max.dist}. 
#' Default is 1 (both way).
#' @param ia An integer (0 to 180) specifying an inner angle (in degrees) between consecutive locations, 
#' beyond which the locations are considered potential outliers. 
#' Default (NA) ignores this option. See details. 
#' @importFrom dplyr bind_rows
#' @export
#' @details This function removes locations if the distance from a previous and/or to a subsequent location exceeds \emph{max.dist} and
#' the inner angle is less than \emph{ia}. If \emph{ia} is NA (default), inner angles are not considered in the filtering.
#' @return The input data is returned without locations identified by this filter. 
#' The following columns are added: "pDist", "sDist", 'inAng'. 
#' "pDist" and "sDist" are straight distances in kilometres from a previous and to a subsequent fix respectively.
#' "inAng" is the degree between the bearings of lines joining successive location points.
#' @author Takahiro Shimada
#' @examples
#' #### Load data sets
#' ## Fastloc GPS data obtained from a green turtle
#' data(turtle)
#' 
#' ## A Map for the example site
#' data(Australia)
#' data(SandyStrait)
#' 
#' 
#' #### Filter temporal and/or spatial duplicates
#' turtle.dup <- dupfilter(turtle, step.time=1/60, step.dist=0.001)
#'  
#' 
#' #### distfilter
#' turtle.dist <- distfilter(turtle.dup, max.dist = 50, ia = 20)
#' 
#' 
#' #### Plot data removed or retained by ddfilter
#' ## Entire area
#' p1 <- to_map(turtle.dup, bgmap=Australia, point.size = 2, line.size = 0.5, axes.lab.size = 0, 
#'             multiplot = FALSE, point.bg = "red",
#'             title.size=15, title="Entire area")[[1]] + 
#'   geom_point(aes(x=lon, y=lat), data=turtle.dist, size=2, fill="yellow", shape=21)+
#'   geom_point(aes(x=x, y=y), data=data.frame(x=c(154, 154), y=c(-22, -22.5)), 
#'              size=3, fill=c("yellow", "red"), shape=21) + 
#'   annotate("text", x=c(154.3, 154.3), y=c(-22, -22.5), label=c("Retained", "Removed"), 
#'            colour="black", size=4, hjust = 0)
#'
#' ## Zoomed in
#' p2 <- to_map(turtle.dup, bgmap=SandyStrait, xlim=c(152.7, 153.2), ylim=(c(-25.75, -25.24)), 
#'             axes.lab.size = 0, point.size = 2, point.bg = "red", line.size = 0.5, 
#'             multiplot = FALSE, title.size=15, title="Zoomed in")[[1]] + 
#' geom_path(aes(x=lon, y=lat), data=turtle.dist, linewidth=0.5, colour="black", linetype=1) + 
#' geom_point(aes(x=lon, y=lat), data=turtle.dist, size=2, colour="black", shape=21, fill="yellow")
#'
#' gridExtra::marrangeGrob(list(p1, p2), nrow=1, ncol=2)


distfilter <- function (sdata, max.dist = 100, method = 1, ia = NA){
  
  ## Original sample size
  OriginalSS <- nrow(sdata)
  
  ## qi format
  sdata <- within(sdata, {
    qi[qi %in% "A"] <- "-1"
    qi[qi %in% "B"] <- "-2"
    qi[qi %in% "Z"] <- "-3"
    qi <- as.numeric(as.character(qi))
  })
  
  ## Date & time
  sdata$DateTime <- with(sdata, as.POSIXct(DateTime, format = "%Y-%m-%d %H:%M:%S", tz = "GMT"))
  
  
  max.distance <- function(sdata = sdata, max.dist = max.dist, method = method, ia = ia){
    #### Exclude data with less than 4 locations
    ndata <- table(sdata$id)
    id.exclude <- names(ndata[as.numeric(ndata) < 4])
    excluded.data <- sdata[sdata$id %in% id.exclude,]
    sdata <- sdata[!(sdata$id %in% id.exclude),]
    
    if(nrow(sdata) > 0){
      #### Organize data
      ## Sort data in alphabetical and chronological order
      sdata <- with(sdata, sdata[order(id, DateTime),])
      row.names(sdata) <- 1:nrow(sdata)
      
      
      ## Get Id of each animal
      IDs <- levels(factor(sdata$id))
      
      
      ## Function to identify location to remove: (0 = remove, 1 = keep)
      if(is.na(ia)){
        
        ## Get movement parameters
        sdata <- track_param(sdata, param = 'distance')
        
        ## function
        if(method == 1){
          overMax<-function(i)
            if(sdata$pDist[i] > max.dist && sdata$sDist[i] > max.dist && (!is.na(sdata$pDist[i])) && (!is.na(sdata$sDist[i]))){
              0
            } else {
              1
            }
        } else if (method == 2) {
          overMax<-function(i)
            if((sdata$pDist[i] > max.dist | sdata$sDist[i] > max.dist) && (!is.na(sdata$pDist[i])) && (!is.na(sdata$sDist[i]))){
              0
            } else {
              1
            }
        }
        
      } else {
        
        ## Get movement parameters
        sdata <- track_param(sdata, param = c('distance', 'angle'))
        
        ## function
        if(method == 1){
          overMax<-function(i)
            if(sdata$pDist[i] > max.dist && sdata$sDist[i] > max.dist && sdata$inAng[i] < ia && 
               (!is.na(sdata$pDist[i])) && (!is.na(sdata$sDist[i])) && (!is.na(sdata$inAng[i]))){
              0
            } else {
              1
            }
        } else if (method == 2) {
          overMax <- function(i)
            if((sdata$pDist[i] > max.dist | sdata$sDist[i] > max.dist) && sdata$inAng[i] < ia && 
               (!is.na(sdata$pDist[i])) && (!is.na(sdata$sDist[i])) && (!is.na(sdata$inAng[i]))){
              0
            } else {
              1
            }
        }
      }
      
      
      ## Apply the above function to each data set separately
      set.rm <- function(j){
        start <- as.numeric(rownames(sdata[sdata$id %in% j,][2,]))
        end <- as.numeric(rownames(sdata[sdata$id %in% j,][1,])) + (nrow(sdata[sdata$id %in% j,])-2)
        rm <- unlist(lapply(start:end, overMax))
        c(1, rm, 1)
      }
      
      sdata$overMax <- unlist(lapply(IDs, set.rm))
      
      sdata <- sdata[sdata$overMax == 1,]
    }

    
    #### Bring back excluded data
    if(nrow(excluded.data) > 0){
      sdata <- dplyr::bind_rows(sdata, excluded.data)
    } else {
      sdata <- sdata
    }
  }
  
  
  
  # Repeat the above function until no locations can be removed by this filter.
  sdata2 <- max.distance(sdata = sdata, max.dist = max.dist, method = method, ia = ia)
  sdata3 <- max.distance(sdata = sdata2, max.dist = max.dist, method = method, ia = ia)
  while(!(nrow(sdata2) == nrow(sdata3)))
  {
    sdata3 <- max.distance(sdata = sdata2, max.dist = max.dist, method = method, ia = ia)
    sdata2 <- max.distance(sdata = sdata3, max.dist = max.dist, method = method, ia = ia)
  }
  
  
  #### Report the summary of filtering
  ## Data excluded from filtering
  ndata <- table(as.character(sdata$id))
  id.exclude <- names(ndata[as.numeric(ndata) < 4])
  
  ## Filtered data
  FilteredSS <- nrow(sdata3)
  RemovedSamplesN <- OriginalSS - FilteredSS
  if(RemovedSamplesN > 0){
    RemovedSamplesP <- round((1-(FilteredSS/OriginalSS))*100,2)
  } else {
    RemovedSamplesP <- 0
  }
  
  ## Print report
  cat("\n")
  cat("Input data:", OriginalSS, "locations", fill = TRUE)
  cat("Filtered data:", FilteredSS, "locations", fill = TRUE)
  cat("distfilter removed ", RemovedSamplesN, " locations (", RemovedSamplesP, "% of original data)", sep="", fill = TRUE)
  if(length(id.exclude) > 0){
    message('Warning: insufficient data to apply distfilter to:')
    message(paste(id.exclude, collapse = ', '))
  }
  cat("\n")
  
  
  # Delete working columns and return the output
  drop.vars <- c("pTime", "sTime", "pSpeed", "sSpeed", "meanSpeed", "meanAngle", 'overMax')
  sdata3 <- sdata3[,!(names(sdata3) %in% drop.vars)] 
  return(sdata3)
}

Any scripts or data that you put into this service are public.

SDLfilter documentation built on Nov. 10, 2023, 1:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

SDLfilter
Filtering and Assessing the Sample Size of Tracking Data

R/distfilter.R
In SDLfilter: Filtering and Assessing the Sample Size of Tracking Data

Try the SDLfilter package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SDLfilter Filtering and Assessing the Sample Size of Tracking Data

R/distfilter.R In SDLfilter: Filtering and Assessing the Sample Size of Tracking Data

Try the SDLfilter package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SDLfilter
Filtering and Assessing the Sample Size of Tracking Data

R/distfilter.R
In SDLfilter: Filtering and Assessing the Sample Size of Tracking Data