R/filt.R

Defines functions filt

Documented in filt

#' Delineating movement trips for central-place foraging species
#'
#' This function was developped to conveniently delineate trips from raw GPS data for
#' central-place foraging species. It also includes various "filtering" options to segment trips.
#'
#' @param pathF path leading to your tracks
#' @param pathM path leading to your metadata file
#' @param metname a character string corresponding to the metadata file's name
#' @param nbn character. Vector of "names" matching your raw GPS files (i.e. which are linked by the pathF argument). See details for more details
#' @param timezone time zone in which data are recorded
#' @param gpst a character string corresponding to the metadata's column name containing the GPS type
#' @param ddep a character string corresponding to the metadata's column name containing the date of departure
#' @param drecap a character string corresponding to the metadata's column name containing the date of retrieval
#' @param colony a character string corresponding to the metadata's column name containing individual colony attribution
#' @param year a character string corresponding to the metadata's column name containing the year of sampling
#' @param ring a character string corresponding to the metadata's column name containing the individual ring number
#' @param tdep a character string corresponding to the metadata's column name containing the time of departure
#' @param trecap a character string corresponding to the metadata's column name containing the time of retrieval
#' @param speedTresh numeric treshold used as speed cutoff for speed filtering (km/h)
#' @param FIX a character string corresponding to the metadata's column name containing the numeric interval separating two
#' locations in the GPS track
#' @param FixInt numeric interval that should separate two locations in GPS tracks for interpolation (minutes; e.g. 2, 10)
#' @param BuffColony numeric value indicating the buffer radius length around the colony (km)
#' @param MinTripDur numeric value used for filtering trip by minimum trip duration (minutes)
#' @param Complete logical; if TRUE, only complete trip will be kept
#' @param Interpolate logical; if TRUE, tracks are interpolated
#' @param filtNA numeric value which correspond of the filtering treshold for number of NAs generated along the
#' interpolation. Between 0 and 1, whereas 1 correspond to no filtration at all (filtering only trips that are 100% full of NAs)
#' @param splt logical; if TRUE, the final trips are ordered as a list instead of an unique data frame. Default FALSE
#' @param metINFO character string(s). Used as a vector of name for the additional information you want to add to the final
#' data frame for facilitating the subsequent analyses. However, names should fit the column name occuring in the metadata file (i.e. *pathM*)
#' @details Raw GPS data (located via *pathF*) should be as .csv format. This version only include 4 types of GPS format 1) Catlog, 2)
#' IGotU, 3) PathTracks and 4) Ecotone. If your file doesn't have any specific format, the *gpst* could be specified as IGotU (one line header,
#' and the raw data in the second lines. However, one should make sure that the *Latitude*, *Longitude*, *Date* and *Time* are specified.
#' *Date* could be supplied as character, as YYYY-MM-DD or MM/DD/YYYY and *Time* also as character as HH:MM:SS). Raw GPS data are expected to be in lat long coordinate system
#' Distance between points are calculated via the Great Circle distance, computed via *sp* package
#' @return the function return either a data frame (splt = FALSE) or a list (splt = TRUE) containing each bird trip as component
#' Metrics for each trip are also computed, as;
#'
#' - track_ID: is the unique track ID, should be named following each GPS raw file
#' - nbNA: indicating if the relative point is precise or if is was NA after the interpolation process
#' - ColonyDist: is the distance (km) separating the relative point and the colony
#' - PointDist: is the distance (km) separating two consecutive points (zi - zi-1)
#' - tripID: trip number in the trackID group
#' - birdTrip: unique bird trip identifyer
#' - TripLength: Total time length of the trip (min)
#' - TripDist: Cumulative distance flew by the bird for the specific trip
#' - Speed: Speed between t-1 and t in m/s
#' - nPoints: number of points composing the trip
#' - maxDist: maximum distance that the bird went in its trip, relative to the colony
#' - propNA: proportion of NAs in the trip. Convenient for filtering for reliability of the statistics
#' - BegPoint: colony or trip. Specify if the first point was at the colony or outside (i.e. trip). Convenient for filtering for completness
#' - EndPoint: colony or trip. Specify if the last point was at the colony or outside (i.e. trip). Convenient for filtering for completness
#'
#' @keywords trips movement central-place forager cpf delineating colony
#' @examples
#' \dontrun{
#' pathF <- c("C:/Users/User/files/allspecies/")
#' pathM <- c("C:/Users/User/files/metadata/")
#' metname <- c("meta.csv")
#' timezone <- c("GMT")
#'
#' f <- filt(pathF, pathM, metname, nbn = c("year", "colony", "ring", "recapture"),
#' timezone, speedTresh = 90, gpst = "GPSType",
#' ddep = "deployment", drecap = "recapture", colony = "colony", year = "year",
#' ring = "ring", FIX = "FIX", tdep = "utc_deployment", trecap = "utc_retrieval",
#' Clong = "Clongitude", Clat = "Clatitude", BuffColony = 0.5, MinTripDur = 30, Complete = T, FixInt = 2,
#' Interpolate = T, filtNA = 0.9, metINFO = c("ring", "year", "species", "colony"), splt = F)
#' }
#' @references
#' - Freitas, C., Lydersen, C., Ims, R.A., Fedak, M.A. and Kovacs, K.M. (2008) A simple new algorithm to filter marine mammal Argos locations Marine Mammal Science 24:315-325.
#' - McConnell, B.J., Chambers, C. and Fedak, M.A. (1992) Foraging ecology of southern elephant seals in relation to the bathymetry and productivity of the Southern Ocean. Antarctic Science 4:393-398.
#' @export

filt <- function(pathF = ..., pathM = ..., metname = NULL, nbn = c("NULL"), gpst = NULL, ddep = NULL, drecap = NULL,
                 colony = NULL, year = NULL, ring = NULL, tdep = NULL, trecap = NULL, timezone = NULL,
                 Clong = NULL, Clat = NULL, speedTresh = NULL, FIX = NULL, FixInt = NULL, BuffColony = NULL, MinTripDur = NULL,
                 Complete = FALSE, Interpolate = FALSE, filtNA = 1, metINFO = c(NULL), splt = TRUE) {

  if (class(metname) != "character")
    stop("metname should be a character")
  if (class(nbn) != "character")
    stop("nbn should be a (vector of) character")
  if (class(gpst) != "character")
    stop("gpst should be a character")
  if (class(ddep) != "character")
    stop("ddep should be a character")
  if (class(drecap) != "character")
    stop("drecap should be a character")
  if (class(colony) != "character")
    stop("colony should be a character")
  if (class(year) != "character")
    stop("year should be a character")
  if (class(ring) != "character")
    stop("ring should be a character")
  if (class(tdep) != "character")
    stop("tdep should be a character")
  if (class(trecap) != "character")
    stop("trecap should be a character")
  if (class(timezone) != "character")
    stop("the timezone should be a character")
  if (class(FIX) != "character")
    stop("the FIX should be a character")
  if (class(Clong) != "character")
    stop("the Clong should be a character")
  if (class(Clat) != "character")
    stop("the Clat should be a character")
  if (!is.null(speedTresh) & class(speedTresh) != "numeric")
    stop("the speed treshold should be numeric")
  if (!is.null(FixInt) & class(FixInt) != "numeric")
    stop("the time interval between successive fixes should be numeric")
  if (!is.null(BuffColony) & class(BuffColony) != "numeric")
    stop("the buffer size should be numeric")
  if (!is.null(MinTripDur) & class(MinTripDur) != "numeric")
    stop("the minimum time duration (min) that a trip should have should be numeric")
  if (Complete != TRUE & Complete != FALSE)
    stop("Logical: Completness function should be approved (TRUE) or delcine (FALSE)")
  if (!is.null(FixInt) & class(FixInt) != "numeric")
    stop("the time interval between successive fixes should be numeric")
  if (class(filtNA) != "numeric" | c(filtNA < 0 | filtNA > 1))
    stop("filtering NAs induced by interpolation needs a proportion as treshold; 0-1")
  if (!is.null(metINFO) & class(metINFO) != "character")
    stop("the metINFO should be a vector of character strings")

  ## Importing file's names
  Filext <- ".csv"
  file.name <- list.files(pathF)[grep(Filext,list.files(pathF))]; rm(Filext)
  files   <- length(file.name)

  ## Importing metadata; need to remove path to pathM, no permission is denied
  metafile <- read.csv(paste(pathM, metname, sep=""), sep = ",", header=T, na.strings=c("","NA"))

  ## define from when the tracks should begin and end: when logger was deployed/retrieved
  metafile$start <- as.POSIXct(strptime(paste (as.character(metafile[[ddep]]),
                     as.character(metafile[[tdep]])),"%Y-%m-%d %H:%M", tz=timezone), timezone)
  metafile$end   <- as.POSIXct(strptime(paste (as.character(metafile[[drecap]]),
                     as.character(metafile[[trecap]])),"%Y-%m-%d %H:%M", tz=timezone), timezone)

  ## create a track ID synonymous with how the files are named:
  dfn <- list()
  for (m in 1:length(unique(nbn))){
    tmpname <- nbn[m]
    dfn[[m]] <- paste(metafile[[tmpname]])
  }

  metafile$ID <- do.call(paste, c(as.data.frame(do.call(cbind, dfn))[, c(1:ncol(as.data.frame(do.call(cbind, dfn))))], sep="_"))

  Date.diff <-  metafile$end - metafile$start; if( length(metafile$ID[which(Date.diff <= 0)]) != 0 )
      stop('Date of retrieval should be later than date of deployment')

  allbirds.list <- list()

    for (i in 1:files) { ## Loop delineating trips on each file i.e. GPS deployement

  ## Partitionning between two types of GPS-based file
  ## Need to have a column specifying GPS type in the metadata file
  GPSType <- metafile[, gpst][which(metafile$ID == gsub(".csv", "", file.name[i]))]
  if(identical(GPSType, character(0))) {stop("You need to have a valid GPS type. Referring to gpst")}

  ## This line remove the 6 first unecassary lines in the CHIP-PATCH GPS Type
  ## The 1GEN is read as it is
  if(GPSType != "Catlog" & GPSType != "IGotU" & GPSType != "Pathtrack" & GPSType != "Ecotone") {stop("the GPS type should be either Catlog, IGotU, Ecotone or Pathtrack")}
  if(GPSType == "Catlog") { bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",", skip = 6) }
  if(GPSType == "IGotU"){ bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",") }
  if(GPSType == "Ecotone"){ bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",")
    bird$Date <- as.Date(with(bird, paste(Year, Month, Day, sep="/")), "%Y/%m/%d")
    bird$Time <- sprintf("%s:%s:%s", bird$Hour, bird$Minute, bird$Second)
    bird <- subset(bird, !is.na(bird$Latitude) & !is.na(bird$Longitude))}
  if(GPSType == "Pathtrack"){
    bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",", skip =5)
    colnames(bird) <- c("Day", "Month", "Year", "Hour", "Minute", "Second", "SecondDay",
                        "NumberSats", "Latitude", "Longitude", "Altitude", "ClockOff",
                        "Accuracy", "BatLevel")
    bird$Year <- bird$Year + 2000
    bird$Date <- as.Date(with(bird, paste(Year, Month, Day, sep="/")), "%Y/%m/%d")
    bird$Time <- sprintf("%s:%s:%s", bird$Hour, bird$Minute, bird$Second)
    bird <- subset(bird, bird$Latitude > 0 & bird$Longitude > 0)}

  ## This assume to have a meaningful file.name, will be used thereafter as trackID
  bird$trackID <- unlist(strsplit(file.name[i], "[.]"))[1]

  ## extract start and end times for track, needs to be linked to a metadata file
  ## should implement an object, for specifying start, end & timezone
  start <- metafile$start[which(metafile$ID == gsub(".csv", "", file.name[i]))]
  end <- metafile$end[which(metafile$ID == gsub(".csv", "", file.name[i]))]
  int <- lubridate::interval(start, end, tzone = timezone)

  ## Extract colony corrdinates and building of a data.frame
  ## Will be used later in the script for colony-location distance
  CLong <- metafile[[Clong]][which(metafile$ID == gsub(".csv", "", file.name[i]))]
  CLat <- metafile[[Clat]][which(metafile$ID == gsub(".csv", "", file.name[i]))]
  CLL <- cbind(CLong, CLat)

  ## make sure date in the right format - ymd; creating the time object, then ordering since
  ## sometimes, data are not in chronological order
  mdy <- lubridate::mdy(bird$Date, quiet = TRUE)
  ymd <- lubridate::ymd(bird$Date, quiet = TRUE)
  mdy[is.na(mdy)] <- ymd[!is.na(ymd)] # give the working one precedence
  bird$Date <- format(mdy, "%Y/%m/%d")
  bird$Time <- chron::chron(times=bird$Time)
  bird$datetime <- strptime(paste(gsub("/", "-", bird$Date), bird$Time), format = "%Y-%m-%d %H:%M:%S", tz = "GMT")
  bird <- bird[order(bird$datetime , decreasing = FALSE ),] ## assuring the chronological order of fixes

  ## now subset to points within this time start/end window
  bird <- bird[bird$datetime >= start & bird$datetime <= end, ]

  ## overwrite device speed data, in m/s
  # trip.matrix <- data.matrix(bird[,c("Longitude","Latitude")], rownames.force = NA) #creates two column matrix of lat and long for trip trackDistance function
  # between.point.distances <- trip::trackDistance(trip.matrix, longlat = TRUE) #calculate distance in km between each GPS point, into vector
  # bird$PointDist <- c(0, between.point.distances) #dist in km
  # bird$TimeElapsed <- 0 #create empty column to fill with for loop
  #
  # for (k in 2:NROW(bird)){
  #     bird$TimeElapsed[k] <- difftime(bird$datetime[k], bird$datetime[k-1], units = "secs") #time diff in secs
  #   }
  #
  # bird$Speed <- (bird$PointDist * 1000)/bird$TimeElapsed ## m/s
  #
  # if (!is.null(speedTresh)) {
  #     bird <- subset(bird, bird$Speed < speedTresh)
  #   }

  ## Extracting GPS interval, determined from the metadata
  Sres <- (metafile[, FIX][which(metafile$ID == gsub(".csv","",file.name[i]))])*60
  Mres <- (metafile[, FIX][which(metafile$ID == gsub(".csv","",file.name[i]))])

  if(Interpolate == TRUE) {

    refda <- min(bird$datetime)
    Nbird <- adehabitatLT::as.ltraj(xy = data.frame(bird$Longitude, bird$Latitude), date = as.POSIXct(bird$datetime), id = bird$trackID)
    Nbird[[1]]$sf <- suppressWarnings(trip::sda(x=trip::as.trip(Nbird), smax= speedTresh))
    Nbird <- adehabitatLT::ld(Nbird)
    Nbird <- subset(Nbird, Nbird$sf == TRUE)
    Nbird <- adehabitatLT::as.ltraj(xy = data.frame(Nbird$x, Nbird$y), date = as.POSIXct(Nbird$date), id = Nbird$id)

    wost_NA <- adehabitatLT::setNA(Nbird,refda,Mres,units="min")
    wost_demo <- adehabitatLT::sett0(wost_NA,refda,Mres,units="min")
    Nbird <- adehabitatLT::redisltraj(na.omit(wost_demo), u = Sres, type = "time")
    Nbird[[1]]$nbNA <- ifelse(Nbird[[1]]$x %in% wost_demo[[1]]$x, 0, 1)

    tmpN <- adehabitatLT::ld(Nbird)
    Nbird <- adehabitatLT::as.ltraj(xy = data.frame(tmpN$x, tmpN$y), date = as.POSIXct(tmpN$date), id = tmpN$id, infolocs = tmpN[, c(11, 12)])

    if(!is.null(FixInt)) {
     if(nrow(Nbird[[1]]) > 1){ ### Standardized as matter of time treshold
      if(Mres != FixInt) {
       if( FixInt < max(metafile$FIX, na.rm = TRUE)  ) stop("your selected treshold for ", print(bird$trackID[1]), " is smaller than the greater interval in your sampled tracks - meaning that your are creating points")
        Nbird <- adehabitatLT::subsample(Nbird, FixInt, units = c("min"))
       }
      }
     }

    Nbird <- adehabitatLT::ld(Nbird)
    bird <- data.frame(datetime = Nbird$date, Latitude = Nbird$y, Longitude = Nbird$x, trackID = Nbird$id, nbNA = Nbird$nbNA)

    } else { bird <- bird[, c("datetime", "Latitude", "Longitude", "trackID")]; bird$nbNA <- 0 }

  ## get distance flown from the colony
  trip.matrix <- data.matrix(bird[,c("Longitude","Latitude")], rownames.force = NA) #creates two column matrix of lat and long for trip trackDistance function
  bird$ColonyDist <- sp::spDistsN1(trip.matrix, CLL, longlat = TRUE) #calculate distances between GPS points and initial GPS point in km
  between.point.distances <- trip::trackDistance(trip.matrix, longlat = TRUE) #calculate distance in km between each GPS point, into vector
  bird$PointDist <- c(0, between.point.distances) #dist in km
  bird$TimeElapsed <- 0 #create empty column to fill with for loop

  for (k in 2:NROW(bird)){
    bird$TimeElapsed[k] <- difftime(bird$datetime[k], bird$datetime[k-1], units = "secs") #time diff in secs
  }

  bird$Speed <- (bird$PointDist * 1000)/bird$TimeElapsed ## m/s

  if(is.null(BuffColony)) {
    bird$ColonyorTrip <- c("trip")
    bird$tripID <- 1
    } else {

  ## Setting buffer treshold
  bird$ColonyorTrip <- ifelse(bird$ColonyDist > BuffColony, "trip","colony") #in km

  ## set all colony points to 0
  bird$ColonyDist[which(bird$ColonyorTrip == "colony")] <- 0

  ## add trip number
  tripID <- 1
  nRows <- length(bird$ColonyorTrip) - 1
  bird$tripID <- 0

  ### this big for loop numbers all trips, including the one colony point before
  ### and after the last trip point
  if(bird$ColonyorTrip[1] == "colony" && bird$ColonyorTrip[2] == "trip") {bird$tripID[1] <- tripID}  else {bird$tripID[1] <- 0}
  if(bird$ColonyorTrip[1] == "trip") {bird$tripID[1] <- tripID}

  for (j in 2:nRows) {
    if(bird$ColonyorTrip[j] == "colony" && bird$ColonyorTrip[j] != bird$ColonyorTrip[j+1] &&
       bird$ColonyorTrip[j-1] != bird$ColonyorTrip[j+1]) {
      tripID <- tripID + 1
    } else {bird$tripID[j] <- tripID}
    if(bird$ColonyorTrip[j] == "trip" | bird$ColonyorTrip[j] != bird$ColonyorTrip[j-1]
       | bird$ColonyorTrip[j] != bird$ColonyorTrip[j+1]) {
      bird$tripID[j] <- tripID
    } else {
      bird$tripID[j] <- 0}
    if(bird$ColonyorTrip[nRows+1] == "trip") {
      bird$tripID[nRows+1] <- tripID ## set the last row correctly
     }
    }
   }

  ## Creating unique ID variable
  bird$birdTrip <- paste(bird$trackID, bird$tripID, sep = "_")

  ## Getting rid of colony points; perhaps YES or NO in the function()
  bird <- subset(bird, bird$tripID > 0)

  ## Loop estimating the number of points per trip
  p.list <- list()

  for (o in 1:length(unique(bird$tripID))) {

    test <- subset(bird, bird$tripID == unique(bird$tripID)[o])

    test$TripLength <- difftime(max(test$datetime),min(test$datetime),   #calculate time elapsed between start and end
                                units = ("min"))

    trip.matrix <- data.matrix(test[,c("Longitude","Latitude")], rownames.force = NA) #creates two column matrix of lat and long for trip trackDistance function
    distbp <- trip::trackDistance(trip.matrix, longlat = TRUE) #calculates distance between each GPS point, into vector
    TDist <- sum(distbp) ## Total distance travelled per trip (m)
    test$TripDist <- TDist

    tripDurs <- as.data.frame(table(test$birdTrip))
    test$nPoints <- tripDurs$Freq[match(test$birdTrip, tripDurs$Var1)] ## Nb of points per trips
    test$maxDist <- max(test$ColonyDist, na.rm = TRUE) ## Calculating maximum distance per trip

    test$propNA <- sum(test$nbNA)/length(test$nbNA) ## Proportion of interpolated NAs for each trip

    ## Calculation of the proportion of points over land
    ## Would require a appropriate shapefile for the calculation
    #pts <- SpatialPointsDataFrame(test[,c("Longitude", "Latitude")], test,proj4string=crdref)
    #pts$Land <- !is.na(over(pts, as(s, "SpatialPolygons")))
    #pts$Prop.Land <- mean(pts$Land)
    #test <- as.data.frame(pts)

    p.list[[o]] <- test
    rm(test)

  }

  alltrips <- do.call(rbind, p.list)
  rm(p.list)

  notime <- alltrips[, c("birdTrip", "ColonyDist","ColonyorTrip")]

  trip.Bincomplete <- aggregate(notime, by=list(notime$birdTrip), FUN = function(x) { head(x,1) }) ## gets the first point for each trip
  trip.Eincomplete <- aggregate(notime, by=list(notime$birdTrip), FUN = function(x) { tail(x,1) }) ## gets the last point for each trip

  alltrips$BegPoint <- trip.Bincomplete[, 4][match(alltrips$birdTrip, trip.Bincomplete$birdTrip)]
  alltrips$EndPoint <- trip.Eincomplete[, 4][match(alltrips$birdTrip, trip.Eincomplete$birdTrip)]

      if ( Complete == TRUE ) { # get rid of incomplete trips
  alltrips <- subset(alltrips, alltrips$BegPoint == "colony")
  alltrips <- subset(alltrips, alltrips$EndPoint == "colony")
    }

      if (!is.null(MinTripDur)) {
  alltrips <- subset(alltrips, alltrips$TripLength >= MinTripDur)
    }

      if (!is.null(filtNA)) {
  alltrips <- subset(alltrips, alltrips$propNA <= filtNA)
    }

  if (!is.null(metINFO)) {
  for (k in 1:length(metINFO)) {  ## this loop insert specifric colum from the metadata, convenient for further analyses
    mi <- metINFO[k]
    alltrips[, mi] <- tryCatch({metafile[, mi][match(alltrips$trackID, metafile$ID)]},
             error = function(x) {stop("column's name(s) is/are not recognised from the specified metadata file")})
      }}

      if (nrow(alltrips) > 1){
  alltrips$ColLong <- CLong
  alltrips$ColLat <- CLat
      }

  allbirds.list[[i]] <- alltrips
  rm(alltrips)
  allbirds <- do.call(rbind, allbirds.list)
  cat("GPS file = ", i, "\n")

    }

  if(is.null(BuffColony)) {
    warning("No buffer has been drawn around the colony, meaning that trips have not been delineated. Resuming in one single track for each bird", call. = F)
      }

  if(splt == TRUE) { allbirds <- split(allbirds, allbirds$birdTrip)}

  class(allbirds) <- "CPFMove"

  return(allbirds)

}
PhilBertrand/CPFMove documentation built on Nov. 22, 2020, 4:45 a.m.