#' Delineating movement trips for central-place foraging species
#'
#' This function was developped to conveniently delineate trips from raw GPS data for
#' central-place foraging species. It also includes various "filtering" options to segment trips.
#'
#' @param pathF path leading to your tracks
#' @param pathM path leading to your metadata file
#' @param metname a character string corresponding to the metadata file's name
#' @param nbn character. Vector of "names" matching your raw GPS files (i.e. which are linked by the pathF argument). See details for more details
#' @param timezone time zone in which data are recorded
#' @param gpst a character string corresponding to the metadata's column name containing the GPS type
#' @param ddep a character string corresponding to the metadata's column name containing the date of departure
#' @param drecap a character string corresponding to the metadata's column name containing the date of retrieval
#' @param colony a character string corresponding to the metadata's column name containing individual colony attribution
#' @param year a character string corresponding to the metadata's column name containing the year of sampling
#' @param ring a character string corresponding to the metadata's column name containing the individual ring number
#' @param tdep a character string corresponding to the metadata's column name containing the time of departure
#' @param trecap a character string corresponding to the metadata's column name containing the time of retrieval
#' @param speedTresh numeric treshold used as speed cutoff for speed filtering (km/h)
#' @param FIX a character string corresponding to the metadata's column name containing the numeric interval separating two
#' locations in the GPS track
#' @param FixInt numeric interval that should separate two locations in GPS tracks for interpolation (minutes; e.g. 2, 10)
#' @param BuffColony numeric value indicating the buffer radius length around the colony (km)
#' @param MinTripDur numeric value used for filtering trip by minimum trip duration (minutes)
#' @param Complete logical; if TRUE, only complete trip will be kept
#' @param Interpolate logical; if TRUE, tracks are interpolated
#' @param filtNA numeric value which correspond of the filtering treshold for number of NAs generated along the
#' interpolation. Between 0 and 1, whereas 1 correspond to no filtration at all (filtering only trips that are 100% full of NAs)
#' @param splt logical; if TRUE, the final trips are ordered as a list instead of an unique data frame. Default FALSE
#' @param metINFO character string(s). Used as a vector of name for the additional information you want to add to the final
#' data frame for facilitating the subsequent analyses. However, names should fit the column name occuring in the metadata file (i.e. *pathM*)
#' @details Raw GPS data (located via *pathF*) should be as .csv format. This version only include 4 types of GPS format 1) Catlog, 2)
#' IGotU, 3) PathTracks and 4) Ecotone. If your file doesn't have any specific format, the *gpst* could be specified as IGotU (one line header,
#' and the raw data in the second lines. However, one should make sure that the *Latitude*, *Longitude*, *Date* and *Time* are specified.
#' *Date* could be supplied as character, as YYYY-MM-DD or MM/DD/YYYY and *Time* also as character as HH:MM:SS). Raw GPS data are expected to be in lat long coordinate system
#' Distance between points are calculated via the Great Circle distance, computed via *sp* package
#' @return the function return either a data frame (splt = FALSE) or a list (splt = TRUE) containing each bird trip as component
#' Metrics for each trip are also computed, as;
#'
#' - track_ID: is the unique track ID, should be named following each GPS raw file
#' - nbNA: indicating if the relative point is precise or if is was NA after the interpolation process
#' - ColonyDist: is the distance (km) separating the relative point and the colony
#' - PointDist: is the distance (km) separating two consecutive points (zi - zi-1)
#' - tripID: trip number in the trackID group
#' - birdTrip: unique bird trip identifyer
#' - TripLength: Total time length of the trip (min)
#' - TripDist: Cumulative distance flew by the bird for the specific trip
#' - Speed: Speed between t-1 and t in m/s
#' - nPoints: number of points composing the trip
#' - maxDist: maximum distance that the bird went in its trip, relative to the colony
#' - propNA: proportion of NAs in the trip. Convenient for filtering for reliability of the statistics
#' - BegPoint: colony or trip. Specify if the first point was at the colony or outside (i.e. trip). Convenient for filtering for completness
#' - EndPoint: colony or trip. Specify if the last point was at the colony or outside (i.e. trip). Convenient for filtering for completness
#'
#' @keywords trips movement central-place forager cpf delineating colony
#' @examples
#' \dontrun{
#' pathF <- c("C:/Users/User/files/allspecies/")
#' pathM <- c("C:/Users/User/files/metadata/")
#' metname <- c("meta.csv")
#' timezone <- c("GMT")
#'
#' f <- filt(pathF, pathM, metname, nbn = c("year", "colony", "ring", "recapture"),
#' timezone, speedTresh = 90, gpst = "GPSType",
#' ddep = "deployment", drecap = "recapture", colony = "colony", year = "year",
#' ring = "ring", FIX = "FIX", tdep = "utc_deployment", trecap = "utc_retrieval",
#' Clong = "Clongitude", Clat = "Clatitude", BuffColony = 0.5, MinTripDur = 30, Complete = T, FixInt = 2,
#' Interpolate = T, filtNA = 0.9, metINFO = c("ring", "year", "species", "colony"), splt = F)
#' }
#' @references
#' - Freitas, C., Lydersen, C., Ims, R.A., Fedak, M.A. and Kovacs, K.M. (2008) A simple new algorithm to filter marine mammal Argos locations Marine Mammal Science 24:315-325.
#' - McConnell, B.J., Chambers, C. and Fedak, M.A. (1992) Foraging ecology of southern elephant seals in relation to the bathymetry and productivity of the Southern Ocean. Antarctic Science 4:393-398.
#' @export
filt <- function(pathF = ..., pathM = ..., metname = NULL, nbn = c("NULL"), gpst = NULL, ddep = NULL, drecap = NULL,
colony = NULL, year = NULL, ring = NULL, tdep = NULL, trecap = NULL, timezone = NULL,
Clong = NULL, Clat = NULL, speedTresh = NULL, FIX = NULL, FixInt = NULL, BuffColony = NULL, MinTripDur = NULL,
Complete = FALSE, Interpolate = FALSE, filtNA = 1, metINFO = c(NULL), splt = TRUE) {
if (class(metname) != "character")
stop("metname should be a character")
if (class(nbn) != "character")
stop("nbn should be a (vector of) character")
if (class(gpst) != "character")
stop("gpst should be a character")
if (class(ddep) != "character")
stop("ddep should be a character")
if (class(drecap) != "character")
stop("drecap should be a character")
if (class(colony) != "character")
stop("colony should be a character")
if (class(year) != "character")
stop("year should be a character")
if (class(ring) != "character")
stop("ring should be a character")
if (class(tdep) != "character")
stop("tdep should be a character")
if (class(trecap) != "character")
stop("trecap should be a character")
if (class(timezone) != "character")
stop("the timezone should be a character")
if (class(FIX) != "character")
stop("the FIX should be a character")
if (class(Clong) != "character")
stop("the Clong should be a character")
if (class(Clat) != "character")
stop("the Clat should be a character")
if (!is.null(speedTresh) & class(speedTresh) != "numeric")
stop("the speed treshold should be numeric")
if (!is.null(FixInt) & class(FixInt) != "numeric")
stop("the time interval between successive fixes should be numeric")
if (!is.null(BuffColony) & class(BuffColony) != "numeric")
stop("the buffer size should be numeric")
if (!is.null(MinTripDur) & class(MinTripDur) != "numeric")
stop("the minimum time duration (min) that a trip should have should be numeric")
if (Complete != TRUE & Complete != FALSE)
stop("Logical: Completness function should be approved (TRUE) or delcine (FALSE)")
if (!is.null(FixInt) & class(FixInt) != "numeric")
stop("the time interval between successive fixes should be numeric")
if (class(filtNA) != "numeric" | c(filtNA < 0 | filtNA > 1))
stop("filtering NAs induced by interpolation needs a proportion as treshold; 0-1")
if (!is.null(metINFO) & class(metINFO) != "character")
stop("the metINFO should be a vector of character strings")
## Importing file's names
Filext <- ".csv"
file.name <- list.files(pathF)[grep(Filext,list.files(pathF))]; rm(Filext)
files <- length(file.name)
## Importing metadata; need to remove path to pathM, no permission is denied
metafile <- read.csv(paste(pathM, metname, sep=""), sep = ",", header=T, na.strings=c("","NA"))
## define from when the tracks should begin and end: when logger was deployed/retrieved
metafile$start <- as.POSIXct(strptime(paste (as.character(metafile[[ddep]]),
as.character(metafile[[tdep]])),"%Y-%m-%d %H:%M", tz=timezone), timezone)
metafile$end <- as.POSIXct(strptime(paste (as.character(metafile[[drecap]]),
as.character(metafile[[trecap]])),"%Y-%m-%d %H:%M", tz=timezone), timezone)
## create a track ID synonymous with how the files are named:
dfn <- list()
for (m in 1:length(unique(nbn))){
tmpname <- nbn[m]
dfn[[m]] <- paste(metafile[[tmpname]])
}
metafile$ID <- do.call(paste, c(as.data.frame(do.call(cbind, dfn))[, c(1:ncol(as.data.frame(do.call(cbind, dfn))))], sep="_"))
Date.diff <- metafile$end - metafile$start; if( length(metafile$ID[which(Date.diff <= 0)]) != 0 )
stop('Date of retrieval should be later than date of deployment')
allbirds.list <- list()
for (i in 1:files) { ## Loop delineating trips on each file i.e. GPS deployement
## Partitionning between two types of GPS-based file
## Need to have a column specifying GPS type in the metadata file
GPSType <- metafile[, gpst][which(metafile$ID == gsub(".csv", "", file.name[i]))]
if(identical(GPSType, character(0))) {stop("You need to have a valid GPS type. Referring to gpst")}
## This line remove the 6 first unecassary lines in the CHIP-PATCH GPS Type
## The 1GEN is read as it is
if(GPSType != "Catlog" & GPSType != "IGotU" & GPSType != "Pathtrack" & GPSType != "Ecotone") {stop("the GPS type should be either Catlog, IGotU, Ecotone or Pathtrack")}
if(GPSType == "Catlog") { bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",", skip = 6) }
if(GPSType == "IGotU"){ bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",") }
if(GPSType == "Ecotone"){ bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",")
bird$Date <- as.Date(with(bird, paste(Year, Month, Day, sep="/")), "%Y/%m/%d")
bird$Time <- sprintf("%s:%s:%s", bird$Hour, bird$Minute, bird$Second)
bird <- subset(bird, !is.na(bird$Latitude) & !is.na(bird$Longitude))}
if(GPSType == "Pathtrack"){
bird <- read.csv(paste(pathF, file.name[i], sep=""), sep = ",", skip =5)
colnames(bird) <- c("Day", "Month", "Year", "Hour", "Minute", "Second", "SecondDay",
"NumberSats", "Latitude", "Longitude", "Altitude", "ClockOff",
"Accuracy", "BatLevel")
bird$Year <- bird$Year + 2000
bird$Date <- as.Date(with(bird, paste(Year, Month, Day, sep="/")), "%Y/%m/%d")
bird$Time <- sprintf("%s:%s:%s", bird$Hour, bird$Minute, bird$Second)
bird <- subset(bird, bird$Latitude > 0 & bird$Longitude > 0)}
## This assume to have a meaningful file.name, will be used thereafter as trackID
bird$trackID <- unlist(strsplit(file.name[i], "[.]"))[1]
## extract start and end times for track, needs to be linked to a metadata file
## should implement an object, for specifying start, end & timezone
start <- metafile$start[which(metafile$ID == gsub(".csv", "", file.name[i]))]
end <- metafile$end[which(metafile$ID == gsub(".csv", "", file.name[i]))]
int <- lubridate::interval(start, end, tzone = timezone)
## Extract colony corrdinates and building of a data.frame
## Will be used later in the script for colony-location distance
CLong <- metafile[[Clong]][which(metafile$ID == gsub(".csv", "", file.name[i]))]
CLat <- metafile[[Clat]][which(metafile$ID == gsub(".csv", "", file.name[i]))]
CLL <- cbind(CLong, CLat)
## make sure date in the right format - ymd; creating the time object, then ordering since
## sometimes, data are not in chronological order
mdy <- lubridate::mdy(bird$Date, quiet = TRUE)
ymd <- lubridate::ymd(bird$Date, quiet = TRUE)
mdy[is.na(mdy)] <- ymd[!is.na(ymd)] # give the working one precedence
bird$Date <- format(mdy, "%Y/%m/%d")
bird$Time <- chron::chron(times=bird$Time)
bird$datetime <- strptime(paste(gsub("/", "-", bird$Date), bird$Time), format = "%Y-%m-%d %H:%M:%S", tz = "GMT")
bird <- bird[order(bird$datetime , decreasing = FALSE ),] ## assuring the chronological order of fixes
## now subset to points within this time start/end window
bird <- bird[bird$datetime >= start & bird$datetime <= end, ]
## overwrite device speed data, in m/s
# trip.matrix <- data.matrix(bird[,c("Longitude","Latitude")], rownames.force = NA) #creates two column matrix of lat and long for trip trackDistance function
# between.point.distances <- trip::trackDistance(trip.matrix, longlat = TRUE) #calculate distance in km between each GPS point, into vector
# bird$PointDist <- c(0, between.point.distances) #dist in km
# bird$TimeElapsed <- 0 #create empty column to fill with for loop
#
# for (k in 2:NROW(bird)){
# bird$TimeElapsed[k] <- difftime(bird$datetime[k], bird$datetime[k-1], units = "secs") #time diff in secs
# }
#
# bird$Speed <- (bird$PointDist * 1000)/bird$TimeElapsed ## m/s
#
# if (!is.null(speedTresh)) {
# bird <- subset(bird, bird$Speed < speedTresh)
# }
## Extracting GPS interval, determined from the metadata
Sres <- (metafile[, FIX][which(metafile$ID == gsub(".csv","",file.name[i]))])*60
Mres <- (metafile[, FIX][which(metafile$ID == gsub(".csv","",file.name[i]))])
if(Interpolate == TRUE) {
refda <- min(bird$datetime)
Nbird <- adehabitatLT::as.ltraj(xy = data.frame(bird$Longitude, bird$Latitude), date = as.POSIXct(bird$datetime), id = bird$trackID)
Nbird[[1]]$sf <- suppressWarnings(trip::sda(x=trip::as.trip(Nbird), smax= speedTresh))
Nbird <- adehabitatLT::ld(Nbird)
Nbird <- subset(Nbird, Nbird$sf == TRUE)
Nbird <- adehabitatLT::as.ltraj(xy = data.frame(Nbird$x, Nbird$y), date = as.POSIXct(Nbird$date), id = Nbird$id)
wost_NA <- adehabitatLT::setNA(Nbird,refda,Mres,units="min")
wost_demo <- adehabitatLT::sett0(wost_NA,refda,Mres,units="min")
Nbird <- adehabitatLT::redisltraj(na.omit(wost_demo), u = Sres, type = "time")
Nbird[[1]]$nbNA <- ifelse(Nbird[[1]]$x %in% wost_demo[[1]]$x, 0, 1)
tmpN <- adehabitatLT::ld(Nbird)
Nbird <- adehabitatLT::as.ltraj(xy = data.frame(tmpN$x, tmpN$y), date = as.POSIXct(tmpN$date), id = tmpN$id, infolocs = tmpN[, c(11, 12)])
if(!is.null(FixInt)) {
if(nrow(Nbird[[1]]) > 1){ ### Standardized as matter of time treshold
if(Mres != FixInt) {
if( FixInt < max(metafile$FIX, na.rm = TRUE) ) stop("your selected treshold for ", print(bird$trackID[1]), " is smaller than the greater interval in your sampled tracks - meaning that your are creating points")
Nbird <- adehabitatLT::subsample(Nbird, FixInt, units = c("min"))
}
}
}
Nbird <- adehabitatLT::ld(Nbird)
bird <- data.frame(datetime = Nbird$date, Latitude = Nbird$y, Longitude = Nbird$x, trackID = Nbird$id, nbNA = Nbird$nbNA)
} else { bird <- bird[, c("datetime", "Latitude", "Longitude", "trackID")]; bird$nbNA <- 0 }
## get distance flown from the colony
trip.matrix <- data.matrix(bird[,c("Longitude","Latitude")], rownames.force = NA) #creates two column matrix of lat and long for trip trackDistance function
bird$ColonyDist <- sp::spDistsN1(trip.matrix, CLL, longlat = TRUE) #calculate distances between GPS points and initial GPS point in km
between.point.distances <- trip::trackDistance(trip.matrix, longlat = TRUE) #calculate distance in km between each GPS point, into vector
bird$PointDist <- c(0, between.point.distances) #dist in km
bird$TimeElapsed <- 0 #create empty column to fill with for loop
for (k in 2:NROW(bird)){
bird$TimeElapsed[k] <- difftime(bird$datetime[k], bird$datetime[k-1], units = "secs") #time diff in secs
}
bird$Speed <- (bird$PointDist * 1000)/bird$TimeElapsed ## m/s
if(is.null(BuffColony)) {
bird$ColonyorTrip <- c("trip")
bird$tripID <- 1
} else {
## Setting buffer treshold
bird$ColonyorTrip <- ifelse(bird$ColonyDist > BuffColony, "trip","colony") #in km
## set all colony points to 0
bird$ColonyDist[which(bird$ColonyorTrip == "colony")] <- 0
## add trip number
tripID <- 1
nRows <- length(bird$ColonyorTrip) - 1
bird$tripID <- 0
### this big for loop numbers all trips, including the one colony point before
### and after the last trip point
if(bird$ColonyorTrip[1] == "colony" && bird$ColonyorTrip[2] == "trip") {bird$tripID[1] <- tripID} else {bird$tripID[1] <- 0}
if(bird$ColonyorTrip[1] == "trip") {bird$tripID[1] <- tripID}
for (j in 2:nRows) {
if(bird$ColonyorTrip[j] == "colony" && bird$ColonyorTrip[j] != bird$ColonyorTrip[j+1] &&
bird$ColonyorTrip[j-1] != bird$ColonyorTrip[j+1]) {
tripID <- tripID + 1
} else {bird$tripID[j] <- tripID}
if(bird$ColonyorTrip[j] == "trip" | bird$ColonyorTrip[j] != bird$ColonyorTrip[j-1]
| bird$ColonyorTrip[j] != bird$ColonyorTrip[j+1]) {
bird$tripID[j] <- tripID
} else {
bird$tripID[j] <- 0}
if(bird$ColonyorTrip[nRows+1] == "trip") {
bird$tripID[nRows+1] <- tripID ## set the last row correctly
}
}
}
## Creating unique ID variable
bird$birdTrip <- paste(bird$trackID, bird$tripID, sep = "_")
## Getting rid of colony points; perhaps YES or NO in the function()
bird <- subset(bird, bird$tripID > 0)
## Loop estimating the number of points per trip
p.list <- list()
for (o in 1:length(unique(bird$tripID))) {
test <- subset(bird, bird$tripID == unique(bird$tripID)[o])
test$TripLength <- difftime(max(test$datetime),min(test$datetime), #calculate time elapsed between start and end
units = ("min"))
trip.matrix <- data.matrix(test[,c("Longitude","Latitude")], rownames.force = NA) #creates two column matrix of lat and long for trip trackDistance function
distbp <- trip::trackDistance(trip.matrix, longlat = TRUE) #calculates distance between each GPS point, into vector
TDist <- sum(distbp) ## Total distance travelled per trip (m)
test$TripDist <- TDist
tripDurs <- as.data.frame(table(test$birdTrip))
test$nPoints <- tripDurs$Freq[match(test$birdTrip, tripDurs$Var1)] ## Nb of points per trips
test$maxDist <- max(test$ColonyDist, na.rm = TRUE) ## Calculating maximum distance per trip
test$propNA <- sum(test$nbNA)/length(test$nbNA) ## Proportion of interpolated NAs for each trip
## Calculation of the proportion of points over land
## Would require a appropriate shapefile for the calculation
#pts <- SpatialPointsDataFrame(test[,c("Longitude", "Latitude")], test,proj4string=crdref)
#pts$Land <- !is.na(over(pts, as(s, "SpatialPolygons")))
#pts$Prop.Land <- mean(pts$Land)
#test <- as.data.frame(pts)
p.list[[o]] <- test
rm(test)
}
alltrips <- do.call(rbind, p.list)
rm(p.list)
notime <- alltrips[, c("birdTrip", "ColonyDist","ColonyorTrip")]
trip.Bincomplete <- aggregate(notime, by=list(notime$birdTrip), FUN = function(x) { head(x,1) }) ## gets the first point for each trip
trip.Eincomplete <- aggregate(notime, by=list(notime$birdTrip), FUN = function(x) { tail(x,1) }) ## gets the last point for each trip
alltrips$BegPoint <- trip.Bincomplete[, 4][match(alltrips$birdTrip, trip.Bincomplete$birdTrip)]
alltrips$EndPoint <- trip.Eincomplete[, 4][match(alltrips$birdTrip, trip.Eincomplete$birdTrip)]
if ( Complete == TRUE ) { # get rid of incomplete trips
alltrips <- subset(alltrips, alltrips$BegPoint == "colony")
alltrips <- subset(alltrips, alltrips$EndPoint == "colony")
}
if (!is.null(MinTripDur)) {
alltrips <- subset(alltrips, alltrips$TripLength >= MinTripDur)
}
if (!is.null(filtNA)) {
alltrips <- subset(alltrips, alltrips$propNA <= filtNA)
}
if (!is.null(metINFO)) {
for (k in 1:length(metINFO)) { ## this loop insert specifric colum from the metadata, convenient for further analyses
mi <- metINFO[k]
alltrips[, mi] <- tryCatch({metafile[, mi][match(alltrips$trackID, metafile$ID)]},
error = function(x) {stop("column's name(s) is/are not recognised from the specified metadata file")})
}}
if (nrow(alltrips) > 1){
alltrips$ColLong <- CLong
alltrips$ColLat <- CLat
}
allbirds.list[[i]] <- alltrips
rm(alltrips)
allbirds <- do.call(rbind, allbirds.list)
cat("GPS file = ", i, "\n")
}
if(is.null(BuffColony)) {
warning("No buffer has been drawn around the colony, meaning that trips have not been delineated. Resuming in one single track for each bird", call. = F)
}
if(splt == TRUE) { allbirds <- split(allbirds, allbirds$birdTrip)}
class(allbirds) <- "CPFMove"
return(allbirds)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.