Pre-processing tracking data

Load libraries and prepare files

# libs for data
library(data.table)
library(stringi)
library(glue)

# for analysis
library(dbscan)

# libraries for plotting and ATLAS tracking data
library(ggplot2)
library(atlastools)

Prepare files.

# list files
files <- list.files("data/processed/data_id", full.names = TRUE)
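
The pipeline below assumes each per-individual file contains the columns used downstream: TAG, sp, TIME (milliseconds since the Unix epoch), X, Y, VARX, VARY and COVXY. A minimal sketch to confirm this before running the loop (this check is not part of the original workflow):

# sketch: confirm the first file has the columns the pipeline relies on
expected_cols <- c("TAG", "sp", "TIME", "X", "Y", "VARX", "VARY", "COVXY")
cols_found <- colnames(fread(files[1], nrows = 5))
stopifnot(all(expected_cols %in% cols_found))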

Pre-processing pipeline

This pre-processing pipeline is parameterised based on exploratory data analysis, which can be found in the supplementary material.

# remove old preprocessing log
if (file.exists("data/log_preprocessing.log")) {
  message("removing old preprocessing log")
  file.remove("data/log_preprocessing.log")
} else {
  message(glue("No preprocessing log as of {Sys.time()}, new log to be made"))
}

# some parameters
min_daily_fixes <- 500 # discard a day's data with fewer than this many rows
moving_window_ppa <- 7 # median-smoothing window for sparrow, bulbul, warbler (PPA)
moving_window_h <- 7 # median-smoothing window for swallow
speed_threshold_ppa <- 20 # speed threshold (m/s) for PPA species
speed_threshold_h <- 20 # speed threshold (m/s) for swallow
angle_threshold <- 10 # turning-angle threshold in degrees
hour_start <- 5 # keep positions from this hour of day onward
hour_end <- 20 # drop positions from this hour of day onward
dbscan_eps <- 3 # DBSCAN eps in metres; for PPA species only
sd_limit <- 20 # location-error (SD) threshold in metres

# sink date time to log
sink(file = "data/log_preprocessing.log", append = FALSE)
glue("Holeybirds Pre-processing log from {Sys.time()}\n\n")
sink()

# open sinks
sink(file = "data/log_preprocessing.log", append = TRUE)

# run pre-processing
for (file in files) {
  # read in file
  df <- fread(file)

  tag_id <- unique(df$TAG)
  species <- unique(df$sp)

  # informative messages
  print(
    glue(
      "Pre-processing tag_id = {tag_id}; {species};
      N rows raw = {nrow(df)}
      "
    )
  )
  # add time in seconds (TIME is in milliseconds since the Unix epoch)
  df[, time := as.integer(as.numeric(TIME) / 1000)]

  # remove so-called attractor points around X = 257000, Y = 780000
  df <- atl_filter_bounds(
    data = df,
    x = "X", y = "Y",
    x_range = 257000.0 + c(-300, 300),
    y_range = 780000.0 + c(-300, 300)
  )

  print(
    glue("-- removed attractor points near X = {257000.0}, Y = {780000.0}")
  )

  # remove other attractor points: coordinates repeated 5 or more times
  df[, count := .N, by = c("X", "Y")]
  df <- df[count < 5, ]

  print(
    glue("-- removed other attractor points; coordinates repeated 5 or more times")
  )

  df$count <- NULL

  # filter on time of day -- this is being redone for safety
  # the column 'd' already marks daytime positions
  df[, c("date", "hour") := list(
    lubridate::date(as.POSIXct(as.numeric(TIME) / 1000,
      origin = "1970-01-01", tz = "Asia/Jerusalem"
    )),
    lubridate::hour(as.POSIXct(as.numeric(TIME) / 1000,
      origin = "1970-01-01", tz = "Asia/Jerusalem"
    ))
  )]

  df <- df[hour >= hour_start & hour < hour_end, ]

  print(
    glue("-- removed nighttime positions; kept positions between {hour_start}:00 and {hour_end}:00")
  )

  # calculate location error (SD) and filter for SD <= sd_limit metres
  df[, SD := sqrt(VARX + VARY + (2 * COVXY))]

  df <- df[SD <= sd_limit]

  print(
    glue("-- removed positions with SD > {sd_limit} m")
  )

  # split by date
  df_l <- split(df, by = "date")

  # keep only days with more than min_daily_fixes rows
  df_l <- df_l[vapply(df_l, function(le) {
    nrow(le) > min_daily_fixes
  }, FUN.VALUE = logical(1))]

  #### Filtering on speed OR density-based scan (DBSCAN) ####
  # only the speed-and-angle filter is applied below; a DBSCAN sketch follows this chunk
  # warn for all data lost, and skip to next individual if all data lost
  # else continue with pre-processing
  if (purrr::is_empty(df_l)) {
    print(
      glue::glue("List of data is empty, all data had fewer than \\
        {min_daily_fixes} rows")
    )

    print(
      glue("\n\n***\n\n")
    )

    next
  } else {
    # pre-processing continues here

    print(glue::glue("-- Pre-processing {length(df_l)} days' data separately"))

    # first calculate speeds and turning angles, handle 0 angles
    df_l <- lapply(df_l, function(le) {
      # check if filtering works
      le[, c("speed_in", "speed_out", "angle") := list(
        atl_get_speed(data = le, x = "X", y = "Y", time = "time", type = "in"),
        atl_get_speed(data = le, x = "X", y = "Y", time = "time", type = "out"),
        atl_turning_angle(data = le, x = "X", y = "Y", time = "time")
      )]
      # fix missing angles due to same position, assign 0
      le[, angle := nafill(angle, type = "const", fill = 0)]
    })

    # check if species is Hirundo (swallow) and handle differently
    # CURRENTLY HANDLED THE SAME!
    if (species == "Hirundo") {
      df_l <- lapply(df_l, function(le) {
        atl_filter_covariates(
          le,
          filters = c(
            glue::glue("(angle < {angle_threshold}) |\\
                       ((speed_in < {speed_threshold_ppa}) &\\
                       (speed_out < {speed_threshold_ppa}))")
          )
        )
      })
      print(glue::glue(
        "-- Filtered out spikes with speed > {speed_threshold_ppa} AND angle > {angle_threshold}"
      ))
    } else {
      df_l <- lapply(df_l, function(le) {
        atl_filter_covariates(
          le,
          filters = c(
            glue::glue("(angle < {angle_threshold}) |\\
                       ((speed_in < {speed_threshold_ppa}) &\\
                       (speed_out < {speed_threshold_ppa}))")
          )
        )
      })
      print(glue::glue(
        "-- Filtered out spikes with speed > {speed_threshold_ppa} AND angle > {angle_threshold}"
      ))
    }


    #### Median smoothing ####
    # again remove days with too few rows after the spike filter
    df_l <- df_l[vapply(df_l, function(le) {
      nrow(le) > min_daily_fixes
    }, FUN.VALUE = logical(1))]

    # check again if any data remains and if not, move to next ID
    if (purrr::is_empty(df_l)) {
      print(
        glue::glue("List of data is empty, all data removed by speed filter")
      )

      print(
        glue("\n\n***\n\n")
      )

      next
    } else {
      if (species == "Hirundo") {
        # pre-processing continues
        # apply a median smooth
        df_l <- lapply(df_l, atl_median_smooth,
          x = "X", y = "Y",
          time = "time", moving_window = moving_window_h
        )

        print(
          glue("-- Median smoothed coordinates, moving window = {moving_window_h}")
        )
      } else {
        # apply a median smooth
        df_l <- lapply(df_l, atl_median_smooth,
          x = "X", y = "Y",
          time = "time", moving_window = moving_window_ppa
        )

        print(
          glue("-- Median smoothed coordinates, moving window = {moving_window_ppa}")
        )
      } # if else for H or PPA ends

      #### Merging data and saving to file ####
      df <- rbindlist(df_l)

      # save data to file
      save_file <- glue(
        "data/processed/data_preprocessed/data_preproc_{species}_{tag_id}.csv"
      )
      print(
        glue("-- N rows pre-processed = {nrow(df)}")
      )

      # really save the data
      fwrite(df, file = save_file)

      print(
        glue("-- saved to {save_file}")
      )

      print(
        glue("\n\n***\n\n")
      )
    } # else case for data after filtering ends, data to file
  } # else case for min daily fixes data ends
} # for loops ends
sink()
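
The filtering header inside the loop mentions a density-based scan (DBSCAN) as an alternative to the speed-and-angle filter for the PPA species, and library(dbscan) and the dbscan_eps parameter are set up for it, but the step is not applied above. Below is a minimal sketch of how such a filter could look for one day's data; the minPts value of 5 is an assumption, not a value from the original analysis.

# sketch: drop low-density positions flagged as noise by DBSCAN
# 'le' is one day's data.table with X and Y coordinates
filter_dbscan <- function(le, eps = dbscan_eps, minPts = 5) {
  clusters <- dbscan::dbscan(le[, cbind(X, Y)], eps = eps, minPts = minPts)
  # cluster 0 marks noise points, i.e. isolated localisations
  le[clusters$cluster != 0, ]
}
# this would be applied per day, e.g. df_l <- lapply(df_l, filter_dbscan)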

Observations after pre-processing

files <- list.files(
  path = "data/processed/data_preprocessed",
  full.names = TRUE
)

npos <- lapply(files, fread) |>
  sapply(nrow) |>
  sum()
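
npos now holds the total number of positions remaining after pre-processing, but it is not reported above; a one-line sketch to log it:

# sketch: report the total number of pre-processed positions
message(glue("Total pre-processed positions: {npos}"))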

Estimate fix rate table

# load all preprocessed data
files <- list.files(
  path = "data/processed/data_preprocessed",
  full.names = TRUE
)
# read all data and compute fixes per minute per calendar day
data <- lapply(files, function(df) {
  df_ <- fread(df)

  df_ <- df_[, list(
    tracking_duration_min = (max(time) - min(time)) / (60), # div by 60 for mins
    nfixes = length(X)
  ), by = c("sp", "TAG", "date")]

  df_[, fix_per_min := nfixes / (tracking_duration_min)][]
})

# read WGI data and attach because we only want metrics for birds we
# actually use, i.e. those with an rrv score

# attach to rrv data
wgi <- fread("data/results/data_tracking_metrics_rrv.csv")

# bind list and merge with the rrv data
data <- rbindlist(data)
data <- merge(
  data,
  wgi,
  by = c("TAG", "sp")
)

# how many birds with preprocessed data and rrv
length(unique(data$TAG))

# save data to file
fwrite(data, file = "data/results/data_fix_per_min_daily.csv")

Check data remaining.

# read all preprocessed data and merge
data_preproc <- lapply(files, fread) |> rbindlist()
data_preproc[TAG %in% wgi$TAG, ][, list(n_id = length(unique(TAG))), by = "sp"]
# summarise the data per species
data_summary_fix_rate <- data[, list(
  mean_fix_pm = mean(fix_per_min, na.rm = TRUE),
  sd_fix_pm = sd(fix_per_min, na.rm = TRUE)
), by = c("sp")]

# save to file
fwrite(data_summary_fix_rate, file = "data/results/data_summary_fix_rate.csv")

Supplementary code to find attractor positions

This code finds the positions returned by the ATLAS system when it fails to calculate a good localisation.

# find all raw data files
files <- list.files("data/processed/data_id", full.names = TRUE)

# find attractor locations
data_attractors <- lapply(files, function(file) {
  # read in file
  df <- fread(file)

  tag_id <- unique(df$TAG)
  species <- unique(df$sp)

  # add time in seconds (TIME is in milliseconds since the Unix epoch)
  df[, time := as.integer(as.numeric(TIME) / 1000)]

  # keep the attractor points removed in the main pipeline:
  # coordinates repeated 5 or more times
  df[, count := .N, by = c("X", "Y")]
  df <- df[count >= 5, ]
  df
})

# bind list and plot locations
data_attractors <- rbindlist(data_attractors)
# get time of day - are these sleeping birds?
data_attractors[, hour := hour(as.POSIXct(time,
  origin = "1970-01-01",
  tz = "Asia/Jerusalem"
))]

data_attractors[, light := fifelse(hour >= 5 & hour < 20, "day", "night")]

# save data on attractors
fwrite(
  data_attractors,
  file = "data/results/data_attractors.csv"
)
(
  ggplot(data_attractors) +
    geom_histogram(
      aes(hour)
    ) +
    geom_vline(
      xintercept = c(5, 20),
      col = "red"
    ) +
    facet_wrap(~sp) +
    labs(
      x = "Hour of day",
      y = "# Duplicated positions"
    )
) |>
  ggsave(
    filename = "figures/fig_duplicated_positions.png"
  )

Read in spatial data to plot attractors in correct locations.

# read in spatial data
library(sf)

landcover <- st_read("data/spatial/hula_lc_vector/HulaValley.shp")
p_attractor <- ggplot(data_attractors) +
  geom_sf(
    data = landcover,
    fill = "lightgrey"
  ) +
  geom_point(
    aes(X, Y, fill = sp, size = count),
    shape = 21
  ) +
  scale_size_continuous(
    range = c(0.1, 10)
  ) +
  coord_sf(
    xlim = st_bbox(landcover)[c("xmin", "xmax")],
    ylim = st_bbox(landcover)[c("ymin", "ymax")]
  ) +
  facet_wrap(
    ~light
  )

ggsave(
  p_attractor,
  filename = "figures/fig_attractor_locations.png"
)

Where were attractor positions removed?

attractors_sf <- st_as_sf(data_attractors[, c("sp", "X", "Y")],
  coords = c("X", "Y"),
  crs = 2039
)
lc_attractor <- terra::extract(
  terra::vect(landcover[, c("Name")]), terra::vect(attractors_sf)
)
# WIP: may need further processing; the direct assignment below fails when
# extract() returns duplicate landcover rows for points on polygon boundaries
data_attractors$lc <- lc_attractor$Name
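
One way to work around the duplicated rows noted above is to keep a single landcover class per point before assigning. A minimal sketch, assuming the first column of the terra::extract() output indexes the input points:

# sketch: keep the first landcover match per extracted point
lc_first <- lc_attractor[!duplicated(lc_attractor[[1]]), ]
data_attractors$lc <- lc_first$Name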

