tRackIT: Process data recorded with automatic radio-tracking stations

Documented in activity_vars_tRackIT

#' calculate variables for activity classification
#'
#' @description calculates variables for the ML-based activity classification
#'
#'
#' @author Jannis Gottwald
#'
#' @param animal list, list generated by initAnimal function
#' @param t_col string, colname of timestamp column
#' @param s_col string, colname of signal strength column
#' @param r_col tring, name of the column containing the name of each receiver
#' @param tz string, time zone of timestamp
#' @param rscale numeric, numericvalue to rescale to dBW scale
#'
#' @export
#'
#' @examples
#' # get animal
#' #projroot<-paste0(getwd(),"/tRackIT_test_data/")
#' #anml<-getAnimal(projroot =projroot, animalID = "woodpecker")
#' # calculate activity vars
#' #activity_vars_tRackIT(animal = anml, t_col = "timestamp", s_col = "max", r_col = "receiver", tz = "CET", rscale = 0)
#'


activity_vars_tRackIT <- function(animal, t_col, s_col, r_col, tz, rscale = 0) {

  # matching function
  d <- function(x, y) abs(x - y)

  #get filtered files
  fls <- list.files(animal$path$filtered, full.names = TRUE)
  
  #error handling
  if (length(fls) == 0) {
    stop("No files available from input directory")
  }
  
  # create variables per filtered file
  lapply(fls, function(x) {
    
    data <- data.table::fread(x)
    data <- as.data.frame(data)
    # rename columns
    data$max_signal <- data[, s_col]
    # scale to dbw if necessary
    data$max_signal <- data$max_signal - rscale
    data$timestamp <- data[, t_col]
    data$receiver <- data[, r_col]

    # deal with timestamp format inconsitencies
    data$timestamp <- gsub("T", " ", data$timestamp)
    data$timestamp <- gsub("Z", "", data$timestamp)
    data$timestamp <- as.POSIXct(data$timestamp, tz = tz)

    # split into five minute bins to deal with reception gaps
    lst1 <- split(data, droplevels(cut(as.POSIXct(data$timestamp,
      format = "%Y-%m-%d %H:%M:%S", tz = tz
    ), breaks = "5 min")))


    # identify receiver with highest number of data points per 5 minute bin
     df <- plyr::ldply(lst1, function(k) {
      tmp <- k[k$receiver == tail(names(sort(table(k$receiver))), 1)[1], ]


      # 5 Minutes= 300 seconds = ~300 beeps if tag with beep intervall of 1s is perfectly received. 100 entries per receiver is equivalent to ~30% reception time. Should be adapted to specs of tags- e.g. tags that have a beep intervall of 2s should at least have 50 entries.

      if (nrow(tmp) >= 50) {

        # identify receiver with second highest number of data points per 5 minute bin
        tmp2 <- k[k$receiver == tail(names(sort(table(k$receiver))), 2)[1], ]
        nrow(tmp2) / nrow(tmp)

        # if number of data points of second receiver is at least 80% of first receiver variables such as co-variance can be calculated
        if ((nrow(tmp2) / nrow(tmp)) >= 0.8 & (tail(names(sort(table(k$receiver))), 2)[1] != tail(names(sort(table(k$receiver))), 1)[1])) {
          
          tmp$time_control1 <- tmp$timestamp
          tmp2$time_control2 <- tmp2$timestamp
          tmp2$max_signal_2 <- tmp2$max_signal

          tmp2 <- tmp2[, c("timestamp", "max_signal_2", "time_control2")]
          idx <- sapply(tmp$timestamp, function(x) which.min(d(x, tmp2$timestamp))) # find matches

          match <- cbind(tmp, tmp2[idx, -1, drop = FALSE])
          match$timediff <- as.numeric(abs(difftime(match$time_control1, match$time_control2)))
          match <- as.data.frame(match)
          # exclude timediffs>1
          match$max_signal_2[match$timediff > 1] <- NA
          match$n_receivers <- 2

          match$time_control1 <- NULL
          match$time_control2 <- NULL
          match$timediff <- NULL
          return(match)
        } else {

          # print("no")

          tmp$max_signal_2 <- NA

          tmp$n_receivers <- 1

          return(tmp)
        }
      }
    })



    if (nrow(df) > 100) {

   #calculate varialbes for 2 receivers
      df <- df[order(df$timestamp), ]
      
      if (nrow(df[!is.na(df$max_signal_2), ]) > 100) {
        
        df$diff <- abs(df$max_signal - df$max_signal_2)
        
        df$diff_std <- RollingWindow::RollingStd(x = df$diff, window = 10, na_method = "window")
      } else {
        df$diff <- NA
        df$diff_std <- NA
      }


      # 1 receiver variables

      # smooth data with max, mean and hampel

      hampel_1 <- pracma::hampel(x = df$max_signal, k = 10, t0 = 1)
      df$hampel <- hampel_1$y
      
      df$max <- RollingWindow::RollingMax(x = df$max_signal, window = 10, na_method = "window")
      
      df$mean <- RollingWindow::RollingMean(x = df$max_signal, window = 10, na_method = "window")
      
      df$skew <- RollingWindow::RollingSkew(x = df$max_signal, window = 10, na_method = "window")

      df$sumsq <- RollingWindow::RollingSS(x = df$max_signal, window = 10, na_method = "window")

      # calculate variables on RollingWindow::Rolling max smoothed signals

      df$sumsq_max <- RollingWindow::RollingSS(x = df$max, window = 10, na_method = "window")

      # calculate variables on RollingWindow::Rolling mean smoothed signals

      df$sumsq_mean <- RollingWindow::RollingSS(x = df$mean, window = 10, na_method = "window")

      # calculate variables on hampel smoothed signals
      df$var_hampel <- RollingWindow::RollingVar(x = df$hampel, window = 10, na_method = "window")
      
      df$std_hampel <- RollingWindow::RollingStd(x = df$hampel, window = 10, na_method = "window")
      
      df$sumsq_hampel <- RollingWindow::RollingSS(x = df$hampel, window = 10, na_method = "window")

      # write file per station to variable folder
      data.table::fwrite(df, paste0(animal$path$vars, "/", basename(x)))


    }
  })
}


# deprecated variables
# df$var<-RollingWindow::RollingVar(x = df$max_signal, window = 10, na_method = "window")
# df$std<-RollingWindow::RollingStd(x = df$max_signal, window = 10, na_method = "window")
# df$kurt<-RollingWindow::RollingKurt(x = df$max_signal, window = 10, na_method = "window")


# df$diff_var<-RollingWindow::RollingVar(x = df$diff, window = 10, na_method = "window")
# df$cor_max<-RollingWindow::RollingCorr(x = df$max_signal, y=df$max_signal_2, window = 10, na_method = "window")
# df$cov_max<-RollingWindow::RollingCov(x = df$max_signal, y=df$max_signal_2, window = 10, na_method = "window")

# df$var_mean<-RollingWindow::RollingVar(x = df$mean, window = 10, na_method = "window")
# df$std_mean<-RollingWindow::RollingStd(x = df$mean, window = 10, na_method = "window")
# df$var_max<-RollingWindow::RollingVar(x = df$max, window = 10, na_method = "window")
# df$std_max<-RollingWindow::RollingStd(x = df$max, window = 10, na_method = "window")