R/filter_tRackIT.R

Defines functions filter_tRackIT

Documented in filter_tRackIT

#' filter automatic (tRackIT.OS)
#'
#' @description Filtering of raw signals based on transmitter frequency,
#'   signal duration and runtime of the transmitter. The function reads every
#'   file found in a given folder. Make sure that there is one file per station
#'   and that each file has at least the following columns:
#'   timestamp = time of the received signal, expected format
#'   "%Y-%m-%d %H:%M:%S" (e.g. 1970-01-29 15:30:22);
#'   duration = signal length of the recorded signal;
#'   frequency = frequency in kHz (e.g. 150150.2);
#'   max = the max signal strength in dBW of the recorded signal (peak);
#'   receiver = name of the receiver device (usually 0, 1, 2, 3 for a
#'   4 antenna station);
#'   station = name of the station as character string.
#'   Additional columns are kept unless they contain only missing values.
#'
#' @details For every input file, rows without a timestamp are removed, then
#'   only rows are kept whose frequency deviates at most \code{freq_er} from
#'   \code{freq}, whose duration lies in \code{[d_min, d_max]} and whose
#'   timestamp lies between \code{start} and \code{end}. Rows for which one of
#'   these conditions cannot be evaluated (missing values) are discarded. The
#'   absolute frequency deviation is stored in the added column
#'   \code{sig_diff}. A result is only written if at least 10 signals remain;
#'   the output file is named \code{<input file name>_filtered.csv} and is
#'   placed in \code{animal$path$filtered}.
#'
#' @author Jannis Gottwald
#'
#' @param animal list, generated by initanimal function; its element
#'   \code{path$filtered} is used as the output directory
#' @param path_to_data string, path to the folder holding the awk filtered
#'   files (one per station)
#' @param freq_er numeric, accepted deviation from tag frequency in khz
#' @param d_min numeric, minimum duration of signal
#' @param d_max numeric, maximum duration of signal
#' @param start string, start of tracking YYYY-MM-DD
#' @param end string, end of tracking YYYY-MM-DD
#' @param freq num, tag frequency (khz)
#'
#' @return Invisibly, a list with one element per input file: the path of the
#'   written filtered file, or \code{NULL} if fewer than 10 signals passed the
#'   filters and nothing was written.
#'
#' @export
#'
#' @examples
#' #projroot<-paste0(getwd(),"/tRackIT_test_data/")
#' #anml<-getAnimal(projroot =projroot, animalID = "woodpecker")
#' #filter_tRackIT(animal = anml, freq_er = 2, path_to_data = "/test_project/data/logger_data_csv/", d_min = anml$meta$duration_min, d_max = anml$meta$duration_max, freq = anml$meta$freq, start = anml$meta$start, end = anml$meta$end)
#'

filter_tRackIT <- function(animal = NULL, freq_er = 2, path_to_data = NULL, d_min = NULL, d_max = NULL, start = NULL, end = NULL, freq = NULL) {

  # An argument counts as "not provided" when it is NULL, empty or NA.
  # Checking NULL first matters: is.na(NULL) is logical(0), which makes a bare
  # `if (is.na(x))` fail with "argument is of length zero".
  is_unset <- function(x) {
    is.null(x) || length(x) == 0 || anyNA(x)
  }

  # error handling
  if (is_unset(freq)) {
    stop("No transmitter frequency provided")
  }

  # Without an output directory the result path would degrade to
  # "/<name>_filtered.csv", i.e. the file system root.
  if (is.null(animal) || is_unset(animal$path$filtered)) {
    stop("No animal (with animal$path$filtered as output directory) provided")
  }

  if (is_unset(path_to_data)) {
    stop("No data path provided")
  }

  if (is_unset(d_min)) {
    stop("No expected minimum duration of signals is provided. This is important for signal filtering. Set it low (0) if you are unsure.")
  }

  if (is_unset(d_max)) {
    stop("No expected maximum duration of signals is provided. This is important for signal filtering. Set it high (100) if you are unsure.")
  }

  if (is_unset(start)) {
    stop("No start date of the tagging period provided")
  }

  if (is_unset(end)) {
    stop("No end date of the tagging period provided")
  }

  # get files
  fls <- list.files(path_to_data, full.names = TRUE)

  if (length(fls) == 0) {
    stop(paste0("No files found in ", path_to_data))
  }

  nms_expected <- c("receiver", "timestamp", "frequency", "duration", "max", "station")
  out_dir <- animal$path$filtered
  tag_freq <- as.numeric(freq)
  # minimum number of signals that must survive for a file to be written
  min_signals <- 10

  written <- lapply(fls, function(f) {
    stat <- as.data.frame(data.table::fread(f))

    nms_missing <- setdiff(nms_expected, colnames(stat))
    if (length(nms_missing) > 0) {
      stop(paste0("Required column ", nms_missing, " not found! "))
    }

    stat <- stat[!is.na(stat$timestamp), , drop = FALSE]

    # Drop columns that hold no information at all, but never a required one:
    # the filters below address the required columns by name.
    # Columns are used under the names found in the file; they are not
    # renamed by position.
    only_na <- colSums(is.na(stat)) == nrow(stat)
    stat <- stat[, !only_na | colnames(stat) %in% nms_expected, drop = FALSE]

    # filters
    stat$sig_diff <- abs(stat$frequency - tag_freq)
    # NOTE(review): a date-only `end` compares like the very start of that
    # day, so later signals on the end date are not matched - confirm that
    # this is intended.
    keep <- stat$sig_diff <= freq_er &
      stat$duration >= d_min & stat$duration <= d_max &
      stat$timestamp >= start & stat$timestamp <= end
    # which() discards NA comparisons; logical indexing with NA would insert
    # all-NA rows that end up in the output and count towards the threshold.
    stat <- stat[which(keep), , drop = FALSE]

    if (nrow(stat) < min_signals) {
      return(NULL)
    }

    # strip only a trailing ".csv" extension (literal dot, end of name)
    out_file <- file.path(out_dir, paste0(sub("\\.csv$", "", basename(f)), "_filtered.csv"))
    data.table::fwrite(stat, out_file)
    out_file
  })

  invisible(written)
}
Nature40/tRackIT documentation built on Nov. 21, 2023, 3:43 a.m.