# tRackIT: Process data recorded with automatic radio-tracking stations
#
# Documented in activity_predict_tRackIT

#' Activity classification
#'
#' @description Classifies the variable files of an animal with the two random
#'   forest models stored in the `extdata` folder of the package: `m_r1.rds`
#'   for observations recorded by one receiver and `m_r2.rds` for observations
#'   recorded by two receivers.
#'
#' @details All files found in `animal$path$vars` are read and row-bound, and a
#'   column called `Name` is renamed to `station`. For each receiver count
#'   (1 and 2) the predictor columns listed in the model's `selectedvars` are
#'   converted to numeric, and rows containing missing or infinite values are
#'   removed. A subset is only classified when at least 50 rows remain;
#'   otherwise it is left out of the result. The combined predictions are
#'   written to `<animalID>_classified.csv` in `animal$path$classification`.
#'
#' @author Jannis Gottwald
#'
#' @param animal list, list generated by initAnimal function
#' @param get_data logical, if TRUE classification will be returned as
#'   data.frame and stored in the classification folder. If FALSE
#'   classification will only be stored
#'
#' @return If `get_data` is TRUE, a data.frame with the columns `timestamp`,
#'   `station`, `receiver`, `max`, `n_receivers` and `prediction`; otherwise
#'   `NULL`, invisibly.
#'
#' @export
#'
#' @examples
#' #projroot<-paste0(getwd(),"/tRackIT_test_data/")
#' #anml<-getAnimal(projroot =projroot, animalID = "woodpecker")
#' # predict
#' #activity_predict_tRackIT(animal = anml, get_data=TRUE)
#'
activity_predict_tRackIT <- function(animal = NULL, get_data = FALSE) {
  # error handling
  if (is.null(animal)) {
    stop("No animal file provided. Please see ?initAnimal or ?getAnimal")
  }

  # system.file() returns "" when the file is absent, so file.exists() is FALSE
  model_path_1 <- system.file("extdata", "m_r1.rds", package = "tRackIT")
  if (!file.exists(model_path_1)) {
    stop("random forest model m_r1.rds is missing. Download models from https://doi.org/10.17192/fdr/79 and store the .rds files in the folder /extdata of the tRackIt package ")
  }

  model_path_2 <- system.file("extdata", "m_r2.rds", package = "tRackIT")
  if (!file.exists(model_path_2)) {
    stop("random forest model m_r2.rds is missing. Download models from https://doi.org/10.17192/fdr/79 and store the .rds files in the folder /extdata of the tRackIt package")
  }

  # load models
  # model for predictions based on one receiver
  mod_rec_1 <- readRDS(model_path_1)
  # model for predictions based on two receivers
  mod_rec_2 <- readRDS(model_path_2)

  # read all variable files of the individual
  fls <- list.files(animal$path$vars, full.names = TRUE)
  if (length(fls) == 0) {
    stop("No variable files found in ", animal$path$vars, call. = FALSE)
  }

  data <- plyr::ldply(fls, function(x) {
    data.table::fread(x)
  })

  names(data)[names(data) == "Name"] <- "station"

  # classify each receiver count with the model trained for it
  data_1 <- .classify_receiver_subset(data, n_receivers = 1, model = mod_rec_1)
  data_2 <- .classify_receiver_subset(data, n_receivers = 2, model = mod_rec_2)

  # combine data and store in results folder
  data_predicted <- rbind(data_1, data_2)
  data.table::fwrite(data_predicted, paste0(animal$path$classification, animal$meta$animalID, "_classified.csv"))

  if (get_data) {
    return(data_predicted)
  }
  invisible(NULL)
}

# Private helper: keeps the rows of `data` whose `n_receivers` equals
# `n_receivers`, cleans them and classifies them with `model`.
#
# Returns a data.frame with the identifier columns plus `prediction`, or an
# empty data.frame() when fewer than `min_rows` clean rows remain.
.classify_receiver_subset <- function(data, n_receivers, model, min_rows = 50) {
  id_cols <- c("timestamp", "station", "receiver", "max", "n_receivers")
  predictors <- model$selectedvars

  # fail early with a readable message instead of "undefined columns selected"
  missing_cols <- setdiff(c(id_cols, predictors), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Columns missing from the variable files: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # which() skips rows with NA in n_receivers; the original produced all-NA
  # rows for them, which complete.cases() removed afterwards anyway
  rows <- which(data$n_receivers == n_receivers)
  obs <- as.data.frame(data)[rows, c(id_cols, predictors), drop = FALSE]

  # lapply() always returns one element per column, whereas sapply() returns
  # a matrix, a vector or a list depending on the number of rows and columns
  obs[predictors] <- lapply(obs[predictors], as.numeric)

  # clean: drop rows with missing values, then rows with infinite values in
  # any numeric column
  obs <- obs[stats::complete.cases(obs), , drop = FALSE]
  numeric_cols <- vapply(obs, is.numeric, logical(1))
  has_inf <- Reduce(
    `|`,
    lapply(obs[numeric_cols], is.infinite),
    rep(FALSE, nrow(obs))
  )
  obs <- obs[!has_inf, , drop = FALSE]
  # reset row names so the combined result is numbered consecutively
  rownames(obs) <- NULL

  # predict only when enough clean rows remain
  if (nrow(obs) < min_rows) {
    if (nrow(obs) > 0) {
      message(
        "Skipping ", nrow(obs), " observation(s) with n_receivers == ",
        n_receivers, ": at least ", min_rows, " are required for prediction."
      )
    }
    return(data.frame())
  }

  obs$prediction <- caret::predict.train(model, obs)
  obs[, c(id_cols, "prediction")]
}