ohun: Optimizing Acoustic Signal Detection

Documented in consensus_detection

#' @title Remove ambiguous detections
#'
#' @description \code{consensus_detection} removes ambiguous detections
#' @usage consensus_detection(detection, by = "overlap", filter = "max", cores = 1, pb = TRUE)
#' @param detection Data frame or selection table (using the warbleR package's format, see \code{\link[warbleR]{selection_table}}) with the output of \code{\link{label_detection}} containing the start and end of the signals. Must contained at least the following columns: "sound.files", "selec", "start", "end" and "detection.class" (the last one is generated by \code{\link{label_detection}}). It must also contained the column indicated in the 'by' argument (which is 'overlap' by default).
#' @param by Character vector of length 1 indicating a column in 'detection' that will be used to filter detections. Must refer to a numeric column. Default is 'overlap', which is return by \code{\link{label_detection}}.
#' @param filter Character vector of length 1 indicating the criterium used to filter the column refer to by the 'by' argument. Current options are 'max' (maximum) and 'min' (minimum). Default is 'max'.
#' @param cores Numeric. Controls whether parallel computing is applied.
#'  It specifies the number of cores to be used. Default is 1 (i.e. no parallel computing).
#' @param pb Logical argument to control progress bar. Default is \code{TRUE}.
#' @return A data frame or selection table (if 'detection' was also a selection table, warbleR package's format, see \code{\link[warbleR]{selection_table}}) as in 'X' but removing ambiguous detections (split and merged positives).
#' @export
#' @name consensus_detection
#' @details This function removes ambiguous detections keeping only the one that maximizes a criterium given by 'filter'. By default it keeps the detection with the highest overlap to the reference signal. It works on the output of \code{\link{label_detection}}. Useful when several detections match the same reference as in the case of template detection with multiple templates (see \code{\link{template_detector}}).
#'
#' @examples {
#'   # load example data
#'   data("lbh1", "lbh_reference")
#'
#'   # save sound files
#'   tuneR::writeWave(lbh1, file.path(tempdir(), "lbh2.wav"))
#'
#'   # template for the first sound file in 'lbh_reference'
#'   templ1 <- lbh_reference[1, ]
#'
#'   # generate template correlations
#'   tc <- template_correlator(
#'     templates = templ1, path = tempdir(),
#'     files = "lbh2.wav"
#'   )
#'
#'   # template detection
#'   td <- template_detector(template.correlations = tc, threshold = 0.12)
#'
#'   # this detection generates 2 split positives
#'   diagnose_detection(
#'     reference = lbh_reference[lbh_reference == "lbh2.wav", ],
#'     detection = td
#'   )
#'
#'   # label detection
#'   ltd <- label_detection(
#'     reference = lbh_reference[lbh_reference == "lbh2.wav", ],
#'     detection = td
#'   )
#'
#'   # now they can be filter to keep the detection with the highest score for each split
#'   ftd <- consensus_detection(ltd, by = "scores")
#'
#'   # splits must be 0
#'   diagnose_detection(
#'     reference = lbh_reference[lbh_reference == "lbh2.wav", ],
#'     detection = ftd
#'   )
#' }
#'
#' @references 
#' #' Araya-Salas, M., Smith-Vidaurre, G., Chaverri, G., Brenes, J. C., Chirino, F., Elizondo-Calvo, J., & Rico-Guevara, A. 2022. ohun: an R package for diagnosing and optimizing automatic sound event detection. BioRxiv, 2022.12.13.520253. https://doi.org/10.1101/2022.12.13.520253
#' 
#' @seealso \code{\link{label_detection}}, \code{\link{template_detector}}
#' @author Marcelo Araya-Salas (\email{marcelo.araya@@ucr.ac.cr}).

# function to filter detection based on overlap
consensus_detection <-
  function(detection,
           by = "overlap",
           filter = "max",
           cores = 1,
           pb = TRUE) {
    # save start time
    start_time <- proc.time()

    # check arguments
    arguments <- as.list(base::match.call())

    # add objects to argument names
    for (i in names(arguments)[-1]) {
      arguments[[i]] <- get(i)
    }

    # check each arguments
    check_results <- check_arguments(fun = arguments[[1]], args = arguments)

    # report errors
    checkmate::reportAssertions(check_results)

    # check if by column is found
    if (!by %in% names(detection)) {
      stop2("'by' column not found")
    }

    # add row id column to la
    detection$..row.id <- seq_len(nrow(detection))

    # split in false and true positives
    false.positives <-
      as.data.frame(detection[grep("false.positive", detection$detection.class), ])
    true.positives <-
      as.data.frame(detection[grep("true.positive", detection$detection.class, FALSE), ])

    # set clusters for windows OS
    if (Sys.info()[1] == "Windows" & cores > 1) {
      cl <- parallel::makeCluster(cores)
    } else {
      cl <- cores
    }

    # run loop over every detected signal in the reference
    filter_tp_list <-
      warbleR:::.pblapply(X = unique(true.positives$reference), cl = cl, message = "computing consensus detections", total = 1, pbar = pb, function(x) {
        # get those detection that overlapped with x
        X <- true.positives[true.positives$reference == x, ]

        # order by 'by'
        X <- X[order(X[, by, drop = TRUE], decreasing = TRUE), ]

        # filter
        if (filter == "max") {
          X <- X[1, , drop = FALSE]
        } else {
          X <- X[nrow(X), , drop = FALSE]
        }

        return(X)
      })

    # put together in a data frame
    filter_tp_df <- do.call(rbind, filter_tp_list)

    # add false positives
    filtered_detection <- rbind(false.positives, filter_tp_df)

    # sort back
    filtered_detection <-
      filtered_detection[order(filtered_detection$..row.id), ]

    # convert back to selection table
    if (warbleR::is_selection_table(detection)) {
      # keep only those rows in filtered_detections
      detection <-
        detection[detection$..row.id %in% filtered_detection$..row.id, ]
      detection <- detection[order(detection$..row.id), ]

      # overwrite labeled_detections
      filtered_detection <- detection

      # fix call
      attributes(filtered_detection)$call <- base::match.call()

      # add elapsed time
      attributes(filtered_detection)$elapsed.time.s <-
        as.vector((proc.time() - start_time)[3])
    }
    # remove column with row names
    filtered_detection$..row.id <- NULL

    return(filtered_detection)
  }


##############################################################################################################
#' alternative name for \code{\link{consensus_detection}}
#'
#' @keywords internal
#' @details see \code{\link{consensus_detection}} for documentation. \code{\link{filter_detection}} will be deprecated in future versions.
#' @export

filter_detection <- consensus_detection