#' Find overlapping selections
#'
#' \code{overlapping_sels} finds which selections overlap in time within a given sound file.
#' @usage overlapping_sels(X, index = FALSE, pb = TRUE, max.ovlp = 0, relabel = FALSE,
#' drop = FALSE, priority = NULL, priority.col = NULL, unique.labs = NULL,
#' indx.row = FALSE, parallel = 1, verbose = TRUE)
#' @param X 'selection_table' object or data frame with the following columns: 1) "sound.files": name of the sound
#' files, 2) "selec": number of the selections, 3) "start": start time of selections, 4) "end":
#' end time of selections.
#' @param index Logical. Indicates if only the index of the overlapping selections would be returned.
#' Default is \code{FALSE}.
#' @param pb Logical argument to control progress bar and messages. Default is \code{TRUE}.
#' @param max.ovlp Numeric vector of length 1 specifying the maximum overlap allowed (in seconds)
#' . Default is 0.
#' @param relabel Logical. If \code{TRUE} then selection names ('selec' column) are reset within each sound files.
#' Default is \code{FALSE}.
#' @param drop Logical. If \code{TRUE}, when 2 or more selections overlap the function will remove
#' all but one of the overlapping selection. Default is \code{FALSE}.
#' @param priority Character vector. Controls the priority criteria used for removing overlapped selections. It
#' must list the levels of the column used to determine priority (argument priority.col) in the desired
#' priority order. Default is \code{NULL}.
#' @param priority.col Character vector of length 1 with the name of the column use to determine the priority of
#' overlapped selections. Default is \code{NULL}.
#' @param unique.labs DEPRECATED.
#' @param indx.row Logical. If \code{TRUE} then a character column with the indices of all selections that overlapped with
#' each selection is added to the ouput data frame (if \code{index = TRUE}). For instance, if the selections in rows 1,2
#' and 3 all overlapped with each other, the 'indx.row' value would be "1/2/3" for all. However, if selection 3 only overlaps
#' with 2 but not with 1, then it returns, "1/2" for row 1, "1/2/3" for row 2, and "2/3" for row 3. Default is \code{FALSE}.
#' @param parallel Numeric. Controls whether parallel computing is applied.
#' It specifies the number of cores to be used. Default is 1 (i.e. no parallel computing).
#' @param verbose Logical to control if messages are printed to the console.
#' @return A data frame with the columns in X plus an additional column ('ovlp.sels') indicating
#' which selections overlap. For instance, if the selections in rows 1,2
#' overlap and 2 and 3 also overlap, the 'ovlp.sels' label would be the same for all 3 selections. If
#' \code{drop = TRUE} only the non-overlapping selections are returned.and if 2 or more selections
#' overlap only the first one is kept. The arguments 'priority' and 'priority.col' can be used to modified the criterium for droping overlapping selections.
#' @export
#' @name overlapping_sels
#' @examples
#' {
#' # no overlap
#' overlapping_sels(X = lbh_selec_table)
#'
#' # modified lbh_selec_table to make the first and second selection overlap
#' Y <- lbh_selec_table
#' Y$end[4] <- 1.5
#'
#' overlapping_sels(X = Y)
#'
#' # drop overlapping
#' overlapping_sels(X = Y, drop = TRUE)
#'
#' # get index instead
#' overlapping_sels(X = Y, index = TRUE)
#' }
#' @details This function detects selections within a selection table that overlap in time. Selections must be
#' listed in a data frame similar to \code{\link{lbh_selec_table}}. Note that row names are set to \code{1:nrow(X)}.
#' @seealso \code{\link{filtersels}}, \code{\link{lbh_selec_table}}
#'
#' @references {
#' Araya-Salas, M., & Smith-Vidaurre, G. (2017). warbleR: An R package to streamline analysis of animal acoustic signals. Methods in Ecology and Evolution, 8(2), 184-191.
#' }
#' @author Marcelo Araya-Salas (\email{marcelo.araya@@ucr.ac.cr})
# last modification on mar-13-2018 (MAS)
overlapping_sels <- function(X, index = FALSE, pb = TRUE, max.ovlp = 0, relabel = FALSE, drop = FALSE,
priority = NULL, priority.col = NULL, unique.labs = NULL, indx.row = FALSE, parallel = 1, verbose = TRUE) {
#### set arguments from options
# get function arguments
argms <- methods::formalArgs(overlapping_sels)
# get warbleR options
opt.argms <- if (!is.null(getOption("warbleR"))) getOption("warbleR") else SILLYNAME <- 0
# remove options not as default in call and not in function arguments
opt.argms <- opt.argms[!sapply(opt.argms, is.null) & names(opt.argms) %in% argms]
# get arguments set in the call
call.argms <- as.list(base::match.call())[-1]
# remove arguments in options that are in call
opt.argms <- opt.argms[!names(opt.argms) %in% names(call.argms)]
# set options left
if (length(opt.argms) > 0) {
for (q in seq_len(length(opt.argms))) {
assign(names(opt.argms)[q], opt.argms[[q]])
}
}
if (!is.null(unique.labs)) {
warning2("'unique.labs' has been deprecated")
}
# if X is not a data frame
if (!any(is.data.frame(X), is_selection_table(X))) stop2("X is not of a class 'data.frame', 'selection_table'")
# check column names
if (!all(c(
"sound.files", "selec",
"start", "end"
) %in% colnames(X))) {
stop2(paste(paste(c("sound.files", "selec", "start", "end")[!(c(
"sound.files", "selec",
"start", "end"
) %in% colnames(X))], collapse = ", "), "column(s) not found in data frame"))
}
# if there are NAs in start or end stop
if (any(is.na(c(X$end, X$start)))) stop2("NAs found in start and/or end")
# if end or start are not numeric stop
if (any(!is(X$end, "numeric"), !is(X$start, "numeric"))) stop2("'start' and 'end' must be numeric")
# if any start higher than end stop
if (any(X$end - X$start <= 0)) stop2(paste("Start is higher than or equal to end in", length(which(X$end - X$start <= 0)), "case(s)"))
# priority
if (!is.null(priority.col) & !is.null(priority)) {
# if col not found
if (!priority.col %in% names(X)) stop2(paste("priority.col", priority.col, "not found"))
# all levels of priority col should be in priority
if (!all(priority %in% unique(X[, priority.col]))) stop2("Not all levels of 'priority.col' included in 'priority'")
}
# order by start time
X <- X[order(X$sound.files, X$start), ]
# save rowname
X$...ROWNAME... <- 1:nrow(X)
# function that runs on a data frame from a single sound file
ovlpFUN <- function(w) {
Y <- X[X$sound.files == w, ]
# only if there is more than 1 selection for that sound file
if (nrow(Y) > 1) {
# relabel
if (relabel) {
Y$selec <- 1:nrow(Y)
}
# determine which ones overlap
Y$indx.row <- sapply(1:nrow(Y), function(i) {
# if any of those after i overlap
ovlp_rows <- Y$...ROWNAME...[(Y$end - max.ovlp) > Y$start[i] & as.numeric(Y$...ROWNAME...) < as.numeric(Y$...ROWNAME...[i]) | Y$start < (Y$end[i] - max.ovlp) & as.numeric(Y$...ROWNAME...) > as.numeric(Y$...ROWNAME...[i])]
if (length(ovlp_rows) > 0) {
out <- paste(sort(as.numeric(c(Y$...ROWNAME...[i], ovlp_rows))), collapse = "/")
} else {
out <- NA
}
return(out)
})
} else {
Y$indx.row <- NA
}
return(as.data.frame(Y))
}
# set clusters for windows OS
if (Sys.info()[1] == "Windows" & parallel > 1) {
cl <- parallel::makePSOCKcluster(getOption("cl.cores", parallel))
} else {
cl <- parallel
}
# run loop apply function
ovlp_df_l <- pblapply_wrblr_int(pbar = pb, X = unique(X$sound.files), cl = cl, FUN = ovlpFUN)
ovlp_df <- do.call(rbind, ovlp_df_l)
rownames(ovlp_df) <- 1:nrow(ovlp_df)
ovlp_df$ovlp.sels <- NA
# give unique label to each group of overlapping selections
if (any(!is.na(ovlp_df$indx.row))) {
overlap_row_id <- strsplit((ovlp_df$indx.row), split = "/")
for (i in which(!is.na(ovlp_df$indx.row))) {
ovlp_df$ovlp.sels[i] <- if (i == 1) 1 else if (any(overlap_row_id[[i]] %in% overlap_row_id[[i - 1]])) ovlp_df$ovlp.sels[i - 1] else if (any(!is.na(ovlp_df$ovlp.sels[1:(i - 1)]))) max(ovlp_df$ovlp.sels[1:(i - 1)], na.rm = TRUE) + 1 else 1
}
}
if (index) {
return(which(!is.na(ovlp_df$ovlp.sels)))
} else {
# remove the ones overlapped
if (drop) {
# remove based on priority
if (!is.null(priority.col) & !is.null(priority) & length(priority) > 1) {
# remove duplicated labels
priority <- priority[!duplicated(priority)]
# create numeric vector to order resulting data frame before dropping
ordr <- as.character(ovlp_df[, priority.col])
for (i in seq_len(length(priority))) {
ordr[ordr == priority[i]] <- i
}
# order based on priority
ovlp_df <- ovlp_df[order(ovlp_df$sound.files, as.numeric(ordr)), ]
}
org.ovlp <- sum(!is.na(ovlp_df$ovlp.sels))
ovlp_df <- ovlp_df[dups <- !duplicated(ovlp_df[, c("ovlp.sels", "sound.files")]) | is.na(ovlp_df$ovlp.sels), ]
}
if (pb & verbose) {
if (any(!is.na(ovlp_df$ovlp.sels))) {
if (drop) {
message2(paste(org.ovlp, "selections overlapped,", sum(!is.na(ovlp_df$ovlp.sels)), "were removed"))
} else {
message2(paste(sum(!is.na(ovlp_df$ovlp.sels)), "selections overlapped"))
}
} else {
message2("No overlapping selections were found")
}
}
# rename rows
rownames(ovlp_df) <- ovlp_df$...ROWNAME...
# remove ...ROWNAME...
ovlp_df$...ROWNAME... <- NULL
# set indx.row to NA when no ovlp.sels
if (!indx.row) {
ovlp_df$indx.row <- NULL
}
return(ovlp_df)
}
}
##############################################################################################################
#' alternative name for \code{\link{overlapping_sels}}
#'
#' @keywords internal
#' @details see \code{\link{overlapping_sels}} for documentation. \code{\link{ovlp_sels}} will be deprecated in future versions.
#' @export
ovlp_sels <- overlapping_sels
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.