Nothing
#' @name predict
#' @title Predict BANTER events
#' @description Predict species of events for novel data from a BANTER model.
#'
#' @param object a \code{\link{banter_model}} object.
#' @param new.data a list of event and detector data that has the same
#' predictors as in the \code{banter_model}. It must contain elements called
#' \code{events} and \code{detectors}. The \code{events} element must be a
#' data.frame that has a column called \code{event.id} and the same
#' predictor columns as the event data used to initialize the banter model
#' (see \code{\link{initBanterModel}}). The \code{detectors} element must be
#' a named list with the same detectors used to build the model
#' (see \code{\link{addBanterDetector}}).
#' @param ... unused.
#'
#' @note At least one detector in the model must be present in \code{new.data}.
#' Any detectors in the training model that are absent will have all species
#' proportions and the the detector propoprtion set to 0. If a column called
#' \code{species} is in \code{new.data}, columns for the original species
#' designation and if that matches predicted (\code{correct}) will be added
#' to the \code{predict.df} data.frame of the output.
#'
#' @return A list with the following elements: \describe{
#' \item{events}{the data frame used in the event model for predictions.}
#' \item{predict.df}{data.frame of predicted species and assignment
#' probabilities for each event.}
#' \item{detector.freq}{data.frame giving the number of events available
#' for each detector.}
#' \item{validation.matrix}{if \code{species} is a column in \code{new.data},
#' a table giving the classification rate for each event}
#' }
#'
#' @author Eric Archer \email{eric.archer@@noaa.gov}
#'
#' @references Rankin, S. , Archer, F. , Keating, J. L., Oswald, J. N.,
#' Oswald, M. , Curtis, A. and Barlow, J. (2017), Acoustic classification
#' of dolphins in the California Current using whistles, echolocation clicks,
#' and burst pulses. Marine Mammal Science 33:520-540. doi:10.1111/mms.12381
#'
#' @examples
#' data(train.data)
#' # initialize BANTER model with event data
#' bant.mdl <- initBanterModel(train.data$events)
#' # add all detector models
#' bant.mdl <- addBanterDetector(
#' bant.mdl, train.data$detectors,
#' ntree = 50, sampsize = 2, num.cores = 1
#' )
#' # run BANTER event model
#' bant.mdl <- runBanterModel(bant.mdl, ntree = 1000, sampsize = 1)
#'
#' # predict test data
#' data(test.data)
#' test.pred <- predict(bant.mdl, test.data)
#' test.pred
#'
#' @exportMethod predict
methods::setGeneric("predict")
#' @name predict
#' @rdname predict
#' @method predict banter_model
#' @export
predict.banter_model <- function(object, new.data, ...) {
# Check that 'new.data' has elements called 'events' and 'detectors'
if(!all(c("events", "detectors") %in% names(new.data))) {
stop(
"'new.data' must have elements named 'events' and 'detectors'",
call. = FALSE
)
}
# Check that at least one detector is present in 'new.data'
detectors.found <- intersect(names(object@detectors), names(new.data$detectors))
if(length(detectors.found) == 0) {
stop(
"Can't predict with 'new.data' because no detectors from model are present.",
call. = FALSE
)
}
# Check that all detectors that are present have the required fields
for(x in detectors.found) {
new.cols <- colnames(new.data$detectors[[x]])
mdl.cols <- names(getBanterModel(object, x)$forest$xlevels)
cols.missing <- setdiff(mdl.cols, new.cols)
if(!"event.id" %in% new.cols) cols.missing <- c("event.id", cols.missing)
if(length(cols.missing) > 0) {
stop(
"The '", x, "' detector is missing the following columns: ",
paste(cols.missing, collapse = ", "),
call. = FALSE
)
}
}
# Check that event data has all necessary columns
# detector columns in event model
detector.regex <- paste0(
paste0("^", detectors.found, ".", collapse = "|"),
paste0(".", detectors.found, "$", collapse = "|"),
sep = "|"
)
# non-detector columns in event model
event.cols <- names(getBanterModel(object)$forest$xlevels)
other.cols <- setdiff(
event.cols,
grep(detector.regex, event.cols, value = TRUE)
)
new.cols <- colnames(new.data$events)
cols.missing <- setdiff(other.cols, new.cols)
if(!"event.id" %in% new.cols) cols.missing <- c("event.id", cols.missing)
if(length(cols.missing) > 0) {
stop(
"The following columns are missing from the event data: ",
paste(cols.missing, collapse = ", "),
call. = FALSE
)
}
unique.events <- unique(new.data$events$event.id)
# Get number of calls in each detector for each event
detector.num <- lapply(names(object@detectors), function(d) {
if(d %in% names(new.data$detectors)) {
new.data$events %>%
dplyr::left_join(new.data$detectors[[d]], by = "event.id") %>%
dplyr::group_by(.data$event.id) %>%
dplyr::summarize(n = sum(!is.na(.data$call.id)), .groups = "drop") %>%
dplyr::mutate(detector = paste0("prop.", d))
} else { # detector not present in new.data, proportion = 0
tibble::tibble(
event.id = unique.events,
n = 0,
detector = paste0("prop.", d)
)
}
}) %>%
dplyr::bind_rows()
# Convert number to proportion of calls
detector.prop <- detector.num %>%
dplyr::group_by(.data$event.id) %>%
dplyr::mutate(n = .data$n / sum(.data$n, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
tidyr::pivot_wider(names_from = "detector", values_from = "n")
detector.prop <- replace(detector.prop, is.na(detector.prop), 0) %>%
as.data.frame()
# Calculate mean votes for each event
detector.votes <- sapply(names(object@detectors), function(d) {
if(d %in% names(new.data$detectors)) { # detector present in model
predict(
object@detectors[[d]]@model,
new.data$detectors[[d]],
type = "prob"
) %>%
as.data.frame() %>%
dplyr::bind_cols(event.id = new.data$detectors[[d]]$event.id)
} else { # detector not present in model - fill with 0's
spp <- colnames(object@detectors[[d]]@model$votes)
vote.0 <- matrix(0, nrow = length(unique.events), ncol = length(spp))
colnames(vote.0) <- spp
dplyr::bind_cols(as.data.frame(vote.0), event.id = unique.events)
}
}, simplify = FALSE) %>%
.meanVotes()
# Construct data.frame to predict
df <- new.data$events %>%
dplyr::left_join(detector.prop, by = "event.id") %>%
dplyr::left_join(detector.votes, by = "event.id")
# add call rate columns if duration is present and there is no missing data
if("duration" %in% colnames(df)) {
if(all(!is.na(df$duration))) {
df <- df %>%
dplyr::left_join(
detector.num %>%
dplyr::left_join(
dplyr::select(df, "event.id", "duration"),
by = "event.id"
) %>%
dplyr::mutate(
detector = gsub("prop.", "rate.", .data$detector),
n = .data$n / .data$duration
) %>%
dplyr::select(-.data$duration) %>%
tidyr::pivot_wider(names_from = "detector", values_from = "n"),
by = "event.id"
)
}
}
df <- stats::na.omit(df)
result <- list(
events = df,
predict.df = cbind(
data.frame(event.id = df$event.id, stringsAsFactors = FALSE),
predicted = as.character(predict(object@model, df, type = "response")),
predict(object@model, df, type = "prob"),
stringsAsFactors = FALSE
),
detector.freq = detector.num %>%
dplyr::mutate(detector = gsub("prop.", "", .data$detector)) %>%
dplyr::group_by(.data$detector) %>%
dplyr::summarize(num.events = sum(.data$n > 0)) %>%
dplyr::ungroup() %>%
as.data.frame(stringsAsFactors = FALSE)
)
if("species" %in% colnames(df)) {
result$predict.df <- dplyr::mutate(
result$predict.df,
original = make.names(df$species, allow_ = FALSE),
correct = .data$original == .data$predicted
)
conf.df <- result$predict.df
conf.df$predicted <- factor(
conf.df$predicted,
levels = object@model$classes
)
result$validation.matrix <- table(
original = result$predict.df$original,
predicted = result$predict.df$predicted
)
}
result
}
#' @name predict
#' @rdname predict
#' @aliases predict,banter_model-method
methods::setMethod("predict", "banter_model", predict.banter_model)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.