R/utils.R

Defines functions is_flag is_count is_integer get_species date_to_st_week calculate_mcc_f1

Documented in calculate_mcc_f1 date_to_st_week get_species

#' Calculate MCC and F1 score
#'
#' Given binary observed and predicted response, estimate Matthews correlation
#' coefficient (MCC) and the F1 score.
#'
#' @param observed logical or 0/1; the observed binary response.
#' @param predicted logical or 0/1; the predicted binary response. This will typically
#'   need to be generated by applying a threshold to the continuous predicted
#'   response.
#'
#' @return A list with two elements: `mcc` and `f1`.
#' @export
#' @examples
#' obs <- c(rep(1L, 1000L), rep(0L, 10000L))
#' pred <- c(rbeta(300L, 12, 2), rbeta(700L, 3, 4), rbeta(10000L, 2, 3))
#' calculate_mcc_f1(obs > 0, pred > 0.5)
calculate_mcc_f1 <- function(observed, predicted) {
  if (!requireNamespace("PresenceAbsence", quietly = TRUE)) {
    stop("Package 'PresenceAbsence' must be installed to use this function.")
  }
  stopifnot(is.logical(observed) || all(observed %in% c(0L, 1L)),
            is.logical(predicted) || all(predicted %in% c(0L, 1L)),
            length(observed) == length(predicted))

  # confusion matrix
  obs_pred <- data.frame(blank = "x",
                         obs = as.numeric(observed),
                         pred = as.numeric(predicted))
  cmx <- PresenceAbsence::cmx(obs_pred, na.rm = TRUE)
  tp <- cmx[1, 1]
  fp <- cmx[1, 2]
  fn <- cmx[2, 1]
  tn <- cmx[2, 2]

  # f1 score
  p_pat <- tp / (tp + fp)
  r_pat <- tp / (tp + fn)
  f1 <- 2 * ((p_pat * r_pat) / (p_pat + r_pat))

  # mcc
  mcc_denom <- sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
  mcc <- ((tp * tn) - (fp * fn)) / mcc_denom
  return(list(f1 = f1, mcc = mcc))
}


#' Get the Status and Trends week that a date falls into
#'
#' @param dates a vector of dates.
#'
#' @return An integer vector of weeks numbers from 1-52.
#' @export
#' @examples
#' d <- as.Date(c("2016-04-08", "2018-12-31", "2014-01-01", "2018-09-04"))
#' date_to_st_week(d)
date_to_st_week <- function(dates) {
  dv <- seq(from = 0, to = 1, length.out = 52 + 1)
  days <- (as.POSIXlt(dates)$yday + 0.5) / 366

  check_d <- function(x) {
    which(x >= dv[-length(dv)] & x < dv[-1])
  }
  vapply(days, check_d, FUN.VALUE = integer(length = 1))
}


#' Get eBird species code for a set of species
#'
#' Give a vector of species codes, common names, and/or scientific names, return
#' a vector of 6-letter eBird species codes. This function will only look up
#' codes for species for which eBird Status and Trends results exist.
#'
#' @param x character; vector of species codes, common names, and/or scientific
#'   names.
#'
#' @return A character vector of eBird species codes.
#' @export
#'
#' @examples
#' get_species(c("Black-capped Chickadee", "Poecile gambeli", "carchi"))
get_species <- function(x) {
  stopifnot(is.character(x))
  r <- ebirdst::ebirdst_runs
  x <- tolower(trimws(x))

  # species code
  code <- match(x, tolower(r$species_code))
  # scientific name
  sci <- match(x, tolower(r$scientific_name))
  # common names
  com <- match(x, tolower(r$common_name))
  # combine
  r$species_code[dplyr::coalesce(code, sci, com)]
}


# internal ----

is_integer <- function(x) {
  return(isTRUE(is.integer(x) || (is.numeric(x) && all(x == as.integer(x)))))
}

is_count <- function(x) {
  return(isTRUE(is_integer(x) && length(x) == 1 && x >= 0))
}

is_flag <- function(x) {
  return(isTRUE(is.logical(x) && length(x) == 1 && !is.na(x)))
}

Try the ebirdst package in your browser

Any scripts or data that you put into this service are public.

ebirdst documentation built on Nov. 16, 2023, 5:07 p.m.