R/predict_ages.R
In halk: Methods to Create Hierarchical Age Length Keys for Age Assignment

Documented in assign_ages

#' @export
predict.alk <- function(object, newdata,
                        numbers_col = NULL,
                        age_below_minlen = min,
                        age_above_maxlen = max,
                        ...) {
  if (is.null(object)) {
    return(NULL)
  }
  # stopifnot("data.frame" %in% class(newdata))
  size_col <- attr(object, "size_col")
  age_col <- attr(object, "age_col")
  size_bin <- attr(object, "size_bin")
  autobin <- attr(object, "autobin")
  min_age <- attr(object, "min_age")
  plus_group <- attr(object, "plus_group")
  if (is.vector(newdata)) {
    newdata_as_vector <- TRUE
    newdata <- data.frame(
      length = newdata,
      order_vector = 1:length(newdata)
    )
  } else if (is.data.frame(newdata)) {
    newdata_as_vector <- FALSE
    check_length_data(newdata, size_col)
    newdata <-
      rename_laa_cols(newdata, size_col = size_col) %>%
      dplyr::mutate(order_vector = 1:dplyr::n())
    if (!is.null(numbers_col)) {
      newdata <-
        newdata %>%
        tidyr::uncount(!!rlang::sym(numbers_col)) %>%
        dplyr::mutate(order_vector = 1:dplyr::n())
    }
  }
  object <- rename_laa_cols(object, size_col, age_col, num_col = numbers_col)
  if (all(is.na(newdata$length))) {
    out <-
      newdata %>%
      dplyr::mutate(est.age = NA, alk.n = NA)
  } else if (!autobin) {
    out <-
      newdata %>%
      dplyr::group_by(.data$length) %>%
      # get the total number in each length group
      dplyr::mutate(n = dplyr::n()) %>%
      tidyr::nest(data = -.data$length) %>%
      dplyr::left_join(object, by = c("length")) %>%
      tidyr::nest(ages = tidyselect::starts_with("age")) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(est.age = purrr::map2(
        .data$data, .data$ages, function(x, y) {
          n <- unique(x$n)
          if (length(n) > 1) {
            stop("Something went wrong in predicting ages from an alk")
          }
          if (all(is.na(y))) {
            return(rep(NA, n))
          } else{
            est_ages <- floor(n * y)
            leftovers <- n - sum(est_ages)
            age_vec <- rep(names(est_ages), est_ages)
            if (leftovers > 0) {
              leftover_ages <- sample(
                names(y),
                size = leftovers,
                replace = TRUE,
                prob = y
              )
              age_vec <- c(age_vec, leftover_ages)
            }
            out <- as.numeric(gsub("age|\\+", "", age_vec))
            return(out)
          }
        })
      ) %>%
      tidyr::unnest(cols = c("data", "est.age")) %>%
      dplyr::select(-"ages", -"n")
    missing_ages <- dplyr::filter(out, is.na(.data$est.age))
    if (nrow(missing_ages) > 1) {
      warning(
        "You have length data that are missing from your ALK. ",
        "Consider changing the size_bin during ALK creation."
      )
    }
  } else {
    out <-
      newdata %>%
      dplyr::mutate(
        rounded_length = bin_lengths(.data$length, size_bin)
      ) %>%
      dplyr::group_by(.data$rounded_length) %>%
      # get the total number in each rounded length
      dplyr::mutate(n = dplyr::n()) %>%
      tidyr::nest(data = -"rounded_length") %>%
      dplyr::left_join(object, by = c("rounded_length" = "length")) %>%
      tidyr::nest(ages = tidyselect::starts_with("age")) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(est.age = purrr::map2(
        .data$data, .data$ages, function(x, y) {
          n <- unique(x$n)
          if (length(n) > 1) {
            stop("Something went wrong in predicting ages from an alk")
          }
          if (all(is.na(y))) {
            # if no ages are present for length, assign NA for now
            # will assign later based on probability if defaults were used for alk
            return(rep(NA, n))
          } else{
            est_ages <- floor(n * y)
            leftovers <- n - sum(est_ages)
            age_vec <- rep(names(est_ages), est_ages)
            if (leftovers > 0) {
              leftover_ages <- sample(
                names(y),
                size = leftovers,
                replace = TRUE,
                prob = y
              )
              age_vec <- c(age_vec, leftover_ages)
            }
            out <- as.numeric(gsub("age|\\+", "", age_vec))
            return(out)
          }
        })
      ) %>%
      tidyr::unnest(cols = c("data", "est.age")) %>%
      dplyr::select(-"ages", -"n")
    missing_ages <- dplyr::filter(out, is.na(.data$est.age))
    out <- dplyr::filter(out, !is.na(.data$est.age))
    if (nrow(missing_ages) > 0) {
      if (autobin & size_bin <= 1) {
        normal_params <- attr(object, "dnorm_params")
        # get min and max lengths and ages
        minlen <- min(object$length)
        maxlen <- max(object$length)
        # assign age to missing rows based on normal params from alk inputs
        non_missing_ages <-
          missing_ages %>%
          dplyr::mutate(est.age = purrr::map_dbl(.data$length, function(x) {
            if (is.na(x)) {
              return(NA)
            } else if (x < minlen) {
              return(age_below_minlen(normal_params$age))
            } else if (x > maxlen) {
              return(age_above_maxlen(normal_params$age))
            } else {
              prob <- dnorm(x, normal_params$mean, normal_params$sd)
              if (all(prob < 6.691511e-05)) {
                closest_age_ind <- which.min(abs(x - normal_params$mean))
                return(normal_params$age[closest_age_ind])
              } else {
                return(normal_params$age[which.max(prob)])
              }
            }
          }))
      } else {
        warning(
          "Some lengths in newdata were not represented in your age-length key.",
          "\nConsider different bin sizes."
        )
        non_missing_ages <-
          missing_ages %>%
          dplyr::mutate(est.age = NA)
      }
      out <- dplyr::bind_rows(out, non_missing_ages)
    }
    out <-
      dplyr::select(out, -"rounded_length") %>%
      rename_laa_cols(
        size_col = size_col,
        age_col = age_col,
        num_col = numbers_col,
        goback = TRUE
      )
  }
  out <-
    out %>%
    dplyr::mutate(alk.n = attr(object, "alk_n")) %>%
    dplyr::arrange("order_vector") %>%
    dplyr::select(-"order_vector")
  if (newdata_as_vector) {
    out <- out$est.age
  }
  return(out)
}

#' Assign ages to non-aged data based on a fitted age model
#'
#' @param object An object of class "alk", "halk_fit" as
#' produced by \code{\link{make_alk}} or \code{\link{make_halk}}
#' @param newdata A vector or data.frame with size/length measurements
#' @param ... Additional parameters to pass to the S3 object methods
#'
#' @return A data.frame the same as \code{newdata}, but with ages assigned based
#' on the model provided in \code{object}
#' @export
#'
#' @examples
#' spp_alk <- make_halk(spp_data, levels = "spp")
#' spp_est_ages <- assign_ages(spp_data, spp_alk)
assign_ages <- function(newdata, object, ...) {
  UseMethod("assign_ages", object)
}

#' @export
assign_ages.alk <- function(newdata, object, ...) {
  est_ages <- predict(object, newdata)
  attr(est_ages, "age_levels") <- NULL
  return(est_ages)
}

#' @export
assign_ages.halk_fit <- function(newdata, object, ...) {
  levels <- attr(object, "levels")
  if (is.null(levels)) {
    est_ages <- predict(object, newdata)
  } else {
    # pull levels off attributes and reverse loop through them to predict age
    est_ages <- tibble::as_tibble(newdata[FALSE, ])
    unaged_data <- newdata
    temp_levels <- levels
    level_alks <- object
    for (i in rev(seq_along(levels))) {
      if (nrow(unaged_data) == 0) {
        next
      }
      temp_level_no_spp <- rm_spp_level(temp_levels)
      temp_nonlevel_names <- names(newdata)[!(names(newdata) %in% temp_levels)]
      est_level_ages <-
        unaged_data %>%
        # removed 3/29/2022 due to deprecation of across within filter
        # shouldn't be needed anyways because the inner join should remove rows
        # dplyr::filter(
        #   dplyr::across(tidyselect::all_of(temp_levels), ~ !is.na(.x))
        # ) %>%
        tidyr::nest(data = tidyselect::all_of(temp_nonlevel_names)) %>%
        dplyr::inner_join(
          level_alks %>%
            dplyr::select(dplyr::all_of(temp_levels), "alk") %>%
            dplyr::distinct(),
          by = temp_levels
        )
      if (nrow(est_level_ages) == 0) {
        # prep levels and alks for next iteration
        temp_levels <- temp_levels[-i]
        level_alks <-
          level_alks %>%
          dplyr::filter(is.na(!!rlang::sym(levels[i]))) %>%
          dplyr::select(tidyselect::all_of(temp_levels), "alk")
        next
      } else {
        est_level_ages <-
          est_level_ages %>%
          dplyr::mutate(est.age = purrr::map2(
            .data$data, .data$alk, function(x, y) {
              # there's a problem here with the predict function returning
              # more rows than what it's given -- at least for bluegill
              return(predict(y, x))
            })
          ) %>%
          tidyr::unnest(.data$est.age) %>%
          dplyr::select(-"data", -"alk") %>%
          dplyr::mutate(alk = levels[i]) %>%
          dplyr::relocate("alk.n", .after = "alk")
      }

      est_ages <- dplyr::bind_rows(est_ages, est_level_ages)
      unaged_data <- dplyr::anti_join(
        unaged_data,
        est_level_ages,
        by = temp_levels
      )
      # prep levels and alks for next iteration
      temp_levels <- temp_levels[-i]
      level_alks <-
        level_alks %>%
        dplyr::filter(is.na(!!rlang::sym(levels[i]))) %>%
        dplyr::select(tidyselect::all_of(temp_levels), "alk")
    }
  }
  attr(est_ages, "age_levels") <- levels
  return(est_ages)
}


#' @export
assign_ages.NULL <- function(newdata, object, ...) {
  return(NULL)
}

Any scripts or data that you put into this service are public.

halk documentation built on May 29, 2024, 5:22 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

halk
Methods to Create Hierarchical Age Length Keys for Age Assignment

R/predict_ages.R
In halk: Methods to Create Hierarchical Age Length Keys for Age Assignment

Defines functions assign_ages.NULL assign_ages.halk_fit assign_ages.alk assign_ages predict.alk

Documented in assign_ages

Try the halk package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

halk Methods to Create Hierarchical Age Length Keys for Age Assignment

R/predict_ages.R In halk: Methods to Create Hierarchical Age Length Keys for Age Assignment

Defines functions assign_ages.NULL assign_ages.halk_fit assign_ages.alk assign_ages predict.alk

Documented in assign_ages

Try the halk package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

halk
Methods to Create Hierarchical Age Length Keys for Age Assignment

R/predict_ages.R
In halk: Methods to Create Hierarchical Age Length Keys for Age Assignment