R/c14_date_list_calibrate.R

Defines functions hdr determine_calage_from_probability_distribution calibrate_to_probability_distribution determine_dates_out_of_range_of_calcurve calibrate.c14_date_list calibrate.default calibrate

Documented in calibrate calibrate.c14_date_list calibrate.default

#### calibrate ####

#' @name calibrate
#' @title Calibrate all valid dates in a \strong{c14_date_list}
#'
#' @description Calibrate all dates in a \strong{c14_date_list} with
#' \code{Bchron::BchronCalibrate()}. The function provides two different
#' kinds of output variables that are added as new list columns to the input
#' \strong{c14_date_list}: \strong{calprobdistr} and \strong{calrange}.
#' \strong{calrange} is accompanied by \strong{sigma}. See
#' \code{?Bchron::BchronCalibrate} and \code{?c14bazAAR:::hdr} for some more
#' information. \cr
#' \strong{calprobdistr}: The probability distribution of the individual date
#' for all ages with an individual probability >= 1e-06. For each date there's
#' a data.frame with the columns \strong{calage} and \strong{density}. \cr
#' \strong{calrange}: The contiguous ranges which cover the probability interval
#' requested for the individual date. For each date there's a data.frame with the
#' columns \strong{dens} and \strong{from} and \strong{to}.
#'
#' @param x an object of class c14_date_list
#' @param choices whether the result should include the full calibrated
#' probability dataframe ('calprobdistr') or the sigma range ('calrange').
#' Both arguments may be given at the same time.
#' @param sigma the desired sigma value (1,2,3) for the calibrated sigma ranges
#' @param calCurves	a vector of values containing either intcal20, shcal20,
#' marine20, or normal (older calibration curves are supposed such as intcal13).
#' Should be the same length the number of ages supplied.
#' See \link[Bchron]{BchronCalibrate} for more information
#' @param ... passed to \link[Bchron]{BchronCalibrate}
#'
#' @return an object of class c14_date_list with the additional columns
#' \strong{calprobdistr} or \strong{calrange} and \strong{sigma}
#'
#' @examples
#' calibrate(
#'   example_c14_date_list,
#'   choices = c("calprobdistr", "calrange"),
#'   sigma = 1
#' )
#'
#' @export
#'
#' @rdname calibrate
#'
calibrate <- function(
  x, choices = c("calrange"), sigma = 2, calCurves = rep("intcal20", nrow(x)), ...) {
  UseMethod("calibrate")
}

#' @rdname calibrate
#' @export
calibrate.default <- function(
  x, choices = c("calrange"), sigma = 2, calCurves = rep("intcal20", nrow(x)), ...) {
  stop("x is not an object of class c14_date_list")
}

#' @rdname calibrate
#' @export
calibrate.c14_date_list <- function(
  x, choices = c("calrange"), sigma = 2, calCurves = rep("intcal20", nrow(x)), ...) {

  choices <- match.arg(
    choices,
    c("calprobdistr", "calrange"),
    several.ok = TRUE
  )

  check_if_packages_are_available(c("Bchron", "plyr"))

  x %>% check_if_columns_are_present(c("c14age", "c14std"))

  # start message:
  message(paste0("Calibrating dates... ", {if (nrow(x) > 10000) {"This may take several minutes."}}))

  # setup progress bar
  pb <- utils::txtProgressBar(
    max = 100,
    style = 3,
    width = 50,
    char = "+"
  )

  # add empty column "calprobdistr"
  x %<>% add_or_replace_column_in_df("calprobdistr", replicate(nrow(x), data.frame()), .after = "c14std")

  # extract dates which are not out of range of calcurve
  outofrange <- x %>% determine_dates_out_of_range_of_calcurve(calCurves)
  if(length(outofrange) > 0) {
    calibrateable <- x[-outofrange, ]
    calCurves <- calCurves[-outofrange]
  } else {
    calibrateable <- x
  }

  # create empty calibration result data.frame list
  calibrateable_prodistr <- replicate(nrow(calibrateable), data.frame())

  # determine amount of calibration loop iteration to work through the data in stacks of 200 dates
  # -> artificial stacks
  step_width <- 200
  amount_of_iterations <- ceiling(nrow(calibrateable)/step_width)

  # increment progress bar
  utils::setTxtProgressBar(pb, 5)

  # calibration loop -- loop only for progress bar :-( -- code was much simpler without this
  for (i in 1:amount_of_iterations) {

    # define step sequence for this step
    step_seq <- (step_width * (i - 1) + 1):if (i == amount_of_iterations)
      {nrow(calibrateable)} else {(step_width * i)}

    # calibration in stacks of size step_width
    calibrateable_prodistr[step_seq] <- calibrate_to_probability_distribution(
      calibrateable[step_seq, ],
      eps = 1e-06,
      calCurves = calCurves[step_seq],
      ...
    )

    # increment progress bar
    utils::setTxtProgressBar(pb, 5 + 90 * i/amount_of_iterations)
  }

  # determine calage
  calage_vector <- determine_calage_from_probability_distribution(calibrateable_prodistr)

  # write result back into x
  if(length(outofrange) > 0) {
    x$calprobdistr[-outofrange] <- calibrateable_prodistr
  } else {
    x$calprobdistr <- calibrateable_prodistr
  }

  # vector of probabilities for 1, 2 and 3 sigma
  my_prob_vector <- c(0.6827, 0.9545, 0.9974)

  if ("calrange" %in% choices) {
    x %<>% add_or_replace_column_in_df("calrange",  replicate(nrow(x), data.frame()), .after = "calprobdistr")
    x %<>% add_or_replace_column_in_df("sigma",  NA_integer_, .after = "calrange")
    if(length(outofrange) > 0) {
      x$calrange[-outofrange] <- lapply(x$calprobdistr[-outofrange], hdr, prob = my_prob_vector[sigma])
    } else {
      x$calrange <- lapply(x$calprobdistr, hdr, prob = my_prob_vector[sigma])
    }
    x$sigma <- sigma
  } else {
    x$calrange <- x$sigma <- NULL
  }

  if (!("calprobdistr" %in% choices)) {
    x$calprobdistr <- NULL
  }

  # turning x back into a c14_date_list
  x %<>% as.c14_date_list()

  # increment progress bar
  utils::setTxtProgressBar(pb, 100)

  # close progress bar
  close(pb)

  return(x)
}

#' determine_dates_out_of_range_of_calcurve
#'
#' @param x c14_date_list
#'
#' @return indizes of dates in c14_date_list which are out of calcurve range
#'
#' @keywords internal
#' @noRd
determine_dates_out_of_range_of_calcurve <- function(x, calCurves) {
  # load intcal13 data from Bchron
  cal_curve <- NA
  cal_curve_string <- utils::data(list = calCurves[1], package = "Bchron", envir = environment())
  cal_curve <- eval(as.name(cal_curve_string))
  if(!is.data.frame(cal_curve)) stop("Problems loading intcal13 dataset from package Bchron.")

  toona <- which(is.na(x$c14age) | is.na(x$c14std))
  toosmall <- which(x$c14age < min(cal_curve[,2]))
  toobig <- which(x$c14age > max(cal_curve[,2]))
  outofrange <- c(toona, toosmall, toobig) %>% unique

  return(outofrange)
}

#' calibrate_to_probability_distribution
#'
#' @param x c14_date_list
#' @param ... further arguments passed to Bchron::BchronCalibrate()
#'
#' @return list with probability distribution data frames
#'
#' @keywords internal
#' @noRd
calibrate_to_probability_distribution <- function(x, calCurves, ...) {
  Bchron::BchronCalibrate(
    ages      = x$c14age,
    ageSds    = x$c14std,
    calCurves = calCurves,
    ...
  ) %>%
    # extract probability distribution from BchronCalibrate output
    # and craft new data.frame with year and density
    lapply(
      function(x) {
        data.frame(calage = x[["ageGrid"]], density = x[["densities"]])
      }
    ) %>%
    return()
}

#' determine_calage_from_probability_distribution
#'
#' @param x c14_date_list
#'
#' @return vector of calages
#'
#' @keywords internal
#' @noRd
determine_calage_from_probability_distribution <- function(x) {
  x %>%
    lapply(
      function(x) {
        x$calage[which.max(x$density)]
      }
    ) %>%
    unlist %>%
    return()
}

#' Calculate highest density regions for Bchron calibrated ages
#'
#' A function for computing highest density regions (HDRs).
#' Adopted from the Bchron package.
#'
#' @details The output of this function is a list of contiguous ranges which
#' cover the probability interval requested. A highest density region might
#' have multiple such ranges if the calibrated date is multi-modal. These
#' differ from credible intervals, which are always contiguous but will not
#' be a good representation of a multi-modal probability distribution.
#'
#' @param calprobdistr the probability distribution
#' @param prob The desired probability interval, in the range(0, 1)
#'
#' @return a dataframe containing the hdr
#'
#' @keywords internal
#' @noRd
hdr <- function(calprobdistr, prob = 0.95) {

  if(findInterval(prob, c(0, 1))!=1) stop('prob value outside (0,1).')

  ag <- calprobdistr$calage
  de <- calprobdistr$density

  # Put the probabilities in order
  o = order(de)
  cu = cumsum(de[o])

  # Find which ones are above the threshold
  good_cu = which(cu>1-prob)
  good_ag = sort(ag[o][good_cu])

  # Pick out the extremes of each range
  breaks = diff(good_ag)>1
  where_breaks = which(diff(good_ag)>1)
  n_breaks = sum(breaks) + 1

  # Store output
  out = data.frame(dens = double(), from=integer(), to=integer())
  low_seq = 1
  high_seq = ifelse(length(where_breaks)==0, length(breaks), where_breaks[1])
  for(i in 1:n_breaks) {
    curr_dens = round(100*sum(de[o][seq(good_cu[low_seq], good_cu[high_seq])]),1)
    out[i,] = c(curr_dens, good_ag[low_seq], good_ag[high_seq])
    low_seq = high_seq + 1
    high_seq = ifelse(i<n_breaks-1, where_breaks[i+1], length(breaks))
  }
  return(out)
}
nevrome/c14databases documentation built on Feb. 2, 2024, 2 a.m.