Nothing
#' Generate time bins
#'
#' A function to generate time bins for a given study interval and geological
#' timescale. This function is flexible in that either stage-level or higher
#' stratigraphic-level (e.g. period) time bins can be called, valid timescales
#' from [Macrostrat](https://macrostrat.org/api/defs/timescales?all) can be
#' used, or a `data.frame` of a geological timescale can be provided. In
#' addition, near equal-length time bins can be generated by grouping
#' intervals together. For example, for a target bin size of 10 Myr, the
#' function will generate groups of bins that have a mean bin length close to
#' 10 Myr. However, users may also want to consider grouping stages based on
#' other reasoning e.g. availability of outcrop (see Dean et al. 2020).
#'
#' @param interval \code{character or numeric}. Interval name available in
#' \code{\link{GTS2020}} or \code{\link{GTS2012}}. If a single interval name
#' is provided, this interval is returned. If two interval names are
#' provided, these intervals and those existing between are returned. If a
#' single numeric age is provided, the interval that covers this age is
#' returned. If two numeric ages are provided, the intervals occurring in
#' the range of these ages are returned. Defaults to "Phanerozoic". This
#' argument is ignored if `scale` is not "GTS2020" or "GTS2012".
#' @param rank \code{character}. Which stratigraphic rank is desired? Choose
#' from: "stage", "epoch", "period", "era", and "eon". This argument is
#' ignored if `scale` is not "GTS2020" or "GTS2012".
#' @param size \code{numeric}. If equal-length time bins are desired, specify
#' the length in millions of years (Myr) of the time bins desired.
#' @param assign \code{numeric}. A numeric vector of age estimates to use to
#' assign to requested bins. If assign is specified, a numeric vector is
#' returned of the midpoint age of the specified bins. Note this is the
#' simplified approach of assignment in `palaeoverse` included for data with
#' 'known' point-age estimates. For a wider range of binning methods, see
#' \code{\link[palaeoverse:bin_time]{palaeoverse::bin_time()}}.
#' @param scale \code{character} or \code{data.frame}. Specify the desired
#' geological timescale to be used: "GTS2020" (default), "GTS2012", a valid
#' timescale from
#' [Macrostrat](https://macrostrat.org/api/defs/timescales?all), or
#' user-input `data.frame`. If a `data.frame` is provided, it must contain
#' at least the following named columns: "interval_name", "max_ma", and
#' "min_ma". As such, age data should be provided in Ma.
#' @param plot \code{logical}. Should a plot of time bins be generated?
#' @importFrom graphics polygon title
#' @importFrom stats sd
#' @importFrom curl nslookup
#' @importFrom grDevices col2rgb
#' @importFrom utils read.csv
#'
#' @return A \code{data.frame} of time bins for the specified intervals or a
#' list with a \code{data.frame} of time bins and a named \code{numeric}
#' vector (bin number) of binned age estimates (midpoint of specified bins)
#' if `assign` is specified. By default, the time bins \code{data.frame}
#' contains the following columns: bin, interval_name, rank, max_ma, mid_ma,
#' min_ma, duration_myr, abbr (interval abbreviation), colour and font
#' (colour). If `size` is specified, the time bins \code{data.frame}
#' contains the following columns: bin, max_ma, mid_ma, min_ma,
#' duration_myr, grouping_rank, intervals, colour and font.
#'
#' @details This function uses either the Geological Time Scale 2020,
#' Geological Time Scale 2012, a valid timescale from
#' [Macrostrat](https://macrostrat.org/api/defs/timescales?all), or a
#' user-input `data.frame` (see `scale` argument) to generate time bins.
#' Additional information on included Geological Time Scales and source can
#' be accessed via:
#' - \code{\link{GTS2020}}
#' - \code{\link{GTS2012}}
#'
#' Available interval names are accessible via the `interval_name` column
#' in `GTS2012` and `GTS2020`. Data of the Geological Timescale 2020 and
#' 2012 were compiled by Lewis A. Jones (2022-07-02).
#'
#' @section References:
#' Dean, C.D., Chiarenza, A.A. and Maidment, S.C., 2020. Formation binning: a
#' new method for increased temporal resolution in regional studies, applied
#' to the Late Cretaceous dinosaur fossil record of North America.
#' Palaeontology, 63(6), 881-901. \doi{10.1111/pala.12492}.
#'
#' @section Developer(s):
#' Lewis A. Jones
#' @section Reviewer(s):
#' Kilian Eichenseer & William Gearty
#' @export
#' @examples
#' #Using numeric age
#' ex1 <- time_bins(interval = 10, plot = TRUE)
#'
#' #Using numeric age range
#' ex2 <- time_bins(interval = c(50, 100), plot = TRUE)
#'
#' #Using a single interval name
#' ex3 <- time_bins(interval = c("Maastrichtian"), plot = TRUE)
#'
#' #Using a range of intervals and near-equal duration bins
#' ex4 <- time_bins(interval = c("Fortunian", "Meghalayan"),
#' size = 10, plot = TRUE)
#'
#' #Assign bins based on given age estimates
#' ex5 <- time_bins(interval = c("Fortunian", "Meghalayan"),
#' assign = c(232, 167, 33))
#'
#' #Use user-input data.frame to generate near-equal length bins
#' scale <- data.frame(interval_name = 1:5,
#' min_ma = c(0, 18, 32, 38, 45),
#' max_ma = c(18, 32, 38, 45, 53))
#' ex6 <- time_bins(scale = scale, size = 20, plot = TRUE)
#'
#' #Use North American land mammal ages from Macrostrat
#' ex7 <- time_bins(scale = "North American land mammal ages", size = 10)
#'
time_bins <- function(interval = "Phanerozoic", rank = "stage", size = NULL,
assign = NULL, scale = "GTS2020", plot = FALSE) {
# Error handling -------------------------------------------------------
if (!is.character(interval) &&
!is.numeric(interval) &&
!is.null(interval)) {
stop("`interval` must be NULL or of class 'character' or 'numeric'")
}
if (!is.numeric(size) && !is.null(size)) {
stop("`size` should be a 'numeric' or NULL.")
}
if (!is.logical(plot)) {
stop("`plot` should be logical (TRUE/FALSE).")
}
if (is.numeric(assign) && any(assign < 0)) {
stop(paste("Age estimates for `assign` should be non-negative values.",
"Hint: You can transform your data using abs()."))
}
if (!is.character(scale) && !is.data.frame(scale)) {
stop(paste("`scale` must be either:\n",
"The name of an in-built time scale (e.g. 'GTS2020'),",
"the name of a Macrostrat time scale (see details),",
"or a `data.frame`."))
}
if (is.data.frame(scale) &&
any(!c("interval_name", "max_ma", "min_ma") %in% colnames(scale))) {
stop(paste("`scale` does not contain named columns:",
"'interval_name', 'max_ma', and 'min_ma'."))
}
if (length(interval) > 2 && !is.null(interval)) {
stop(paste("`interval` must be a 'character' or",
"'numeric' vector of length 1 or 2",
"or NULL."))
}
if (length(rank) > 1) {
stop("`rank` must be of length 1.")
}
if (!rank %in% c("stage", "epoch", "period", "era", "eon", NULL)) {
stop("`rank` must be either: stage, epoch, period, era, eon, or NULL.")
}
# Input dataframe scale ------------------------------------------------
if (is.data.frame(scale)) {
# Assign input scale to df
df <- scale
# Update scale/rank
scale <- "user"
rank <- "user"
# Add mid_ma
df$mid_ma <- (df$max_ma + df$min_ma) / 2
# Add duration
df$duration_myr <- (df$max_ma - df$min_ma)
# Add bin number
df$bin <- seq_len(nrow(df))
# Add scale as rank
df$rank <- scale
# Add colour column if it doesn't already exist
if (!"colour" %in% colnames(df)) {
df$colour <- "#80cdc1"
}
# Add abbr column if it doesn't already exist
if (!"abbr" %in% colnames(df)) {
df$abbr <- NA
}
}
# In-built scales ------------------------------------------------------
if (scale %in% c("GTS2020", "GTS2012")) {
if (is.null(interval)) {
stop("`interval` is NULL. You must define an interval/age range.")
}
# Which geological timescale to use?
if (scale == "GTS2020") {
df <- palaeoverse::GTS2020
} else if (scale == "GTS2012") {
df <- palaeoverse::GTS2012
}
# Update interval number to bin number
colnames(df)[which(colnames(df) == "interval_number")] <- "bin"
# Filter dataset by desired interval names (character string)
if (is.character(interval)) {
# Check interval names
int_index <- charmatch(interval, df$interval_name)
if (any(is.na(int_index))) {
stop(paste("Check spelling of specified intervals.",
"Available intervals are accessible via GTS2020 and GTS2012."))
}
# Subset df for intervals
if (length(int_index) > 1) {
int_min <- min(c(df$min_ma[int_index[1]], df$min_ma[int_index[2]]))
int_max <- max(c(df$max_ma[int_index[1]], df$max_ma[int_index[2]]))
int_index <- c(int_min, int_max)
} else {
int_index <- c(df$min_ma[int_index], df$max_ma[int_index])
}
# Subset df
int_index <- which(df$max_ma > min(int_index) &
df$min_ma < max(int_index))
}
# Filter dataset by desired interval ages (numeric string)
if (is.numeric(interval)) {
# Check age range
if (max(interval) > max(df$max_ma)) {
stop("maximum `interval` value is greater than available intervals")
}
if (min(interval) < min(df$min_ma)) {
stop("minimum `interval` value is less than available intervals")
}
# Subset df
int_index <- which(df$max_ma >= min(interval) &
df$min_ma <= max(interval))
}
# Sort int_index
int_index <- sort(int_index)
# Subset df for intervals
df <- df[int_index, ]
# Which rank should be used?
df <- df[which(df$rank == rank), ]
# Error handle
if (nrow(df) == 0) {
stop("No intervals are available for the defined interval range.")
}
}
# Macrostrat dataframe scale -------------------------------------------
if (!scale %in% c("GTS2020", "GTS2012", "user")) {
# Assign scale to rank
rank <- scale
# Try to get the time scale from Macrostrat
# Online and Macrostrat available?
tryCatch(
{
nslookup("macrostrat.org")
},
error = function(e) {
stop("Macrostrat is not available. Either the site is down or you are
not connected to the internet.",
call. = FALSE
)
}
)
url <- url(paste0("https://macrostrat.org/api/v2/defs/intervals",
"?format=csv×cale=",
gsub(" ", "%20", scale)))
df <- tryCatch(
{
read.csv(url, header = TRUE, stringsAsFactors = FALSE)
},
error = function(e) {
stop("`name` does not match a built-in or Macrostrat time scale.",
call. = FALSE)
})
df <- df[, c("name", "b_age", "t_age", "abbrev", "color")]
colnames(df) <- c("interval_name", "max_ma", "min_ma", "abbr", "colour")
# Add mid_ma
df$mid_ma <- (df$max_ma + df$min_ma) / 2
# Add duration
df$duration_myr <- (df$max_ma - df$min_ma)
# Add bin number
df$bin <- seq_len(nrow(df))
# Add scale as rank
df$rank <- scale
}
# Add font colour if it doesn't exist
if (!("font" %in% colnames(df))) {
# Font colours based on luminance as per
# https://stackoverflow.com/a/1855903/4660582
rgbs <- col2rgb(df$colour)
luminance <- apply(rgbs, 2, function(x) {
(0.299 * x[1] + 0.587 * x[2] + 0.114 * x[3]) / 255
})
df$font <- ifelse(luminance > .5, "black", "white")
}
# Tidy dataframe -------------------------------------------------------
# Abbreviate names if required
if (any(is.na(df$abbr))) {
df$abbr <- abbreviate(df$interval_name, minlength = 1,
use.classes = FALSE, named = FALSE)
}
# Reorder dataframe
df <- df[, c("bin", "interval_name", "rank", "max_ma", "mid_ma",
"min_ma", "duration_myr", "abbr", "colour", "font")]
# Generate equal-length bins? ------------------------------------------
if (is.numeric(size)) {
# Update bin size for age range
# How many bins should be generated?
n_bins <- round((max(df$max_ma) - min(df$min_ma)) / size)
size <- (max(df$max_ma) - min(df$min_ma)) / n_bins
#track cumulative sum
tracker <- list()
for (i in seq_len(length.out = nrow(df))) {
tracker[[i]] <- rep(NA, length.out = nrow(df))
tracker[[i]][i:nrow(df)] <- abs(
cumsum(df$duration_myr[i:nrow(df)]) - size)
}
# Calculate upper and lower limits for each bin
lower <- NULL
upper <- NULL
count <- 1
while (count <= nrow(df)) {
upper <- append(upper, which.min(tracker[[count]]))
lower <- append(lower, (count))
count <- which.min(tracker[[count]]) + 1
}
# Generate bin information
bin <- rev(seq_along(upper))
max_ma <- df[upper, c("max_ma")]
min_ma <- df[lower, c("min_ma")]
mid_ma <- (max_ma + min_ma) / 2
duration_myr <- max_ma - min_ma
intervals <- vector("character")
# Resolve any edge effect
if (duration_myr[length(duration_myr)] < size / 2) {
upper[length(upper) - 1] <- upper[length(upper)]
upper <- upper[-length(upper)]
lower <- lower[-length(lower)]
bin <- rev(seq_along(upper))
max_ma <- df[upper, c("max_ma")]
min_ma <- df[lower, c("min_ma")]
mid_ma <- (max_ma + min_ma) / 2
duration_myr <- max_ma - min_ma
}
# Get interval names
for (i in seq_along(upper)) {
intervals[i] <- toString(df[lower[i]:upper[i], c("interval_name")])
}
# Generate dataframe
grouping_rank <- rank
df <- cbind.data.frame(bin,
max_ma,
mid_ma,
min_ma,
duration_myr,
grouping_rank,
intervals)
# Message user
message(
paste("Target equal length time bins was set to",
round(size, digits = 2),
"Myr. \nGenerated time bins have a mean length of",
round(mean(df$duration_myr), digits = 2),
"Myr and a standard deviation of",
round(sd(df$duration_myr), digits = 2),
"Myr."
)
)
}
# Plot data? -----------------------------------------------------------
if (plot) {
if (is.numeric(size)) {
df$colour <- c("#80cdc1")
df$font <- c("black")
}
plot(1, type = "n",
xlim = c(max(df$max_ma), min(df$min_ma)),
ylim = c(0, max(df$duration_myr)),
xlab = "Time (Ma)",
ylab = "Duration (Myr)"
)
for (i in seq_len(length.out = nrow(df))) {
polygon(
x = c(df$min_ma[i], df$max_ma[i], df$max_ma[i], df$min_ma[i]),
y = c(0, 0, df$duration_myr[i], df$duration_myr[i]),
col = df$colour[i]
)
}
if (is.numeric(size)) {
title(paste(
"Mean bin length =",
round(mean(df$duration_myr), digits = 2),
"(standard deviation =",
round(sd(df$duration_myr), digits = 2),
")"
))
}
}
# Assign data? ---------------------------------------------------------
if (!is.null(assign)) {
if (is.numeric(assign)) {
if (any(assign > max(df$max_ma) | assign < min(df$min_ma))) {
stop("One or more ages is outside the specified time interval range")
}
tmp <- assign
for (i in seq_len(length.out = nrow(df))) {
assign[which(tmp <= df$max_ma[i] &
tmp >= df$min_ma[i])] <- df$mid_ma[i]
names(assign)[which(tmp <= df$max_ma[i] &
tmp >= df$min_ma[i])] <- df$bin[i]
}
assign <- list(df, assign)
names(assign) <- c("Bins", "Assignation")
return(assign)
} else {
stop("`assign` should be a numeric")
}
}
# Clean up --------------------------------------------------------------
# Ensure consistency
df <- df[order(df$mid_ma, decreasing = TRUE), ]
df$bin <- seq_len(nrow(df))
row.names(df) <- NULL
return(df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.