library(rcarbon)
#' Filter archaeological site coordinates and dates, retaining only the
#' earliest radiocarbon date per site.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr group_by top_n
#' @importFrom rlang .data
#' @param sites A SpatialPointsDataFrame object with archaeological sites and
#' associated radiocarbon ages.
#' @param c14bp A string. Name of the field with the radiocarbon ages in C14 BP
#' format.
#' @return A SpatialPointsDataFrame object with the earliest C14 date for every
#' site.
#' @export
filterDates <- function(sites, c14bp) {
x <- c(colnames(sp::coordinates(sites)))[1]
y <- c(colnames(sp::coordinates(sites)))[2]
clusters <- sp::zerodist(sites, zero = 0, unique.ID = TRUE)
sites$clusterID <- clusters
sites.df <- as.data.frame(sites)
sites.max <- as.data.frame(sites.df %>% group_by(.data$clusterID) %>%
top_n(1, get(c14bp)))
xy <- cbind(sites.max[[x]], sites.max[[y]])
sites.max.spdf <- sp::SpatialPointsDataFrame(xy, sites.max)
sp::proj4string(sites.max.spdf) <- sp::proj4string(sites)
return(sites.max.spdf)
}
#' Perform regression of dates versus distances from multiple potential origins
#' in order to find the best model. It is also possible to specify multiple
#' distances for the spatial binning of the dates.
#'
#' @param ftrSites A SpatialPointsDataFrame object with associated earliest
#' C14 dates per site and respective calibrated distributions (CalDates
#' objects) in a field named "cal".
#' @param c14bp A string. Name of the field with the radiocarbon ages in C14
#' BP format.
#' @param origins A SpatialPointsDataFrame object. The sites to be tested for
#' the most likely origin of expansion.
#' @param siteNames A string. Name of the field with the site names or ids for
#' the potential origins.
#' @param binWidths A number or vector of numbers. Width(s) of the spatial bins
#' in km.
#' @param nsim A number. Number of simulations to be run during the
#' bootstrapping procedure. Default is 999.
#' @param method A string. Method to be used in the regression. One of "rma"
#' or "ols". Default is "rma".
#' @param ncores A number. Number of cores used for parallel processing.
#' Default is 1.
#' @return a list with two elements, the result of the iteration over all
#' potential origins and the best model selected among those.
#' @export
#' @examples
#' \donttest{
#' data(neof)
#' data(centers)
#' iter <- iterateSites(neof, "C14Age", centers, "Site", binWidths=500)
#' }
iterateSites <- function(ftrSites, c14bp, origins, siteNames, binWidths = 0,
nsim = 999, method = "rma", ncores = 1) {
datalen <- length(binWidths) * length(origins)
res <- data.table::data.table("r" = numeric(datalen), "p" = numeric(datalen),
"bin" = numeric(datalen), "n" = numeric(datalen),
"site" = character(datalen))
origins$r <- 0
# Perform reduced major axis regression for each bin width and for each
# hypothetical origin, if more than one is specified
pb <- utils::txtProgressBar(min = 0, max = datalen, style = 3)
counter <- 0
for (i in (1:length(binWidths))) {
for (j in (1:length(origins))) {
counter <- counter + 1
dateModel <- modelDates(ftrSites, c14bp = c14bp,
origin = origins[j,],
binWidth = binWidths[i], nsim = nsim,
method = method, ncores = ncores)
if (method == "rma") {
slope <- mean(dateModel$model[,"slo"])
} else if (method == "ols") {
# For practical reasons, consider only time-versus-distance
# for ols models
slope <- mean(dateModel$td.model[,"slo"])
}
# Only proceed if the slope is negative, i.e. dates become more
# recent with increasing distance
if (slope < 0) {
if (method == "rma") {
meanr <- mean(dateModel$model[,"r"])
meanp <- mean(dateModel$model[,"p"])
} else if (method == "ols") {
# For practical reasons, consider only time-versus-distance
# for ols models
meanr <- mean(dateModel$td.model[,"r"])
meanp <- mean(dateModel$td.model[,"p"])
}
# Save the best model
if (!exists("bestModel")) {
bestModel <- dateModel
bestModel$meanr <- meanr
} else if (meanp < 0.05 & meanr > bestModel$meanr) {
bestModel <- dateModel
bestModel$meanr <-meanr
}
if (meanp < 0.01) {
ast <- "**"
} else if (meanp < 0.05) {
ast <- "* "
} else {
ast <- " "
}
if (binWidths[i] > 0) {
nsites <- nrow(dateModel$binSites)
} else {
nsites <- nrow(dateModel$allSites)
}
# Extract the r and p of each model and store it in the result,
# together with the corresponding bin width
rval <- round(as.double(sqrt(meanr)), 4)
res[counter, "r"] <- rval
res[counter, "p"] <- round(as.double(meanp), 2)
res[counter, "bin"] <- binWidths[i]
res[counter, "n"] <- nsites
res[counter, "site"] <- paste(toString(origins[[siteNames]][j]),
ast, sep="")
if (rval > origins$r[j]) {
origins$r[j] <- rval
}
}
utils::setTxtProgressBar(pb, counter)
}
}
close(pb)
origins.idw <- interpolateIDW(origins, "r")
# Create map object
map <- list("sites" = ftrSites, "origins" = origins, "idw" = origins.idw)
class(map) <- "dateMap"
res <- res[order(-res$r),]
return(list("results" = res, "model" = bestModel, "map" = map))
}
#' Interpolate using inverse distance weighting.
#' @param points A SpatialPointsDataFrame object.
#' @param attr A string. Name of the field with values to interpolate.
#' @return A RasterLayer.
#' @export
interpolateIDW <- function(points, attr) {
grd <- as.data.frame(sp::spsample(points, "regular", n = 10000))
names(grd) <- c("x", "y")
sp::coordinates(grd) <- ~x+y
sp::proj4string(grd) <- sp::proj4string(points)
sp::gridded(grd) <- TRUE
sp::fullgrid(grd) <- TRUE
points.idw <- gstat::idw(get(attr) ~ 1, points, newdata = grd, idp = 2.0)
return(raster::raster(points.idw))
}
#' Perform regression of archaeological dates on great circle distances from
#' a hypothetical origin. Dates can be filtered to retain only the earliest
#' dates per distance bins (Hamilton and Buchanan 2007). Bootstrap is executed
#' to account for uncertainty in calibrated dates. Regression can be either
#' reduced major axis or ordinary least squares. If using ordinary least
#' squares, regression is performed both on time-versus-distance and on
#' distance-versus-time.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr group_by top_n
#' @importFrom rlang .data
#' @import rcarbon
#' @param ftrSites A SpatialPointsDataFrame object with associated earliest
#' C14 dates per site and respective calibrated distributions (CalDates
#' objects) in a field named "cal". Result of applying filterDates() and
#' calAll().
#' @param c14bp A string. Name of the field with the radiocarbon ages in C14
#' BP format.
#' @param origin A SpatialPointsDataFrame object. The site considered as
#' hypothetical origin of expansion.
#' @param binWidth A number. Width of the spatial bins in km, calculated as
#' distance intervals from the hypothetical origin. Default is 0 (no bins).
#' @param nsim A number. Number of simulations to be run during the
#' bootstrapping procedure. Default is 999.
#' @param method A string. Method to be used in the regression. One of "rma"
#' or "ols". Default is "rma".
#' @param ncores A number. Number of cores used for parallel processing.
#' Default is 1.
#' @return a dateModel object.
#' @export
#' @examples
#' \donttest{
#' data(neof)
#' data(centers)
#' jericho <- centers[centers$Site=="Jericho",]
#' model <- modelDates(neof, "C14Age", jericho, method="ols")
#' }
#' \dontshow{
#' data(neof)
#' data(centers)
#' jericho <- centers[centers$Site=="Jericho",]
#' model <- modelDates(neof[1:2,], "C14Age", jericho, method="ols",
#' nsim=1, ncores=2)
#' }
modelDates <- function(ftrSites, c14bp, origin, binWidth = 0, nsim = 999,
method = "rma", ncores = 1) {
cl <- parallel::makeCluster(ncores)
parallel::clusterEvalQ(cl, library("rcarbon"))
parallel::clusterEvalQ(cl, library("smatr"))
parallel::clusterExport(cl, "sampleCalDates")
ftrSites$id <- 1:nrow(ftrSites)
# Caculate great circle distances
ftrSites$dists <- sp::spDistsN1(ftrSites, origin, longlat = TRUE)
# Perform spatial binning, retain only earliest date per bin
if (binWidth > 0) {
ftrSites$bin <- floor(ftrSites$dists / binWidth)
ftrSites.df <- as.data.frame(ftrSites)
ftrSites.df$cal <- NULL
ftrSites.max <- ftrSites.df %>% group_by(.data$bin) %>% top_n(1, get(c14bp))
binSites <- ftrSites[match(ftrSites.max$id, ftrSites$id),]
dists <- binSites$dists
calDates <- binSites$cal
} else {
binSites <- NULL
dists <- ftrSites$dists
calDates <- ftrSites$cal
}
if (method == "rma") {
models <- vector("list", length = nsim)
parallel::clusterExport(cl, list("models", "dists", "calDates"),
envir = environment())
models <- parallel::parLapply(cl, 1:nsim, function(k) {
dates <- sampleCalDates(calDates)
model <- smatr::sma(dates ~ dists, robust = TRUE)
})
parallel::stopCluster(cl)
gc()
res <- matrix(nrow = nsim, ncol = 4)
# Save the coefficient, probability, slope and intercept of the
# regressions
colnames(res) <- c("r", "p", "slo", "int")
for (k in 1:nsim) {
res[k, "r"] <- models[[k]]$r[[1]]
res[k, "p"] <- models[[k]]$p[[1]]
res[k, "slo"] <- models[[k]]$coef[[1]][2, 1]
res[k, "int"] <- models[[k]]$coef[[1]][1, 1]
}
dateModel <- list("model" = res, "binSites" = binSites,
"allSites" = ftrSites, "binWidth" = binWidth,
"method" = method)
} else if (method == "ols") {
td.models <- vector("list", length = nsim)
dt.models <- vector("list", length = nsim)
parallel::clusterExport(cl, list("td.models", "dt.models", "dists", "calDates"),
envir = environment())
# Time-versus-distance
td.models <- parallel::parLapply(cl, 1:nsim, function(k) {
dates <- sampleCalDates(calDates)
model <- stats::lm(dates ~ dists)
})
# Distance-versus-time
dt.models <- parallel::parLapply(cl, 1:nsim, function(k) {
dates <- sampleCalDates(calDates)
model <- stats::lm(dists ~ dates)
})
parallel::stopCluster(cl)
gc()
# Save the coefficient, probability, slope and intercept of the
# regressions
td.res <- matrix(nrow = nsim, ncol = 4)
dt.res <- matrix(nrow = nsim, ncol = 4)
colnames(td.res) <- c("r", "p", "slo", "int")
colnames(dt.res) <- c("r", "p", "slo", "int")
for (k in 1:nsim) {
td.res[k, "r"] <- sqrt(summary(td.models[[k]])$r.squared)
td.res[k, "p"] <- summary(td.models[[k]])$coefficients[2, 4]
td.res[k, "slo"] <- td.models[[k]]$coefficients[[2]]
td.res[k, "int"] <- td.models[[k]]$coefficients[[1]]
dt.res[k, "r"] <- sqrt(summary(dt.models[[k]])$r.squared)
dt.res[k, "p"] <- summary(dt.models[[k]])$coefficients[2, 4]
int <- dt.models[[k]]$coefficients[[1]]
slo <- dt.models[[k]]$coefficients[[2]]
# Invert the intercept and slope for the plot
dt.res[k, "slo"] <- 1 / slo
dt.res[k, "int"] <- -int / slo
}
dateModel <- list("td.model" = td.res, "dt.model" = dt.res,
"binSites" = binSites, "allSites" = ftrSites,
"binWidth" = binWidth, "method" = method)
}
class(dateModel) <- "dateModel"
return(dateModel)
}
#' Plot a map with the results of the iteration over multiple sites, showing
#' an interpolated surface of r values for the sites considered as potential
#' origins.
#'
#' @param x a dateMap object. One of the list elements returned from
#' the iterateSites() function.
#' @param ... ignored
#' @return a spplot object.
#' @export
plot.dateMap <- function(x, ...) {
e <- raster::extent(x$sites)
sites <- list("sp.points", x$sites, cex = 0.25, col = "black",
alpha = 0.5)
continent <- list("sp.polygons", spDates::land, fill = "white")
plt <- sp::spplot(raster::mask(x$idw, spDates::land), cuts = 8,
col.regions = viridisLite::magma,
xlim = c(e[1], e[2]), ylim = c(e[3], e[4]),
xlab = list("R", cex = 0.75),
par.settings = list(panel.background = list(col = "lightcyan2"),
layout.heights = list(xlab.key.padding = 1)),
sp.layout = list(sites, continent),
scales = list(draw = TRUE, cex = 0.5, font = 1,
tck = c(1, 0)),
colorkey = list(height = 0.75, space = "bottom"),
main = list(label = "Interpolation map of potential origins' R values",
cex = 1))
return(plt)
}
#' Plot the results of the regression bootstrap on archaeological dates versus
#' distances from a hypothetical origin, with fitted lines.
#'
#' @importFrom rlang .data
#' @param x a dateModel object created with modelDates().
#' @param ... ignored
#' @return a ggplot object.
#' @export
plot.dateModel <- function(x, ...) {
if (!is.null(x$binSites)) {
binSites <- data.frame("dists" = x$binSites$dists,
"dates" = x$binSites$med,
"binned" = 1)
} else {
binSites <- NULL
}
allSites <- data.frame("dists" = x$allSites$dists,
"dates" = x$allSites$med,
"binned" = 0)
if (!is.null(binSites)) {
points <- rbind(allSites, binSites)
binLabel <- paste(x$binWidth, " km bins")
} else {
points <- allSites
binLabel <- NULL
}
if (x$method == "rma") {
model <- as.data.frame(x$model)
# Mean intercept and slope of bootstrap for display
int.m <- mean(model$int)
slo.m <- mean(model$slo)
# Display significant p value as either < 0.01 or < 0.05. If not
# significant, display actual value up to two decimals.
if (mean(model$p) < 0.01) {
pval <- "p < 0.01"
} else if (mean(model$p) < 0.05) {
pval <- "p < 0.05"
} else {
pval <- paste("p = ", format(mean(model$p), digits = 2,
scientific = FALSE))
}
rval <- paste("r = -", format(sqrt(mean(mean(model$r))), dig = 2),
sep="")
plt <- ggplot2::ggplot() + ggplot2::xlim(0, max(points$dists)) +
ggplot2::ylim(min(points$dates), max(points$dates)) +
ggplot2::geom_abline(data = model,
ggplot2::aes(intercept = .data$int,
slope = .data$slo),
alpha = 0.05, lwd = 0.5,
colour = "#f8766d") +
ggplot2::geom_abline(ggplot2::aes(intercept = int.m,
slope = slo.m)) +
ggplot2::geom_point(data = points, ggplot2::aes(x = .data$dists, y = .data$dates,
fill = factor(.data$binned)),
shape = 21, size = 2) +
ggplot2::labs(x = "Distance from origin (km)", y = "Cal yr BP",
fill = binLabel) +
ggplot2::scale_fill_manual(labels = c("all dates",
"earliest per bin"),
values = c("white", "black")) +
ggplot2::annotate("text", x = min(points$dists),
y = min(points$dates),
label = paste(rval, pval),
hjust = 0) +
ggplot2::theme(legend.position = c(1, 1),
legend.justification = c(1, 1))
} else if (x$method == "ols") {
# Time-versus-distance
td.model <- as.data.frame(x$td.model)
td.int.m <- mean(td.model$int)
td.slo.m <- mean(td.model$slo)
if (mean(td.model$p) < 0.01) {
td.pval <- "p < 0.01"
} else if (mean(td.model$p) < 0.05) {
td.pval <- "p < 0.05"
} else {
td.pval <- paste("p = ", format(mean(td.model$p), digits = 2,
scientific = FALSE))
}
td.rval <- paste("r = -", format(sqrt(mean(mean(td.model$r))), dig = 2),
sep="")
# Distance-versus-time
dt.model <- as.data.frame(x$dt.model)
dt.int.m <- mean(dt.model$int)
dt.slo.m <- mean(dt.model$slo)
if (mean(dt.model$p) < 0.01) {
dt.pval <- "p < 0.01"
} else if (mean(dt.model$p) < 0.05) {
dt.pval <- "p < 0.05"
} else {
dt.pval <- paste("p = ", format(mean(dt.model$p), digits = 2,
scientific = FALSE))
}
dt.rval <- paste("r = -", format(sqrt(mean(mean(dt.model$r))), dig = 2),
sep="")
plt <- ggplot2::ggplot() + ggplot2::xlim(0, max(points$dists)) +
ggplot2::ylim(min(points$dates), max(points$dates)) +
ggplot2::geom_abline(data = dt.model,
ggplot2::aes(intercept = .data$int,
slope = .data$slo),
alpha = 0.05, lwd = 0.5,
colour = "#00bfc4") +
ggplot2::geom_abline(data = td.model,
ggplot2::aes(intercept = .data$int,
slope = .data$slo),
alpha = 0.05, lwd = 0.5,
colour = "#f8766d") +
ggplot2::geom_abline(ggplot2::aes(intercept = dt.int.m,
slope = dt.slo.m),
lty = 2) +
ggplot2::geom_abline(ggplot2::aes(intercept = td.int.m,
slope = td.slo.m)) +
ggplot2::geom_point(data = points, ggplot2::aes(x = .data$dists,
y = .data$dates,
fill = factor(.data$binned)),
shape = 21, size = 2) +
ggplot2::labs(x = "Distance from origin (km)", y = "Cal yr BP",
fill = binLabel) +
ggplot2::scale_fill_manual(labels = c("all dates",
"earliest per bin"),
values = c("white", "black")) +
ggplot2::annotate("text", x = min(points$dists),
y = min(points$dates),
label = paste(td.rval, td.pval),
hjust = 0) +
ggplot2::theme(legend.position = c(1, 1),
legend.justification = c(1, 1))
}
return(plt)
}
#' Extract a single year estimate for ranges of calibrated dates with a
#' probability given by the calibrated probability distribution.
#'
#' @import rcarbon
#' @param calDates A CalDates object or a vector of CalDates.
#' @return A vector of cal BP single year estimates.
#' @export
sampleCalDates <- function(calDates) {
years <- numeric(length(calDates))
for (i in 1:length(calDates)) {
years[i] <- sample(calDates[i]$grids[[1]]$calBP, size = 1,
prob = calDates[i]$grids[[1]]$PrDens)
}
return(years)
}
#' Return summary statistics for a dateModel object.
#'
#' @param object A dateModel object created with modelDates().
#' @param ... ignored
#' @return A dataframe with estimated start date and speed of advance.
#' @export
summary.dateModel <- function(object, ...) {
if (object$method == "rma") {
start <- mean(object$model[, "int"])
start.SD <- stats::sd(object$model[, "int"])
speed <- 1 / mean(object$model[, "slo"])
speed.SD <- (1 / ((mean(object$model[, "slo"])) +
(1.96 * stats::sd(object$model[, "slo"])))) - speed
df <- data.frame(c(paste(format(start, digits = 0, scientific = FALSE),
"+/-", format(start.SD, digits = 0,
scientific = FALSE), "cal BP"),
paste(format(abs(speed), digits = 2,
scientific = FALSE),
"+/-", format(abs(speed.SD), digits = 2,
scientific = FALSE),
"km/yr")))
rownames(df) <- c("Start date:", "Speed of advance:")
colnames(df) <- NULL
} else if (object$method == "ols") {
# Time-versus-distance
td.start <- mean(object$td.model[, "int"])
td.start.SD <- stats::sd(object$td.model[, "int"])
td.speed <- 1 / mean(object$td.model[, "slo"])
td.speed.SD <- (1 / ((mean(object$td.model[, "slo"])) +
(1.96 * stats::sd(object$td.model[, "slo"])))) - td.speed
# Distance-versus-time
dt.start <- mean(object$dt.model[, "int"])
dt.start.SD <- stats::sd(object$dt.model[, "int"])
dt.speed <- 1 / mean(object$dt.model[, "slo"])
dt.speed.SD <- (1 / ((mean(object$dt.model[, "slo"])) +
(1.96 * stats::sd(object$dt.model[, "slo"])))) - dt.speed
df <- data.frame(c(paste(format(td.start, digits = 0,
scientific = FALSE), "+/-",
format(td.start.SD, digits = 0,
scientific = FALSE), "cal BP"),
paste(format(abs(td.speed), digits = 2,
scientific = FALSE),
"+/-", format(abs(td.speed.SD), digits = 2,
scientific = FALSE),
"km/yr")),
c(paste(format(dt.start, digits = 0,
scientific = FALSE), "+/-",
format(dt.start.SD, digits = 0,
scientific = FALSE), "cal BP"),
paste(format(abs(dt.speed), digits = 2,
scientific = FALSE),
"+/-", format(abs(dt.speed.SD), digits = 2,
scientific = FALSE),
"km/yr")))
rownames(df) <- c("Start date:", "Speed of advance:")
colnames(df) <- c("Time-versus-distance", "Distance-versus-time")
}
return(df)
}
#' Radiocarbon dates and coordinates of 717 Neolithic sites in the Near East
#' and Europe. Modified from Pinhasi et al. (2005). Only the earliest dates
#' per site are included.
#'
#' @format A data frame with 717 rows and 13 variables.
#' \itemize{
#' \item Latitude. Site latitude in decimal degrees.
#' \item Longitude. Site longitude in decimal degrees.
#' \item Site. Site name.
#' \item Location. Region where the site is located (Near East, Europe etc).
#' \item Country. Country where the site is located.
#' \item Period. Site period or culture (PPNA, PPNB, LBK etc.).
#' \item LabNumber. Laboratory number of the C14 date.
#' \item C14Age. Date in C14 years BP.
#' \item C14SD. Standard error of the radiocarbon date.
#' \item Material. Material dated (Charcoal, shell etc.).
#' \item Curve. Curve to be used in the calibration of each date (intcal13,
#' marine13).
#' \item cal. Calibrated dates as CalDates objects.
#' \item med. Median of the calibrated date in cal yr BP.
#' }
"neof"
#' Coordinates of 9 sites considered as potential centers of origin of the
#' Neolithic expansion. Modified from Pinhasi et al. (2005).
#'
#' @format A data frame with 9 rows and 3 variables.
#' \itemize{
#' \item Latitude. Site latitude in decimal degrees.
#' \item Longitude. Site longitude in decimal degrees.
#' \item Site. Site name.
#' }
"centers"
#' Land polygons.
#'
#' @format A SpatialPolygonsDataFrame object.
"land"
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.