#' Chronologies and growth rates of moss
#'
#' Provides facilities for estimating chronologies and growth rates
#' of moss (or similar) samples based on radiocarbon dating.
#'
#' @name DateMoss-package
#' @docType package
#' @author S. Wotherspoon, B. Raymond, S. Hall
NULL
##' Atmospheric radiocarbon calibration data
##'
##' Atmospheric radiocarbon data from 1450-2013. These data are
##' largely based on Hua et al (2013), but have been extrapolated at
##' both ends.
##'
##' @name Atmospheric14C
##' @docType data
##' @title Atmospheric radiocarbon calibration data
##' @format A data frame with 6 columns and 163 rows. The columns
##' represent
##' \tabular{rl}{
##' \code{Year} \tab Year of measurement. \cr
##' \code{F14C} \tab Fraction of modern carbon 14. \cr
##' \code{F14C.sigma} Standard deviation on F14C. \cr
##' \code{D14C} \tab Delta 14V. \cr
##' \code{F14C.sigma} \tab Standard deviation on D14C. \cr
##' \code{PMC} \tab Percent modern carbon.
##' }
##' @source Hua, Q., Barbetti, M. & Rakowski, A. Z. (2013).
##' Atmospheric radiocarbon for the period 1950-2010.
##' Radiocarbon, 55(4), 2059-2072.
##' @keywords data
NULL
##' Initial Chronology for initializing MCMC sampler
##'
##' The function \code{generateInitial} generates random chronologies
##' that can be used to initialize the MCMC sampler
##' \code{\link{metropolisDate}}.
##'
##' The user must provide the vector \code{lengths} containing the
##' continuous sequence of segment lengths in timewise order, the
##' earliest possible date \code{tmin} and the final date \code{tmax}
##' (usually the acquisition date). If in addition the user specifies
##' a maximal growth rate then cut dates will be generated such that the
##' growth rate never exceeds this upper limit, and if this is not
##' possible the function will raise an error.
##'
##' @title Initial Chronologies
##' @param lengths a vector of segment lengths (in timewise order)
##' @param tmin the minimum possible date
##' @param tmax the final date
##' @param chains the number of chains to generate initializations for
##' @param max.growth the maximum possible growth rate
##' @return a list of vectors of segment cut dates
##' @export
generateInitial <- function(lengths,tmin,tmax,chains=1L,max.growth=NULL) {
n <- length(lengths)
## The minimum time interval for each segment
dt.min <- if(!is.null(max.growth)) lengths/max.growth else rep(0,n)
## Residual, unaccounted for time
r <- (tmax-tmin)-sum(dt.min)
if(r < 0) stop("Max growth rate too low for time frame")
## Latest possible time for each cut
ts.min <- tmax-rev(cumsum(c(0,rev(dt.min))))
replicate(chains,ts.min - rev(c(0,sort(runif(n,0,r)))),simplify=FALSE)
}
##' Estimate segment times and growth rates by MCMC
##'
##' The two functions \code{metropolisDate0} and \code{metropolisDate}
##' estimate the segment cut times by Metropolis sampling. The
##' \code{metropolisDate0} variant assumes the standard deviation of
##' the PMC errors (\code{sigma}) is fixed, while
##' \code{metropolisDate} Gibbs samples for \code{sigma}.
##'
##' The user must supply the lengths \code{len} and mean carbon
##' concentrations \code{pmc} of the segments. These must be supplied
##' in timewise order - oldest segment first and the segments must be
##' contiguous, but the concentration data may contain missing values.
##'
##' The user must also supply atmospheric radiocarbon calibration data
##' as two vectors, \code{Year} the time of measurement in
##' (fractional) years and \code{PMC} the recorded percentage of
##' modern carbon.
##'
##' The user may also supply a function that computes the
##' contributions of each segment to the log prior. This function
##' takes two arguments, \code{ts} a vector of the (n+1) segment
##' endpoint times and \code{len} the lengths of the n segments. This
##' function must compute the log of the (non-normalized) contributions
##' to the prior from each segment.
##'
##' @title Estimate segment dates
##' @param ts.init a vector or a list of vectors of initial date
##' estimates, as generated by \code{generateInitial}
##' @param tmin the minimum possible date
##' @param len lengths of the segments (in timewise order, oldest
##' first)
##' @param pmc mean percentage modern carbon concentration of the
##' segments (in timewise order, oldest first)
##' @param Year calibration data - (fractional) year of atmospheric
##' carbon measurement
##' @param PMC calibration data - percent modern carbon
##' @param sigma standard deviation of PMC errors
##' @param iters number of samples to draw.
##' @param thin rate at which to thin samples.
##' @param chains number of chains to sample.
##' @param log.prior function to calculate the contribution from each
##' segment to the log prior for growth rates, given segment times and
##' lengths.
##' @param alpha shape parameter for the Gamma prior for tau
##' @param beta rate parameter for the Gamma prior for tau
##' @param verbose report progress at prompt?
##' @return \code{metropolisDate0} returns a list containing a coda
##' object describing the segment times. \code{metropolisDate}
##' returns a list containing a coda object describing the segment
##' times and a coda object describing
##' @importFrom coda mcmc mcmc.list
##' @export
metropolisDate <- function(ts.init,tmin,
len,pmc,Year,PMC,
iters=2000L,thin=25L,
chains=if(is.list(ts.init)) length(ts.init) else 1L,
log.prior=NULL,alpha=0.01,beta=0.01,
verbose=interactive()) {
## Ensure ordering of calibration data
PMC <- PMC[order(Year)]
Year <- Year[order(Year)]
## Integrate calibration carbon concentrations by trapezoidal rule,
## and construct interpolator
IPMC <- approxfun(Year,cumsum(c(0,diff(Year)*(PMC[-1L]+PMC[-length(PMC)])/2)))
## Given the segment end times, calculate the expected average PMC
## in each segment
segmentPMC <- function(ts) diff(IPMC(ts))/diff(ts)
## Install uniform prior when none is given
if(is.null(log.prior)) {
log.prior <- function(ts,len) 0
}
## Set up initial times
ts.init <- rep(if(is.list(ts.init)) ts.init else list(ts.init),length.out=chains)
n <- length(ts.init[[1L]])
pmc.na <- is.na(pmc)
m <- sum(!pmc.na)
## List of chains
ch.times <- vector(mode="list",chains)
ch.sigma <- vector(mode="list",chains)
## PARALLEL - parallelize over this loop
for(k1 in seq_len(chains)) {
## Allocate storage for this chain
ch.ts <- matrix(0,iters,n)
ch.sg <- double(iters)
## Initialize times for this chain
ts <- as.vector(ts.init[[k1]])
## Output initial iteration count
if(verbose) {
cat("iter ",sprintf("%6d",1))
flush.console()
}
for(k2 in seq_len(iters)) {
## Update iteration count
if(verbose && k2%%100==0) {
cat("\b\b\b\b\b\b");
cat(sprintf("%6d",k2));
flush.console()
}
for(k3 in seq_len(thin)) {
## Gibbs sample for sigma
r <- ifelse(pmc.na,0,pmc-segmentPMC(ts))
tau <- rgamma(1,alpha+m/2,beta+sum(r^2)/2)
sigma <- 1/sqrt(tau)
## Red-black update - update the times in two interleaved sets
for(rb in seq_len(2L)) {
## Contribution to log posterior from each segment
logps <- log.prior(ts,len)
logps <- logps + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts),sigma,log=T))
## Indices to update
is <- seq.int(rb,n-1L,by=2L)
## New proposal
lwr <- c(tmin,ts)[is]
upr <- ts[is+1L]
ts.new <- ts
ts.new[is] <- runif(length(is),lwr,upr)
## Contribution to log posterior from each segment of proposal
logps.new <- log.prior(ts.new,len)
logps.new <- logps.new + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts.new),sigma,log=T))
## Metropolis-Hastings rule - which proposals are kept?
logp.is <- c(0,logps)[is]+logps[is]
logp.is.new <- c(0,logps.new)[is]+logps.new[is]
keep <- logp.is.new-logp.is > log(runif(length(is)))
is <- is[keep]
ts[is] <- ts.new[is]
}
}
## Store this sample
ch.ts[k2,] <- ts
ch.sg[k2] <- sigma
}
## Store this chain's samples
colnames(ch.ts) <- paste0("time",seq_len(n))
ch.times[[k1]] <- ch.ts
ch.sg <- as.matrix(ch.sg)
colnames(ch.sg) <- "sigma"
ch.sigma[[k1]] <- ch.sg
if(verbose) cat("\n")
}
## Convert to a list of coda objects
if(chains==1L) {
list(times=mcmc(ch.times[[1L]],start=thin,thin=thin),
sigma=mcmc(ch.sigma[[1L]],start=thin,thin=thin))
} else {
list(times=do.call(mcmc.list,lapply(ch.times,mcmc,start=thin,thin=thin)),
sigma=do.call(mcmc.list,lapply(ch.sigma,mcmc,start=thin,thin=thin)))
}
}
##' @rdname metropolisDate
##' @export
metropolisDate0 <- function(ts.init,tmin,
len,pmc,Year,PMC,
sigma=4,iters=2000L,thin=25L,
chains=if(is.list(ts.init)) length(ts.init) else 1L,
log.prior=NULL,
verbose=interactive()) {
## Ensure ordering of calibration data
PMC <- PMC[order(Year)]
Year <- Year[order(Year)]
## Integrate calibration carbon concentrations by trapezoidal rule,
## and construct interpolator
IPMC <- approxfun(Year,cumsum(c(0,diff(Year)*(PMC[-1L]+PMC[-length(PMC)])/2)))
## Given the segment end times, calculate the expected average PMC
## in each segment
segmentPMC <- function(ts) diff(IPMC(ts))/diff(ts)
## Install uniform prior when none is given
if(is.null(log.prior)) {
log.prior <- function(ts,len) 0
}
## Set up initial times
ts.init <- rep(if(is.list(ts.init)) ts.init else list(ts.init),length.out=chains)
n <- length(ts.init[[1L]])
## List of chains
ch.times <- vector(mode="list",chains)
## PARALLEL - parallelize over this loop
for(k1 in seq_len(chains)) {
## Allocate storage for this chain
ch.ts <- matrix(0,iters,n)
## Initialize times for this chain
ts <- as.vector(ts.init[[k1]])
## Output initial iteration count
if(verbose) {
cat("iter ",sprintf("%6d",1))
flush.console()
}
for(k2 in seq_len(iters)) {
## Update iteration count
if(verbose && k2%%100==0) {
cat("\b\b\b\b\b\b");
cat(sprintf("%6d",k2));
flush.console()
}
for(k3 in seq_len(thin)) {
## Red-black update - update the times in two interleaved sets
for(rb in seq_len(2L)) {
## Contribution to log posterior from each segment
logps <- log.prior(ts,len)
logps <- logps + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts),sigma,log=T))
## Indices to update
is <- seq.int(rb,n-1L,by=2L)
## New proposal
lwr <- c(tmin,ts)[is]
upr <- ts[is+1L]
ts.new <- ts
ts.new[is] <- runif(length(is),lwr,upr)
## Contribution to log posterior from each segment of proposal
logps.new <- log.prior(ts.new,len)
logps.new <- logps.new + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts.new),sigma,log=T))
## Metropolis-Hastings rule - which proposals are kept?
logp.is <- c(0,logps)[is]+logps[is]
logp.is.new <- c(0,logps.new)[is]+logps.new[is]
keep <- logp.is.new-logp.is > log(runif(length(is)))
is <- is[keep]
ts[is] <- ts.new[is]
}
}
## Store this sample
ch.ts[k2,] <- ts
}
## Store this chain's samples
colnames(ch.ts) <- paste0("time",seq_len(n))
ch.times[[k1]] <- ch.ts
if(verbose) cat("\n")
}
## Convert to a list of coda objects
if(chains==1L) {
list(times=mcmc(ch.times[[1L]],start=thin,thin=thin))
} else {
list(times=do.call(mcmc.list,lapply(ch.times,mcmc,start=thin,thin=thin)))
}
}
##' Estimate growth rates from the estimated segment times
##'
##' This function generates samples of segment growth rates from the
##' sample of segment cut dates generated by \code{metropolisDate}, and
##' the vector \code{lengths} of segment lengths in timewise order.
##'
##' @title Estimate Growth Rates
##' @param times a fitted object returned by \code{metropolisDate}
##' @param lengths a vector of segment lengths
##' @return a coda object describing the growth rates
##' @importFrom coda mcmc mcmc.list thin
##' @export
growthRate <- function(times,lengths) {
coda <- function(tm) {
tm <- as.array(tm)
dt <- tm[,-1L]-tm[,-ncol(tm)]
growth <- t(lengths/t(dt))
colnames(growth) <- paste0("growth",seq_len(ncol(growth)))
mcmc(growth,start=start(times),thin=thin(times))
}
if(inherits(times,"mcmc.list"))
do.call(mcmc.list,lapply(times,coda))
else
coda(times)
}
##' Posterior prediction of segment mean carbon content
##'
##' Calculates the posterior segment mean carbon content given
##' atmospheric calibration data.
##'
##' @title Fitted Carbon Content
##' @param times a fitted object returned by \code{metropolisDate}
##' @param Year calibration data - (fractional) year of atmospheric
##' carbon measurement
##' @param PMC calibration data - recorded percentage of modern carbon
##' @return a coda object describing the growth rates
##' @importFrom coda mcmc mcmc.list thin
##' @export
fittedCarbon <- function(times,Year,PMC) {
## Integrate calibration carbon concentrations by trapezoidal rule,
## and construct interpolator
PMC <- PMC[order(Year)]
Year <- Year[order(Year)]
IPMC <- approxfun(Year,cumsum(c(0,diff(Year)*(PMC[-1L]+PMC[-length(PMC)])/2)))
## Given the segment end times, calculate the expected average PMC
## in each segment
segmentPMC <- function(ts) diff(IPMC(ts))/diff(ts)
coda <- function(tm) {
tm <- as.array(tm)
pmc <- matrix(0,nrow(tm),ncol(tm)-1L)
for(k in seq_len(nrow(tm))) pmc[k,] <- segmentPMC(tm[k,])
colnames(pmc) <- paste0("PMC",seq_len(ncol(pmc)))
mcmc(pmc,start=start(times),thin=thin(times))
}
if(inherits(times,"mcmc.list"))
do.call(mcmc.list,lapply(times,coda))
else
coda(times)
}
##' Priors for growth rates.
##'
##' These functions construct functions to evaluate the contribution
##' to the log prior from each segment, for use with
##' \code{metropolisDate}.
##'
##' The \code{growth.prior.uniform} creates a uniform prior that
##' assumes growth rates are uniform up to a user specified maximum
##' rate.
##'
##' The \code{growth.prior.gamma} creates a Gamma prior for grwoth
##' rates. The user can specify the either the parameters of the
##' distribution as either the prior mean and standard deviation or
##' the prior shape and rate of the Gamma distribution.
##'
##' @title Growth Priors
##' @param max.growth the maximum possible growth rate
##' @param mean the mean of the Gamma prior for growth
##' @param sd the standard deviation of the Gamma prior for growth
##' @param shape the shape parameter of the Gamma prior for growth
##' @param rate the rate parameter of the Gamma prior for growth
##' @return function to evaluate the contribution to the log prior
##' from each segment.
##' @export
growth.prior.uniform <- function(max.growth) {
## Return function to compute contributions to log prior
function(ts,ls) {
## Compute growth rates
gs <- ls/diff(ts)
## Growth rates above maximum are effectively impossible
ifelse(gs < max.growth,0,-1.0E12)
}
}
##' @rdname growth.prior.uniform
##' @export
growth.prior.gamma <- function(mean,sd,shape=(mean/sd)^2,rate=mean/sd^2) {
## Return function to compute contributions to log prior
function(ts,ls) {
## Compute growth rates
gs <- ls/diff(ts)
## Growth rates above maximum are effectively impossible
dgamma(gs,shape=shape,rate=rate,log=TRUE)
}
}
##' Quantiles from a coda object
##'
##' Converts an \code{mcmc} or \code{mcmc.list} object to a matrix and
##' applies quantile.
##'
##' @title Quantiles
##' @param x a coda object
##' @param ... further arguments passed to \code{quantile}
##' @return An array of quantiles
##' @importFrom stats quantile
##' @export
quantile.mcmc <- function(x,...) {
t(apply(x,2,quantile,...))
}
##' @rdname quantile.mcmc
##' @importFrom stats quantile
##' @export
quantile.mcmc.list <- function(x,...) {
t(apply(do.call(rbind,x),2,quantile,...))
}
##' Read sample data from an Excel file
##'
##' This function assumes the data are stored as one contigous block
##' of cells in an Excel spreadsheet, starting at the cell address
##' \code{address}. The function reads the largest contiguous block
##' that starts at this address and includes every column that has a
##' heading and every row for which the first three columns are not
##' all blank. The function then checks that the first few columns
##' have the names specified by \code{check.names}.
##'
##' The function also searches for the date the data were collected in
##' a cell below the cell containing the text "Date collected". This
##' date is extracted and added to the data.frame as an attribute.
##'
##' If \code{sheetname} is NULL, the workbook must consist of a single
##' sheet.
##'
##' @title Read Excel file
##' @param filename the Excel file to read
##' @param sheetname the name of the worksheet to read. If NULL, the
##' workbook must contain only one sheet.
##' @param address the cell address of the first header cell
##' @param check.names the names of the first few columns of data.
##' @return a dataframe of data
##' @importFrom XLConnect loadWorkbook getSheets readWorksheetFromFile
##' @export
readXLSample <- function(filename,sheetname=NULL,address="B4",
check.names=c("Lab ID", "Moss species", "Sample ID",
"Depth (mm)","Segment length (mm)","pMC")) {
trim <- function(s) gsub("^\\s+|\\s+$","",s)
## If no sheet specified, workbook must have one sheet
if(is.null(sheetname) || nchar(sheetname)==0) {
workbook <- loadWorkbook(filename)
sheetname <- getSheets(workbook)
}
if(length(sheetname)!=1) stop("Sheet is not uniquely identified")
## Decode excel cell address to (i,j)
address <- trim(toupper(address[1]))
if(length(grep("^([A-Z]+)([0-9]+)$",address))!=1) stop("Invalid cell address")
i <- as.integer(gsub("^([A-Z]+)([0-9]+)$","\\2",address))
letters <- gsub("^([A-Z]+)([0-9]+)$","\\1",address)
j <- sum((utf8ToInt(letters)-64)*(26^rev(seq_len(nchar(letters))-1)))
## Read whole sheet
fullsheet <- sheet <- readWorksheetFromFile(filename,sheet=sheetname,header=FALSE)
## Extract header and check column names
header <- trim(sheet[i,j:ncol(sheet)])
if(!all(check.names == header[seq_along(check.names)]))
stop("Header names do not match check.names argument")
## Extract data
sheet <- sheet[(i+1):nrow(sheet),j:ncol(sheet)]
## Trim to first blank row
nrows <- min(nrow(sheet),which(apply(sheet[,1:3],1,function(r) all(is.na(r))))-1)
sheet <- sheet[seq_len(nrows),!is.na(header)]
## Set column names and convert columns to appropriate data type
colnames(sheet) <- make.names(gsub(" |_|\\)|\\(","",header[!is.na(header)]))
## only convert column if it is of character type. Sometimes (specifically if the header cell is empty)
## the column may already be numeric and attempting to apply type.convert will result in an error
for(k in seq_len(ncol(sheet))) { if (is.character(sheet[,k])) sheet[,k] <- type.convert(sheet[,k]) }
## Extract aquisition date and add as attribute
fullsheet <- tolower(trim(as.matrix(fullsheet)))
k <- which(fullsheet=="date collected",arr.ind=TRUE)
if(nrow(k)!=1) stop("Unique collection date not found in data sheet")
acquisition <- strptime(fullsheet[k+c(1,0)],format=getOption("XLConnect.dateTimeFormat"))
acquisition <- 1900+acquisition$year+acquisition$yday/365.25
attr(sheet,"acquisition_date") <- acquisition
## Reverse the sheet.
sheet[rev(seq_len(nrow(sheet))),]
}
##' Smooth calibration curve
##'
##' The function \code{smoothCalibration} smooths
##' an atmospheric carbon curve. This function is experimental.
##'
##' A smoothed calibration curve can be used to provide an approximation for the
##' effect of having multiple strands in a sample of moss, each with
##' slightly different growth rates or physical alignment within the sample.
##' Currently the only method provided is "rgr", which
##' effectively assumes that the sample is comprised of multiple strands, each
##' with a constant (random) growth rate offset.
##'
##' @title Calibration Curve Smoothing
##' @param Year calibration data - vector of (fractional) year of atmospheric carbon measurement
##' @param PMC calibration data - vector of percent modern carbon values
##' @param smoothing.level (scalar) the level of smoothing to apply. The interpretation of this value depends on the method used
##' @param method string specifying the smoothing method to use. Currently only "rgr"
##' @return a data frame of smoothed carbon values with column names "Year" and "PMC"
##' @export
smoothCalibration <- function(Year,PMC,smoothing.level,method="rgr") {
method=match.arg(tolower(method),c("rgr"))
switch(method,
rgr={ groff=rnorm(1000,sd=smoothing.level) ## draw 1000 random growth rate offsets
groff=groff[abs(groff)<1] ## these are treated as a fraction, so can only keep those with abs value less than 1
## this is a bit rubbish, can surely be done better
af=approxfun(Year,PMC)
npts=length(Year)
oc=sapply(groff,function(z){ af(seq(npts-1,0,by=-1)*z+Year) })
data.frame(Year=Year,PMC=apply(oc,1,mean))
}
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.