R/DateMoss.R

#' Chronologies and growth rates of moss
#'
#' Provides facilities for estimating chronologies and growth rates
#' of moss (or similar) samples based on radiocarbon dating.
#'
#' @name DateMoss-package
#' @docType package
#' @author S. Wotherspoon, B. Raymond, S. Hall
NULL




##' Atmospheric radiocarbon calibration data
##'
##' Atmospheric radiocarbon data from 1450-2013.  These data are
##' largely based on Hua et al (2013), but have been extrapolated at
##' both ends.
##'
##' @name Atmospheric14C
##' @docType data
##' @title Atmospheric radiocarbon calibration data
##' @format A data frame with 6 columns and 163 rows.  The columns
##' represent
##' \tabular{rl}{
##' \code{Year} \tab Year of measurement. \cr
##' \code{F14C} \tab Fraction of modern carbon 14. \cr
##' \code{F14C.sigma} Standard deviation on F14C. \cr
##' \code{D14C} \tab Delta 14V. \cr
##' \code{F14C.sigma} \tab Standard deviation on D14C. \cr
##' \code{PMC} \tab Percent modern carbon.
##' }
##' @source Hua, Q., Barbetti, M. & Rakowski, A. Z. (2013).
##' Atmospheric radiocarbon for the period 1950-2010.
##' Radiocarbon, 55(4), 2059-2072.
##' @keywords data
NULL



##' Initial Chronology for initializing MCMC sampler
##'
##' The function \code{generateInitial} generates random chronologies
##' that can be used to initialize the MCMC sampler
##' \code{\link{metropolisDate}}.
##'
##' The user must provide the vector \code{lengths} containing the
##' continuous sequence of segment lengths in timewise order, the
##' earliest possible date \code{tmin} and the final date \code{tmax}
##' (usually the acquisition date).  If in addition the user specifies
##' a maximal growth rate then cut dates will be generated such that the
##' growth rate never exceeds this upper limit, and if this is not
##' possible the function will raise an error.
##'
##' @title Initial Chronologies
##' @param lengths a vector of segment lengths (in timewise order)
##' @param tmin the minimum possible date
##' @param tmax the final date
##' @param chains the number of chains to generate initializations for
##' @param max.growth the maximum possible growth rate
##' @return a list of vectors of segment cut dates
##' @export
generateInitial <- function(lengths,tmin,tmax,chains=1L,max.growth=NULL) {
  n <- length(lengths)

  ## The minimum time interval for each segment
  dt.min <- if(!is.null(max.growth)) lengths/max.growth else rep(0,n)
  ## Residual, unaccounted for time
  r <- (tmax-tmin)-sum(dt.min)
  if(r < 0) stop("Max growth rate too low for time frame")

  ## Latest possible time for each cut
  ts.min <- tmax-rev(cumsum(c(0,rev(dt.min))))

  replicate(chains,ts.min - rev(c(0,sort(runif(n,0,r)))),simplify=FALSE)
}


##' Estimate segment times and growth rates by MCMC
##'
##' The two functions \code{metropolisDate0} and \code{metropolisDate}
##' estimate the segment cut times by Metropolis sampling.  The
##' \code{metropolisDate0} variant assumes the standard deviation of
##' the PMC errors (\code{sigma}) is fixed, while
##' \code{metropolisDate} Gibbs samples for \code{sigma}.
##'
##' The user must supply the lengths \code{len} and mean carbon
##' concentrations \code{pmc} of the segments.  These must be supplied
##' in timewise order - oldest segment first and the segments must be
##' contiguous, but the concentration data may contain missing values.
##'
##' The user must also supply atmospheric radiocarbon calibration data
##' as two vectors, \code{Year} the time of measurement in
##' (fractional) years and \code{PMC} the recorded percentage of
##' modern carbon.
##'
##' The user may also supply a function that computes the
##' contributions of each segment to the log prior.  This function
##' takes two arguments, \code{ts} a vector of the (n+1) segment
##' endpoint times and \code{len} the lengths of the n segments.  This
##' function must compute the log of the (non-normalized) contributions
##' to the prior from each segment.
##'
##' @title Estimate segment dates
##' @param ts.init a vector or a list of vectors of initial date
##' estimates, as generated by \code{generateInitial}
##' @param tmin the minimum possible date
##' @param len lengths of the segments (in timewise order, oldest
##' first)
##' @param pmc mean percentage modern carbon concentration of the
##' segments (in timewise order, oldest first)
##' @param Year calibration data - (fractional) year of atmospheric
##' carbon measurement
##' @param PMC calibration data - percent modern carbon
##' @param sigma standard deviation of PMC errors
##' @param iters number of samples to draw.
##' @param thin rate at which to thin samples.
##' @param chains number of chains to sample.
##' @param log.prior function to calculate the contribution from each
##' segment to the log prior for growth rates, given segment times and
##' lengths.
##' @param alpha shape parameter for the Gamma prior for tau
##' @param beta rate parameter for the Gamma prior for tau
##' @param verbose report progress at prompt?
##' @return \code{metropolisDate0} returns a list containing a coda
##' object describing the segment times.  \code{metropolisDate}
##' returns a list containing a coda object describing the segment
##' times and a coda object describing
##' @importFrom coda mcmc mcmc.list
##' @export
metropolisDate <- function(ts.init,tmin,
                           len,pmc,Year,PMC,
                           iters=2000L,thin=25L,
                           chains=if(is.list(ts.init)) length(ts.init) else 1L,
                           log.prior=NULL,alpha=0.01,beta=0.01,
                           verbose=interactive()) {

  ## Ensure ordering of calibration data
  PMC <- PMC[order(Year)]
  Year <- Year[order(Year)]

  ## Integrate calibration carbon concentrations by trapezoidal rule,
  ## and construct interpolator
  IPMC <- approxfun(Year,cumsum(c(0,diff(Year)*(PMC[-1L]+PMC[-length(PMC)])/2)))

  ## Given the segment end times, calculate the expected average PMC
  ## in each segment
  segmentPMC <- function(ts) diff(IPMC(ts))/diff(ts)


  ## Install uniform prior when none is given
  if(is.null(log.prior)) {
    log.prior <- function(ts,len) 0
  }

  ## Set up initial times
  ts.init <- rep(if(is.list(ts.init)) ts.init else list(ts.init),length.out=chains)
  n <- length(ts.init[[1L]])
  pmc.na <- is.na(pmc)
  m <- sum(!pmc.na)

  ## List of chains
  ch.times <- vector(mode="list",chains)
  ch.sigma <- vector(mode="list",chains)

  ## PARALLEL - parallelize over this loop
  for(k1 in seq_len(chains)) {
    ## Allocate storage for this chain
    ch.ts <- matrix(0,iters,n)
    ch.sg <- double(iters)

    ## Initialize times for this chain
    ts <- as.vector(ts.init[[k1]])


    ## Output initial iteration count
    if(verbose) {
      cat("iter ",sprintf("%6d",1))
      flush.console()
    }

    for(k2 in seq_len(iters)) {

      ## Update iteration count
      if(verbose && k2%%100==0) {
        cat("\b\b\b\b\b\b");
        cat(sprintf("%6d",k2));
        flush.console()
      }

      for(k3 in seq_len(thin)) {

        ## Gibbs sample for sigma
        r <- ifelse(pmc.na,0,pmc-segmentPMC(ts))
        tau <- rgamma(1,alpha+m/2,beta+sum(r^2)/2)
        sigma <- 1/sqrt(tau)

        ## Red-black update - update the times in two interleaved sets
        for(rb in seq_len(2L)) {

          ## Contribution to log posterior from each segment
          logps <- log.prior(ts,len)
          logps <- logps + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts),sigma,log=T))

          ## Indices to update
          is <- seq.int(rb,n-1L,by=2L)

          ## New proposal
          lwr <- c(tmin,ts)[is]
          upr <- ts[is+1L]
          ts.new <- ts
          ts.new[is] <- runif(length(is),lwr,upr)

          ## Contribution to log posterior from each segment of proposal
          logps.new <- log.prior(ts.new,len)
          logps.new <- logps.new + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts.new),sigma,log=T))

          ## Metropolis-Hastings rule - which proposals are kept?
          logp.is <- c(0,logps)[is]+logps[is]
          logp.is.new <- c(0,logps.new)[is]+logps.new[is]
          keep <- logp.is.new-logp.is > log(runif(length(is)))
          is <- is[keep]
          ts[is] <- ts.new[is]
        }
      }

      ## Store this sample
      ch.ts[k2,] <- ts
      ch.sg[k2] <- sigma
    }

    ## Store this chain's samples
    colnames(ch.ts) <- paste0("time",seq_len(n))
    ch.times[[k1]] <- ch.ts
    ch.sg <- as.matrix(ch.sg)
    colnames(ch.sg) <- "sigma"
    ch.sigma[[k1]] <- ch.sg
    if(verbose) cat("\n")
  }

  ## Convert to a list of coda objects
  if(chains==1L) {
    list(times=mcmc(ch.times[[1L]],start=thin,thin=thin),
         sigma=mcmc(ch.sigma[[1L]],start=thin,thin=thin))
  } else {
    list(times=do.call(mcmc.list,lapply(ch.times,mcmc,start=thin,thin=thin)),
         sigma=do.call(mcmc.list,lapply(ch.sigma,mcmc,start=thin,thin=thin)))
  }
}




##' @rdname metropolisDate
##' @export
metropolisDate0 <- function(ts.init,tmin,
                           len,pmc,Year,PMC,
                           sigma=4,iters=2000L,thin=25L,
                           chains=if(is.list(ts.init)) length(ts.init) else 1L,
                           log.prior=NULL,
                           verbose=interactive()) {

  ## Ensure ordering of calibration data
  PMC <- PMC[order(Year)]
  Year <- Year[order(Year)]

  ## Integrate calibration carbon concentrations by trapezoidal rule,
  ## and construct interpolator
  IPMC <- approxfun(Year,cumsum(c(0,diff(Year)*(PMC[-1L]+PMC[-length(PMC)])/2)))

  ## Given the segment end times, calculate the expected average PMC
  ## in each segment
  segmentPMC <- function(ts) diff(IPMC(ts))/diff(ts)


  ## Install uniform prior when none is given
  if(is.null(log.prior)) {
    log.prior <- function(ts,len) 0
  }

  ## Set up initial times
  ts.init <- rep(if(is.list(ts.init)) ts.init else list(ts.init),length.out=chains)
  n <- length(ts.init[[1L]])

  ## List of chains
  ch.times <- vector(mode="list",chains)

  ## PARALLEL - parallelize over this loop
  for(k1 in seq_len(chains)) {
    ## Allocate storage for this chain
    ch.ts <- matrix(0,iters,n)

    ## Initialize times for this chain
    ts <- as.vector(ts.init[[k1]])


    ## Output initial iteration count
    if(verbose) {
      cat("iter ",sprintf("%6d",1))
      flush.console()
    }

    for(k2 in seq_len(iters)) {

      ## Update iteration count
      if(verbose && k2%%100==0) {
        cat("\b\b\b\b\b\b");
        cat(sprintf("%6d",k2));
        flush.console()
      }

      for(k3 in seq_len(thin)) {

        ## Red-black update - update the times in two interleaved sets
        for(rb in seq_len(2L)) {

          ## Contribution to log posterior from each segment
          logps <- log.prior(ts,len)
          logps <- logps + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts),sigma,log=T))

          ## Indices to update
          is <- seq.int(rb,n-1L,by=2L)

          ## New proposal
          lwr <- c(tmin,ts)[is]
          upr <- ts[is+1L]
          ts.new <- ts
          ts.new[is] <- runif(length(is),lwr,upr)

          ## Contribution to log posterior from each segment of proposal
          logps.new <- log.prior(ts.new,len)
          logps.new <- logps.new + ifelse(is.na(pmc),0,dnorm(pmc,segmentPMC(ts.new),sigma,log=T))

          ## Metropolis-Hastings rule - which proposals are kept?
          logp.is <- c(0,logps)[is]+logps[is]
          logp.is.new <- c(0,logps.new)[is]+logps.new[is]
          keep <- logp.is.new-logp.is > log(runif(length(is)))
          is <- is[keep]
          ts[is] <- ts.new[is]
        }
      }

      ## Store this sample
      ch.ts[k2,] <- ts
    }

    ## Store this chain's samples
    colnames(ch.ts) <- paste0("time",seq_len(n))
    ch.times[[k1]] <- ch.ts
    if(verbose) cat("\n")
  }

  ## Convert to a list of coda objects
  if(chains==1L) {
    list(times=mcmc(ch.times[[1L]],start=thin,thin=thin))
  } else {
    list(times=do.call(mcmc.list,lapply(ch.times,mcmc,start=thin,thin=thin)))
  }
}



##' Estimate growth rates from the estimated segment times
##'
##' This function generates samples of segment growth rates from the
##' sample of segment cut dates generated by \code{metropolisDate}, and
##' the vector \code{lengths} of segment lengths in timewise order.
##'
##' @title Estimate Growth Rates
##' @param times a fitted object returned by \code{metropolisDate}
##' @param lengths a vector of segment lengths
##' @return a coda object describing the growth rates
##' @importFrom coda mcmc mcmc.list thin
##' @export
growthRate <- function(times,lengths) {

  coda <- function(tm) {
    tm <- as.array(tm)
    dt <- tm[,-1L]-tm[,-ncol(tm)]
    growth <- t(lengths/t(dt))
    colnames(growth) <- paste0("growth",seq_len(ncol(growth)))
    mcmc(growth,start=start(times),thin=thin(times))
  }

  if(inherits(times,"mcmc.list"))
    do.call(mcmc.list,lapply(times,coda))
  else
    coda(times)
}

##' Posterior prediction of segment mean carbon content
##'
##' Calculates the posterior segment mean carbon content given
##' atmospheric calibration data.
##'
##' @title Fitted Carbon Content
##' @param times a fitted object returned by \code{metropolisDate}
##' @param Year calibration data - (fractional) year of atmospheric
##' carbon measurement
##' @param PMC calibration data - recorded percentage of modern carbon
##' @return a coda object describing the growth rates
##' @importFrom coda mcmc mcmc.list thin
##' @export
fittedCarbon <- function(times,Year,PMC) {

  ## Integrate calibration carbon concentrations by trapezoidal rule,
  ## and construct interpolator
  PMC <- PMC[order(Year)]
  Year <- Year[order(Year)]
  IPMC <- approxfun(Year,cumsum(c(0,diff(Year)*(PMC[-1L]+PMC[-length(PMC)])/2)))

  ## Given the segment end times, calculate the expected average PMC
  ## in each segment
  segmentPMC <- function(ts) diff(IPMC(ts))/diff(ts)


  coda <- function(tm) {
    tm <- as.array(tm)
    pmc <- matrix(0,nrow(tm),ncol(tm)-1L)
    for(k in seq_len(nrow(tm))) pmc[k,] <- segmentPMC(tm[k,])
    colnames(pmc) <- paste0("PMC",seq_len(ncol(pmc)))
    mcmc(pmc,start=start(times),thin=thin(times))
  }

  if(inherits(times,"mcmc.list"))
    do.call(mcmc.list,lapply(times,coda))
  else
    coda(times)
}




##' Priors for growth rates.
##'
##' These functions construct functions to evaluate the contribution
##' to the log prior from each segment, for use with
##' \code{metropolisDate}.
##'
##' The \code{growth.prior.uniform} creates a uniform prior that
##' assumes growth rates are uniform up to a user specified maximum
##' rate.
##'
##' The \code{growth.prior.gamma} creates a Gamma prior for grwoth
##' rates.  The user can specify the either the parameters of the
##' distribution as either the prior mean and standard deviation or
##' the prior shape and rate of the Gamma distribution.
##'
##' @title Growth Priors
##' @param max.growth the maximum possible growth rate
##' @param mean the mean of the Gamma prior for growth
##' @param sd the standard deviation of the Gamma prior for growth
##' @param shape the shape parameter of the Gamma prior for growth
##' @param rate the rate parameter of the Gamma prior for growth
##' @return function to evaluate the contribution to the log prior
##' from each segment.
##' @export
growth.prior.uniform <- function(max.growth) {
  ## Return function to compute contributions to log prior
  function(ts,ls) {
    ## Compute growth rates
    gs <- ls/diff(ts)
    ## Growth rates above maximum are effectively impossible
    ifelse(gs < max.growth,0,-1.0E12)
  }
}

##' @rdname growth.prior.uniform
##' @export
growth.prior.gamma <- function(mean,sd,shape=(mean/sd)^2,rate=mean/sd^2) {
  ## Return function to compute contributions to log prior
  function(ts,ls) {
    ## Compute growth rates
    gs <- ls/diff(ts)
    ## Growth rates above maximum are effectively impossible
    dgamma(gs,shape=shape,rate=rate,log=TRUE)
  }
}



##' Quantiles from a coda object
##'
##' Converts an \code{mcmc} or \code{mcmc.list} object to a matrix and
##' applies quantile.
##'
##' @title Quantiles
##' @param x a coda object
##' @param ... further arguments passed to \code{quantile}
##' @return An array of quantiles
##' @importFrom stats quantile
##' @export
quantile.mcmc <- function(x,...) {
  t(apply(x,2,quantile,...))
}

##' @rdname quantile.mcmc
##' @importFrom stats quantile
##' @export
quantile.mcmc.list <- function(x,...) {
  t(apply(do.call(rbind,x),2,quantile,...))
}


##' Read sample data from an Excel file
##'
##' This function assumes the data are stored as one contigous block
##' of cells in an Excel spreadsheet, starting at the cell address
##' \code{address}.  The function reads the largest contiguous block
##' that starts at this address and includes every column that has a
##' heading and every row for which the first three columns are not
##' all blank.  The function then checks that the first few columns
##' have the names specified by \code{check.names}.
##'
##' The function also searches for the date the data were collected in
##' a cell below the cell containing the text "Date collected". This
##' date is extracted and added to the data.frame as an attribute.
##'
##' If \code{sheetname} is NULL, the workbook must consist of a single
##' sheet.
##'
##' @title Read Excel file
##' @param filename the Excel file to read
##' @param sheetname the name of the worksheet to read.  If NULL, the
##' workbook must contain only one sheet.
##' @param address the cell address of the first header cell
##' @param check.names the names of the first few columns of data.
##' @return a dataframe of data
##' @importFrom XLConnect loadWorkbook getSheets readWorksheetFromFile
##' @export
readXLSample <- function(filename,sheetname=NULL,address="B4",
                         check.names=c("Lab ID", "Moss species", "Sample ID",
                           "Depth (mm)","Segment length (mm)","pMC")) {

  trim <- function(s) gsub("^\\s+|\\s+$","",s)

  ## If no sheet specified, workbook must have one sheet
  if(is.null(sheetname) || nchar(sheetname)==0) {
    workbook <- loadWorkbook(filename)
    sheetname <- getSheets(workbook)
  }
  if(length(sheetname)!=1) stop("Sheet is not uniquely identified")

  ## Decode excel cell address to (i,j)
  address <- trim(toupper(address[1]))
  if(length(grep("^([A-Z]+)([0-9]+)$",address))!=1) stop("Invalid cell address")
  i <- as.integer(gsub("^([A-Z]+)([0-9]+)$","\\2",address))
  letters <- gsub("^([A-Z]+)([0-9]+)$","\\1",address)
  j <- sum((utf8ToInt(letters)-64)*(26^rev(seq_len(nchar(letters))-1)))

  ## Read whole sheet
  fullsheet <- sheet <- readWorksheetFromFile(filename,sheet=sheetname,header=FALSE)
  ## Extract header and check column names
  header <- trim(sheet[i,j:ncol(sheet)])
  if(!all(check.names == header[seq_along(check.names)]))
    stop("Header names do not match check.names argument")

  ## Extract data
  sheet <- sheet[(i+1):nrow(sheet),j:ncol(sheet)]
  ## Trim to first blank row
  nrows <- min(nrow(sheet),which(apply(sheet[,1:3],1,function(r) all(is.na(r))))-1)
  sheet <- sheet[seq_len(nrows),!is.na(header)]
  ## Set column names and convert columns to appropriate data type
  colnames(sheet) <- make.names(gsub(" |_|\\)|\\(","",header[!is.na(header)]))
  ## only convert column if it is of character type. Sometimes (specifically if the header cell is empty)
  ##  the column may already be numeric and attempting to apply type.convert will result in an error
  for(k in seq_len(ncol(sheet))) { if (is.character(sheet[,k])) sheet[,k] <- type.convert(sheet[,k]) }

  ## Extract aquisition date and add as attribute
  fullsheet <- tolower(trim(as.matrix(fullsheet)))
  k <- which(fullsheet=="date collected",arr.ind=TRUE)
  if(nrow(k)!=1) stop("Unique collection date not found in data sheet")
  acquisition <- strptime(fullsheet[k+c(1,0)],format=getOption("XLConnect.dateTimeFormat"))
  acquisition <- 1900+acquisition$year+acquisition$yday/365.25
  attr(sheet,"acquisition_date") <- acquisition

  ## Reverse the sheet.
  sheet[rev(seq_len(nrow(sheet))),]
}

##' Smooth calibration curve
##'
##' The function \code{smoothCalibration} smooths
##' an atmospheric carbon curve. This function is experimental.
##'
##' A smoothed calibration curve can be used to provide an approximation for the
##' effect of having multiple strands in a sample of moss, each with
##' slightly different growth rates or physical alignment within the sample.
##' Currently the only method provided is "rgr", which
##' effectively assumes that the sample is comprised of multiple strands, each
##' with a constant (random) growth rate offset.
##'
##' @title Calibration Curve Smoothing
##' @param Year calibration data - vector of (fractional) year of atmospheric carbon measurement
##' @param PMC calibration data - vector of percent modern carbon values
##' @param smoothing.level (scalar) the level of smoothing to apply. The interpretation of this value depends on the method used
##' @param method string specifying the smoothing method to use. Currently only "rgr"
##' @return a data frame of smoothed carbon values with column names "Year" and "PMC"
##' @export
smoothCalibration <- function(Year,PMC,smoothing.level,method="rgr") {
    method=match.arg(tolower(method),c("rgr"))
    switch(method,
           rgr={ groff=rnorm(1000,sd=smoothing.level) ## draw 1000 random growth rate offsets
                 groff=groff[abs(groff)<1] ## these are treated as a fraction, so can only keep those with abs value less than 1
                 ## this is a bit rubbish, can surely be done better
                 af=approxfun(Year,PMC)
                 npts=length(Year)
                 oc=sapply(groff,function(z){ af(seq(npts-1,0,by=-1)*z+Year) })
                 data.frame(Year=Year,PMC=apply(oc,1,mean))
             }
           )
}
AustralianAntarcticDataCentre/datemoss documentation built on May 5, 2019, 8:14 a.m.