R/basics.R

Defines functions as.paleoTSfit akaike.wts IC compareModels

Documented in as.paleoTSfit compareModels IC

# basic functions #

#' @title Make a Paleontological Time-series object
#' @description Combines information into an object of class \code{paleoTS}
#' @param mm vector of sample means
#' @param vv vector of sample variances
#' @param nn vector of sample sizes
#' @param tt vector of sample ages
#' @param MM vector of true means (simulated data)
#' @param genpars generating parameters (simulated data)
#' @param label optional, label for time-series
#' @param start.age optional, age of oldest sample
#' @param oldest value indicating if the oldest sample is first or last in the
#'   sequence
#' @param reset.time logical; if TRUE, then change time scale to start at t=0
#'   and adjust \code{start.age} accordingly
#'
#' @return a \code{paleoTS} object
#' @export
#' @details This function combines data into a \code{paleoTS} object. For
#'   empirical data it may be more convenient to use \code{read.paleoTS}. \cr\cr
#'   If sample ages decrease through the sequence, as if given in millions of
#'   years ago, \code{tt} will automatically be converted to time elapsed from
#'   the beginning of the sequence as long as \code{reset.time} = TRUE.
#' @note   All model-fitting functions estimate the contribution of sampling
#'   noise to the observed differences between samples.  They do this assuming
#'   that the trait is represented by sample means, which have sampling
#'   variances equal to variance divided by sample size, \code{vv/nn}.  If one
#'   is interested in analyzing statistics other than the sample mean (medians,
#'   quantiles, or other statistics), use the the following procedure: set the
#'   statistic in question as the \code{mm} values, replace \code{vv} with a
#'   vector of the squared standard errors for each estimate (generated by other
#'   means, for example bootstrapping), and set all values of \code{nn} to one.
#' @seealso \code{\link{read.paleoTS}}
#' @examples
#' x <- as.paleoTS(mm = rnorm(20), vv = rep(1, 20), nn = rep(25, 20), tt=1:20)
#' plot(x) # easier to use sim.Stasis()
as.paleoTS<- function (mm, vv, nn, tt, MM = NULL, genpars = NULL, label = NULL,
                       start.age = NULL, oldest = c("first", "last"), reset.time = TRUE)
{
  oldest<- match.arg(oldest)

  # check that mm, vv, nn, tt are of the same length
  n.mm <- length(mm)
  n.vv <- length(vv)
  n.nn <- length(nn)
  n.tt <- length(tt)

  n.check <- c(n.mm, n.vv, n.nn, n.tt)
  if(!all(n.check == n.mm))  stop("mm, vv, nn and tt must be all of the same length")

  y <- list(mm = mm, vv = vv, nn = nn, tt = tt, MM = MM, genpars = genpars,
            label = label, start.age = start.age)
  if (oldest == "last") {
    oo <- length(y$mm):1
    y$mm <- y$mm[oo]
    y$vv <- y$vv[oo]
    y$nn <- y$nn[oo]
    y$tt <- y$tt[oo]
  }
  if(y$tt[1] > y$tt[n.mm])	timeDir<- "decreasing"
  else 					timeDir<- "increasing"

  if (reset.time) {
    if (y$tt[1] != 0) {
      sa <- y$tt[1]
      if (!is.null(y$start.age) && sa != y$start.age)
        stop("Age of first sample does not match start.age argument")
      if(timeDir == "decreasing")	y$tt <- sa - y$tt   # decreasing ages (e.g., Ma)
      else					    y$tt <- y$tt - sa   # increasing ages (elapsed time)
      y$start.age<- sa
    }
  }
  y$timeDir<- timeDir
  class(y) <- "paleoTS"
  return(y)
}



#' Create a \code{paleoTSfit} object
#'
#' @param logL model log-likelihood
#' @param parameters model parameter estimates
#' @param modelName model name
#' @param method parameterization, either "AD" or "Joint"
#' @param K number of model parameters
#' @param n sample size
#' @param se standard errors of parameter estimates
#'
#' @return a \code{paleoTSfit} object
#' @note All model-fitting functions use this function to package the resulting
#'   data-model fits. Users will not need to call this function.
#' @export
as.paleoTSfit<- function(logL, parameters, modelName, method, K, n, se)
{
  ic<- IC(logL=logL, K=K, n=n, method='AICc')
  y<- list(logL=logL, AICc=ic, parameters=parameters, modelName=modelName, method=method, se=se, K=K, n=n)
  class(y)<- "paleoTSfit"
  return(y)
}



#' Read a text-file with data from a paleontological time-series
#' @param file  file name; if not supplied, an interactive window prompts the
#'   user to navigate to the text file
#' @param oldest "first" if samples are in order from oldest to youngest, "last"
#'   if the opposite
#' @param reset.time logical; see \code{\link{as.paleoTS}}
#' @param ... other arguments, passed to \code{read.table}
#'
#' @return a \code{paleoTS} object
#' @export
#' @details This function reads a text file with a specified format and converts
#'   it into a \code{paleoTS} object. It will often be the easiest way for users
#'   to import their own data. The text file should have four columns without
#'   headers, in this order: sample size, sample means, sample variances, sample
#'   ages.
#' @seealso \code{\link{as.paleoTS}}
#'
read.paleoTS<- function (file = NULL, oldest = "first", reset.time = TRUE, ...)
{
  if (is.null(file)) {
    ff <- file.choose()
    x <- utils::read.table(ff, ...)
    lab1 <- basename(ff)
  }
  else {
    x <- utils::read.table(file = file, ...)
    #lab1 <- paste(getwd(), file)
    lab1<- file
  }
  xr <- as.paleoTS(mm = x[, 2], vv = x[, 3], nn = x[, 1], tt = x[,
         4], label = lab1, oldest = oldest, reset.time = reset.time)
  return(xr)
}


# internal function, not documented
akaike.wts<- function(aa)
  ## aa is a vector of AIC or AICc values
{
  # get subset, dropping NAs if need be
  okset<- !is.na(aa)
  aas<- aa[okset]

  ma<- min(aas)
  delt<- aas - ma
  denom<- sum(exp(-delt/2))
  ww<- exp(-delt/2)/denom
  names(ww)<- names(aa)

  aw<- ww[okset]
  return(aw)
}



#' Compute Information Criteria
#'
#' @param logL log-likelihood
#' @param K number of parameters
#' @param n sample size
#' @param method either "AIC", "AICc", or "BIC"
#'
#' @return the value of the specified information criterion
#' @export
#'
#' @note This function is used internally by the model-fitting functions. It
#'   will not generally be called directly by the user.
IC<- function(logL, K, n=NULL, method=c("AICc", "AIC", "BIC"))
{
  method<-match.arg(method)
  if ((method=="AICc" || method=="BIC") && is.null(n))  stop('AICc requires n.')
  if (method=="AIC")	ic<- -2*logL + 2*K
  if (method=="AICc")	ic<- -2*logL + 2*K + (2*K*(K+1))/(n-K-1)
  if (method=="BIC")	ic<- -2*logL + K*log(n)

  return(ic)
}



#' @title Compute a pooled variance
#' @description Computes a pooled variance from samples in a paleontological
#'   time-series
#' @param y either a \code{paleoTS} object, or a vector of sample variances
#' @param nn a vector of sample sizes
#' @param minN sample size below which variances are replaced with pooled
#'   variances. See Details.
#' @param ret.paleoTS if TRUE, a \code{paleoTS} object is returned. If FALSE,
#'   the value of the pooled variance is returned.
#'
#' @return  if \code{ret.paleoTS = TRUE} a \code{paleoTS} object with all (or
#'   some) variances replaced with the pooled variance; otherwise the pooled
#'   variance
#' @export
#' @details  A pooled variance of a set of populations is the weighted average
#'   of the individual variances of the populations, with the weight for each
#'   population equal to its sample size minus one. \cr \cr For many kinds of
#'   traits, variation levels tend to be similar among closely related
#'   populations. When this is true and sample sizes are low, much of the
#'   observed differences in variance among samples will be due to the high
#'   noise of estimated the variances. Replacing the observed variances of all
#'   populations (or only those with \code{nn < minN}) with the estimated pooled
#'   variance can reduce this noise.
#' @examples
#' data(cantius_L)
#' cant_all <- pool.var(cantius_L, ret.paleoTS = TRUE)   # replace all variances with pooled variance
#' cant_n5  <- pool.var(cantius_L, minN = 5, ret.paleoTS = TRUE)  # replace only pops with n < 5
#'
pool.var<- function (y, nn = NULL, minN = NULL, ret.paleoTS = FALSE)
{
  if (class(y) == "paleoTS") {
    if (all(y$nn == 1)) 	vp <- mean(y$vv)
    else vp <- sum(y$vv * (y$nn - 1))/sum(y$nn - 1)
  }
  else vp <- sum(y * (nn - 1)/sum(nn - 1))

  # replace all vv's, or just those with nn<minN
  if (ret.paleoTS) {
    yn <- y
    if(is.null(minN))  yn$vv <- rep(vp, length(y$mm)) else yn$vv[yn$nn<minN]<- vp
    return(yn)
  }
  else {
    return(vp)
  }
}



#' Test for heterogeneity of variances among samples in a time-series
#'
#' @param y a \code{paleoTS} object
#' @param method test to use; currently only \code{"Bartlett"} is implemented
#' @return a list with the test statistic, degrees of freedom, and p-value
#' @export
#' @references
#' Sokal, R. R and F. J. Rohlf.  1995. \emph{Biometry} 3rd Ed.
#' @note Most often, this function will be used to assess if it is reasonable to
#' pool variances across samples using \code{pool.var}. A significant result means
#' that the null hypothesis of equal variances across samples is rejected.  Even in
#' this case, however, it may still be preferable to pool variances, at least for
#' some populations, if sample sizes are quite low.
#' @seealso \code{\link{pool.var}}
#' @examples
#' data(cantius_L)
#' test.var.het(cantius_L)  # significant, but still may want to pool variances
test.var.het<- function (y, method = "Bartlett")
  # test for variance heterogeneity among samples in a paleoTS object
{

  if(all(y$vv == 0)) stop("All sample variances are zero.")
  vp<- pool.var(y)
  NN<- sum(y$nn)
  k<- length(y$vv)
  top<- (NN-k)*log(vp)-(sum((y$nn-1)*log(y$vv)))
  bot<- 1+ (1/(3*(k-1)))*( (sum(1/(y$nn-1))) - (1/(NN-k)))
  TT<- top/bot
  p.val<- stats::pchisq(TT, df=k-1, lower.tail=FALSE)

  w<-list(stat=TT, p.value=p.val, df=k-1)
  return (w)
}



#' Approximate log-transformation of time-series data
#'
#' @param y a \code{paleoTS} object
#'
#' @return the converted \code{paleoTS} object
#' @export
#'
#' @details   For a random variable x, its approximate mean on a natural log
#'   scale is the log of its untransformed mean.  The approximate variance on a
#'   log scale is equal to the squared coefficient of variation.
#' @note This transformation only makes sense for variables with dimension and a
#'   true zero point, such as lengths and areas.
#' @references Hunt, G. 2006. Fitting and comparing models of phyletic
#'   evolution: random walks and beyond.  \emph{Paleobiology} 32:578-601. \cr
#'   Lewontin, R. 1966. On the measurement of relative variability.
#'   \emph{Systematic Zoology} 15:141-142.
#' @examples x <- sim.Stasis(ns = 10, theta = 20, omega = 1)
#' plot(x)
#' xl <- ln.paleoTS(x)
#' plot(xl)
ln.paleoTS <- function (y)
  {
  logx<- y
  logx$mm<- log(y$mm)
  logx$vv<- (sqrt(y$vv)/y$mm)^2

  return (logx)
}

#' Convert time-series to standard deviation units
#'
#' @param y a \code{paleoTS} object
#' @param center optional translation of time-series according to "mean" or
#'   "start"; see Details
#'
#' @return the converted \code{paleoTS} object
#' @details   The standardization expresses each sample mean as the deviation
#'   from the overall mean, divided by the pooled within-sample variance. Sample
#'   variances are also divided by the pooled sample variance. \cr \cr Essentially,
#'   this converts paleontological time-series data into standard deviation
#'   units, similar to the computation of evolutionary rates in haldanes.  This
#'   operation \emph{does not} change the relative fit of models, but it does
#'   facilitate the comparison of parameter estimates across time-series of
#'   traits measured in different units. \cr \cr If argument \code{center} = "start"
#'   the time-series is translated such that the trait mean of the first sample
#'   is zero.
#' @export
#'
#' @examples
#' x <- sim.Stasis(ns = 8, theta = 1, omega = 4, vp = 2)
#' xs <- std.paleoTS(x, center = "start")
#' plot(x, ylim = range(c(x$mm, xs$mm)))
#' plot(xs, col = "red", add = TRUE)
#' legend(x = "topright", c("unstandardized", "standardized"), lty=1, col=c("black", "red"), cex=0.7)
std.paleoTS <- function (y, center=c("mean", "start"))
{
  vp<- pool.var(y)
  sp<- sqrt(vp)

  ys <- y
  ys$mm<- (y$mm - mean(y$mm)) /sp
  ys$vv<- y$vv/vp

  center <- match.arg(center)
  if (center == "start")  ys$mm <- ys$mm - ys$mm[1]

  return(ys)
}


#' @title Subsample a paleontological time-series
#' @description Subsampling is done according to the supplied logical vector or,
#' if none is supplied, as a proportion of samples, randomly chosen.
#'
#' @param y a \code{paleoTS} object
#' @param ok a logical vector, \code{TRUE} for populations to retain
#' @param k proportion of samples to retain, with the samples chosen randomly
#' @param reset.time if TRUE, resets the time so that the first population time is zero
#'
#' @return the sub-sampled \code{paleoTS} object
#' @export
#'
#' @examples
#' x <- sim.GRW(ns=20)
#' plot(x)
#' xs1 <- sub.paleoTS(x, k = 0.5)
#' plot(xs1, add = TRUE, col="green")
#' keep <- rep(c(TRUE, FALSE), 10)
#' xs2 <- sub.paleoTS(x, ok = keep)
#' plot(xs2, add = TRUE, col = "red")
sub.paleoTS <- function (y, ok=NULL, k=0.1, reset.time = TRUE)
{
  ys<- y
  ns<- length(y$mm)
  take<- array(FALSE, dim=ns)
  if (!is.null(ok) )
    take<- ok
  else
    take[sample(1:ns, size=round(k*ns))]<- TRUE

  # compute new start age (can change if first point is not sampled)
  # only needed if y$start.age != NULL
  if(is.null(y$start.age)) {start.age.sub <- NULL} else{
    td <- y$tt[1] - y$tt[take][1]  # time difference between 1st sample of y and subsetted y
    if(y$timeDir == "increasing") start.age.sub <- y$start.age + td  else  start.age.sub <- y$start.age - td
  }

  slab <- paste ("Subsetted from--", y$label)
  ys <- as.paleoTS(mm = y$mm[take], vv = y$vv[take], nn = y$nn[take], tt = y$tt[take],
                   MM = y$MM[take], genpars = y$genpars, label = slab, start.age = start.age.sub,
                   oldest = "first", reset.time = reset.time)

  # ys$mm<- y$mm[take]
  # ys$vv<- y$vv[take]
  # ys$nn<- y$nn[take]
  # ys$tt<- y$tt[take]
  # ys$MM<- y$MM[take]
  # ys$label<- paste ("Subsetted from--", y$label)

  return(ys)
}


#' @title Compare model fits for a paleontological time-series
#' @description Takes output from model-fitting functions and compiles model-fit
#'   information (log-likelihood, AICc, etc.) into a convenient table
#'
#' @param ... any number of model fit (\code{as.paleoTSfit}) objects
#' @param silent if \code{TRUE}, suppresses printing
#' @param sort if \code{TRUE}, the table sorts models from best to worst
#'
#' @return if \code{silent = FALSE}, the table is printed and nothing is
#'   returned. If  \code{silent = TRUE}, printing is suppressed and a list of
#'   two objects is returned: the table of model fits, \code{modelFits}, and a
#'   list of parameter estimates, \code{pl}.
#' @export
#'
#' @examples
#' x <- sim.GRW(ns = 40, ms = 0.5, vs = 0.1)
#' m1 <- fitSimple(x, model = "GRW")  # the true model
#' m2 <- fitSimple(x, model = "URW")
#' plot(x, modelFit = m1)
#' compareModels(m1, m2)
compareModels<- function(..., silent = FALSE, sort = FALSE)
{
  modelList<- list(...)

  # make sure all are paleoTSfit objects, and all use same method (AD or Joint)
  classv<- sapply(modelList, FUN=class)
  methv<- sapply(modelList, FUN=function(x){x$method})
  nv<- sapply(modelList, FUN=function(x){x$n})
  nm<- length(modelList)

  if(!all(classv=='paleoTSfit'))  	stop("All objects must be of class 'paleoTSfit'")
  if(!all(methv==methv[1]))			stop(paste("All model fits must use the same method (AD or Joint)", sep='\n'))
  else method<- methv[1]
  if(!all(nv==nv[1]))				stop("Objects have differing n.")
  else nn<- nv[1]


  # construct data frame and parameter list
  logL<- sapply(modelList, FUN=function(x){x$logL})
  K<- sapply(modelList, FUN=function(x){x$K})
  AICc<- sapply(modelList, FUN=function(x){x$AICc})
  dAICc <- AICc - min(AICc)
  Akaike.wt<- round(akaike.wts(AICc),3)
  df<- data.frame(logL, K, AICc, dAICc, Akaike.wt)
  rn<- sapply(modelList, FUN=function(x){x$modelName})  # extract model names
  rn<- make.unique(rn)  # handles situation if there are non-unique model names
  row.names(df)<- rn
  if(sort) { oo <- order(AICc, decreasing = FALSE)
             df <- df[oo,]  }

  pl<- lapply(modelList, FUN=function(x){x$parameters})
  names(pl)<- row.names(df)

  # print information
  if(!silent)
  {
    cat ('\nComparing ', nm, ' models [n = ', nn, ',', ' method = ', methv[1], ']\n\n', sep='')
    print (df)
  }

  if(silent)		return(list(modelFits=df, parameters=pl))
  else 			invisible(df)
}

Try the paleoTS package in your browser

Any scripts or data that you put into this service are public.

paleoTS documentation built on May 2, 2019, 1:07 p.m.