#' Function to construct one clade CMR-model.
#'
#' This function generates a structure containing the necessary functions for a CMR analysis of a fossil dataset.
#'
#' @param Obs a matrix with size \emph{number of taxa} by \emph{number of intervals (n)}. Each taxon has a row with 0's (unobserved) and 1's (observed) for each interval in the analysis. Oldest interval is first column.
#' @param dts a vector of interval durations. Defaults to a series of 1's if not supplied. Oldest interval is first entry.
#' @param spec/ext/samp are formulas specifying the particular model. Use \code{spec=~time} for temporally variable rates, \code{spec~div} for diversity dependence. Other drivers should have names as entries into \code{data}
#' @param data a data frame with putative drivers.
#' @param pfix a switch to select which approach is used to solve the identifiability problem in the model. If \code{pfix = 1}, then the sampling rate in the first and the last intervals are assumed to be equal to the mean sampling rate for the whole period. If \code{pfix = 2}, then the first two intervals, and the last two intervals have the same sampling rate. Defaults to \code{pfix = 2}
#' @param priorsMu distribution used for prior for the baseline rates - a probability density function. Default is \code{dnorm}.
#' @param priorsCov distribution used for prior for the driver coefficients - a probability density function. Default is \code{dnorm}.
#' @param priorsStd distribution used for prior for log of the standard deviation of the random effects - a probability density function. Default is \code{dunif}.
#' @param priorPars list of lists of the parameters for the priors. First list is parameters for baseline rates, then list of parameters for covariates and third is list of parameters for ln of standard deviation of random effects))). Defaults to \code{list(list(log(0.1),5),list(0,5),list(-3,3))}.
#' @param modeltype sets the type parameterisation of the rates of speciation and extinction. 'I' and 'II' specifies that the probabilities of extinction and seniority are calculated based on actual durations of intervals, whereas 'III' and 'IV' uses the differences between mid-points of intervals. 'I' and 'III' uses a basic implementation of probability of extinction as \eqn{1 - exp(-rate * dt)}. Models 'II' and 'IV' use a more detailed functional relationship between rate and probability, following Raup (1984): (µ*((exp((lambda-µ)*dt))-1))/(lambda*((exp((lambda-µ)*dt)))-µ). Modeltype 'V' implements a temporally uninformed model, estimating the probabilities directly (or rather their logit).
#' @return The function returns an object of class CMR_model. This most important output is out$probfun which is a function for the posterior. This output can be fed directly into the sampler \link{MCMC_CMR}. It also returns \code{$Obs} and \code{$dts} as well as the full call (exlcluding default settings), \code{$call}
#' @export
#' @examples sim <- sim_BD(ext = function(t,n){0.01+0.03*t},samp = 0.8,n_init = 10)
#' plot(sim)
#' mod1 <- make.BayesCMR(sim$FosRec>0)
#' fit <- MCMC_CMR(mod1)
#' plot(fit)
#' summary(fit)
#' matplot(fit$Chain[,1:3],type="l")
make_BayesCMR <- function(Obs,dts=rep(1,dim(Obs)[2]),
spec = ~ 1,
ext = ~ 1,
samp = ~ 1,
data = NULL,
pfix = 2,
priorMu = dnorm,
priorCov = dnorm,
priorStd = dunif,
priorPars = list(list(log(0.1),5),list(0,5),list(-3,3)),
modeltype='III'){
# Model generating function for a Compadre analysis.
# Minimum input is a matrix of observed/unobserved of dimensions
# taxa by temporal interval. dts is vector of interval
# durations (default sets to 1).
# Dts are in this version difference between mid-poitns of intervals for spec/ext and true dts for samp.
dts_se <- dts[-1]/2 + dts[-length(dts)]/2
datacopy = data;
# Current version implements paraclade extinction and growth from Raup 1984
# used to get seniority and extinction. Current version 26.03.2019
## INPUT CHECKS
fullcall <- deparse(match.call());
if (length(dts) != dim(Obs)[2]){
stop("Number of intervals in *dts* does not match dimension of Obs")
}
if (!is.null(data)){
# If drivers are given
if (NROW(data) != length(dts)){
stop("Size of data frame with drivers does not match dimension of Obs.")
}
}
undv <- make_unvd(Obs);
u = undv$u;
v = undv$v;
n = undv$n;
d = undv$d;
prior <- list(); #making all prior terms part of this list.
prix = 1; # ticker for prior parts.
origdata = data;
# normalizing data
# If last entry (stage) is included in the data.frame, must be removed from normalization for the Speciation/extinction drivers
# Split into two data's, one for samp (length(dts)) and one for spec/ext (SE)
if (!is.null(data)){
if (dim(data)[2]>1){
dataSE = as.data.frame(apply(data[-length(dts),] ,2,normfun))
data = as.data.frame(apply(data,2,normfun))
} else {
dataSE = as.data.frame(normfun(data[-length(dts),]))
colnames(dataSE) <- colnames(data)
data = as.data.frame(apply(data,2,normfun))
}
} else {
dataSE = data.frame(tmp=rep(0,length(dts)-1));
data = data.frame(tmp=rep(0,length(dts)));
# generating bollocks data.frames so it works when there are no drivers, but temporal variability. Used in making the model.matrices below.
}
updmmx <- c(F,F,F) # if updating the model matrices for any X (i.e. if diversity is a driver)
# If mmxxx needs to be updated, i.e. if diversity is a term in any of the rates.
# SO this fails if not data given and div-dep, since data then doesn't exist properly.
if ("div" %in% rownames(attr(terms(spec),"factors"))){
if (is.null(dataSE)){
dataSE = data.frame(div=rep(0,length(dts)-1))
} else {
dataSE$div = rep(0,dim(dataSE)[1])
}
updmmx[1] <- TRUE
# Perhaps have these as lists with indexes instead? If there is an interaction, several rows (not just $div) must be updated!
}
if ("div" %in% rownames(attr(terms(ext),"factors"))){
if (is.null(dataSE)){
dataSE = data.frame(div=rep(0,length(dts)-1))
} else {
dataSE$div = rep(0,dim(dataSE)[1])
}
updmmx[2] <- TRUE
}
if ("div" %in% rownames(attr(terms(samp),"factors"))){
# This is not possible... we can not have diversity estimated affecting the sampling of 'diversity' itself. Throw error
error('Model with diversity impact on sampling is not possible.')
}
# Making design matrixes for rates. THESE ARE TEMPORARY if diversity is a term. If not they are unchanging.
tmpspec <- update(spec,~.-time) # taking out TIME, since time is the Random effects.
mmspec <- model.matrix(tmpspec,dataSE)
mmspec_f <- function(x){
tmp <- dataSE;
tmp$div <- n_norm(x)
return(model.matrix(tmpspec,tmp))}
tmpext <- update(ext,~.-time) # taking out TIME, since time is the Random effects.
mmext <- model.matrix(tmpext,dataSE)
mmext_f <- function(x){
tmp <- dataSE;
tmp$div <- n_norm(x)
return(model.matrix(tmpext,tmp))}
tmpsamp <- update(samp,~.-time) # taking out TIME, since time is the Random effects.
mmsamp <- model.matrix(tmpsamp,data)
RE = c(F,F,F)
if ("time" %in% (attr(terms(spec),"term.labels"))){
# random effects/temporal effect speciation rates
Zspec <- diag(length(dts)-1) # basically the RE for spec
RE[1] = TRUE
} else {
Zspec <- NULL
}
if ("time" %in% (attr(terms(ext),"term.labels"))){
# random effects/temporal effect speciation rates
Zext <- diag(length(dts)-1) # basically the RE for spec
RE[2] = TRUE
} else {
Zext <- NULL
}
if ("time" %in% (attr(terms(samp),"term.labels"))){
# random effects/temporal effect speciation rates
# Making 'RE' matrices.
RE[3] = TRUE
if (pfix==2){
# first two and last two sampling rates are linked
Zsamp <- rbind(c(1,rep(0,length(dts)-3)),diag(length(dts)-2),c(rep(0,length(dts)-3),1))
} else if (pfix==1){
# first is equal to last is equal to mean, i.e. no RE for these two
Zsamp <- rbind(rep(0,length(dts)-2),diag(length(dts)-2),rep(0,length(dts)-2))
}
} else {
Zsamp <- NULL
}
# cbind(mmsamp*c(0.3), Zsamp*runif(length(dts)))
# THIS MIGHT BE DIFFERENT FRMO BEFORE, NOW DRIVERS
# are BEfore variances in the x-array.
alphinx= list();
alphinx$specInx = c(1,seq(4, length.out=(dim(mmspec)[2]-1)))
alphinx$extInx = c(2,seq(max(c(3,alphinx$specInx))+1,length.out=(dim(mmext)[2]-1)))
alphinx$sampInx = c(3,seq(max(c(3,alphinx$specInx,alphinx$extInx))+1,length.out=(dim(mmsamp)[2]-1)))
alphinx$varInx = seq(max(unlist(alphinx)+1),length.out=sum(RE))
alphinx$specReInx = seq(max(unlist(alphinx)+1),length.out=RE[1]*(length(dts)-1))
alphinx$extReInx = seq(max(unlist(alphinx)+1),length.out=RE[2]*(length(dts)-1))
alphinx$sampReInx = seq(max(unlist(alphinx)+1),length.out=RE[3]*(length(dts)-2))
names(alphinx$specInx) <- colnames(mmspec)
names(alphinx$extInx) <- colnames(mmext)
names(alphinx$sampInx) <- colnames(mmsamp)
npar <- dim(mmspec)[2] + dim(mmext)[2] + dim(mmsamp)[2] + sum(RE*1) + RE[1]*(length(dts)-1) + RE[2]*(length(dts)-1) + RE[3]*(length(dts)-2)
# This is number of parameters in total.
# sum(DivDep*1) + nSmpTS+nExtTS+nSpecTS + sum(Driv_x_Div_Spec) + sum(Driv_x_Div_Ext); # number of mean+hyperparameters
# nTS <- c(nSpecTS,nExtTS,nSmpTS);
# ALL x-arrays endd with the RE's, so all indexes below here that are not the first 3+sum(RE) sohuld have sum(DivDep) added to them.
# All these have priors dnorm(0,5), the Sd[RE] are log(sd) really.
# [�_fec,�_ext,�samp] &
# (possibly)[st_RE_fec,st_RE_ext,st_RE_samp] &
# possibly [alphaTS1,alphaTS2 etc]
#
#
if (is.null(Zsamp)){
# If no RE for sampling
if (dim(mmsamp)[2]==1){
# if only intercept
lsampfun <- function(x){rep(x[3],length(dts))}
} else {
# here with drivers
lsampfun <- function(x){rowSums(cbind(mmsamp %*% x[alphinx$sampInx]))}
}
} else {
# if RE's
lsampfun <- function(x){rowSums(cbind(mmsamp %*% x[alphinx$sampInx], Zsamp %*% x[alphinx$sampReInx]))}
}
# making div function
n_est <- function(x){ (n[1:length(dts)-1]/
(rate2prob(exp(lsampfun(x)),dts)[1:(length(dts)-1)]))}
n_norm <- function(x){( n_est(x)-mean(n_est(x)))/sd(n_est(x))};
if (is.null(Zspec)){# If no RE for speciation
if (dim(mmspec)[2]==1){# if only intercept
lspecfun <- function(x){rep(x[1],length(dts)-1)}
} else { # here with drivers
if (updmmx[1]){
lspecfun <- function(x){rowSums(cbind(mmspec_f(x) %*% x[alphinx$specInx])) }
} else {
lspecfun <- function(x){rowSums(cbind(mmspec %*% x[alphinx$specInx]))}
}
}
} else {# if RE's
if (updmmx[1]){
lspecfun <- function(x){
return(rowSums(cbind(mmspec_f(x) %*% x[alphinx$specInx], Zspec %*% x[alphinx$specReInx]))) }
} else {
lspecfun <- function(x){rowSums(cbind(mmspec %*% x[alphinx$specInx], Zspec %*% x[alphinx$specReInx]))}
}
}
if (is.null(Zext)){# If no RE for extinction
if (dim(mmext)[2]==1){# if only intercept
lextfun <- function(x){rep(x[2],length(dts)-1)}
} else { # here with drivers
if (updmmx[2]){
# If div-dep, fill in with divers
lextfun <- function(x){rowSums(cbind(mmext_f(x) %*% x[alphinx$extInx])) }
} else {
lextfun <- function(x){rowSums(cbind(mmext_f(x) %*% x[alphinx$extInx]))}
}
}
} else {# if RE's
# Should also be here with drivers, and divdep interactions and all
# lextfun <- function(x){rowSums(cbind(mmext %*% x[alphinx$extInx], Zext %*% x[alphinx$extReInx]))}
if (updmmx[2]){
# If div-dep, fill in with divers
lextfun <- function(x){rowSums(cbind(mmext_f(x) %*% x[alphinx$extInx], Zext %*% x[alphinx$extReInx]))}
} else {
lextfun <- function(x){rowSums(cbind(mmext %*% x[alphinx$extInx], Zext %*% x[alphinx$extReInx]))}
}
}
# Defining the likelihoodfunction.
myll <- function(x){
lfec = lspecfun(x);
lext = lextfun(x);
lsmp = lsampfun(x);
if (modeltype=='I'){
extin <- 1-exp(-exp(lext)*dts[-length(dts)])
gamin <- (exp(-exp(lext)*dts[-length(dts)]))/(exp((exp(lfec)-exp(lext))*dts[-length(dts)]))
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'II'){
extin <- (exp(lext)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)]))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)])))-exp(lext))
gamin <- (1-(exp(lext)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)]))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)])))-exp(lext)))/ exp((exp(lfec)-exp(lext))*dts[-length(dts)])
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'III'){
extin <- 1-exp(-exp(lext)*dts_se)
gamin <- (exp(-exp(lext)*dts_se))/(exp((exp(lfec)-exp(lext))*dts_se))
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'IV'){
extin <- (exp(lext)*((exp((exp(lfec)-exp(lext))*dts_se))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts_se)))-exp(lext))
gamin <- (1-(exp(lext)*((exp((exp(lfec)-exp(lext))*dts_se))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts_se)))-exp(lext)))/ exp((exp(lfec)-exp(lext))*dts_se)
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'V'){
extin <- exp(lext)/(exp(lext)+1)
gamin <- exp(lfec)/(exp(lfec)+1)
sampin <- exp(lsmp)/(exp(lsmp)+1)
}
ll <- pradel_unvd_gam(
ext = extin,
gam = gamin,
p = sampin,
u,n,v,d);
if (is.infinite(ll$LogL)){ll$LogL = -Inf};
if (is.na(ll$LogL)){ll$LogL = -Inf};
if (is.nan(ll$LogL)){ll$LogL = -Inf};
return(ll)
}
# Defining a function to transform rates to probabilities
pradel_probs <- function(x){
lfec = lspecfun(x);
lext = lextfun(x);
lsmp = lsampfun(x);
if (modeltype=='I'){
extin <- 1-exp(-exp(lext)*dts[-length(dts)])
gamin <- (exp(-exp(lext)*dts[-length(dts)]))/(exp((exp(lfec)-exp(lext))*dts[-length(dts)]))
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'II'){
extin <- (exp(lext)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)]))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)])))-exp(lext))
gamin <- (1-(exp(lext)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)]))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts[-length(dts)])))-exp(lext)))/ exp((exp(lfec)-exp(lext))*dts[-length(dts)])
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'III'){
extin <- 1-exp(-exp(lext)*dts_se)
gamin <- (exp(-exp(lext)*dts_se))/(exp((exp(lfec)-exp(lext))*dts_se))
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'IV'){
extin <- (exp(lext)*((exp((exp(lfec)-exp(lext))*dts_se))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts_se)))-exp(lext))
gamin <- (1-(exp(lext)*((exp((exp(lfec)-exp(lext))*dts_se))-1))/ (exp(lfec)*((exp((exp(lfec)-exp(lext))*dts_se)))-exp(lext)))/ exp((exp(lfec)-exp(lext))*dts_se)
sampin <- rate2prob(exp(lsmp),dts)
} else if (modeltype == 'V'){
extin <- exp(lext)/(exp(lext)+1)
gamin <- exp(lfec)/(exp(lfec)+1)
sampin <- exp(lsmp)/(exp(lsmp)+1)
}
return(list(extprobs = extin,gamprobs=gamin,samprobs=sampin))
}
inxDriv = seq(4,length.out=dim(mmspec)[2]+ dim(mmext)[2]+dim(mmsamp)[2]-3) # index into 'drivers' non intercepts.
if (any(RE)){
prfun <- function(x){unlist(c(sapply(1:3,function(ii){do.call(priorMu, c(x[ii],priorPars[[1]],log=T))}),
sapply(inxDriv,function(ii){do.call(priorCov,c(x[ii],priorPars[[2]],log=T))}),
sapply(alphinx$varInx,function(ii){do.call(priorStd,c(x[ii],priorPars[[3]],log=T))}),
sapply(1:sum(RE),function(ii){dnorm( x[alphinx[[4+which(RE)[ii]]]],0,exp(x[alphinx$varInx[ii]]),log=T)})))}
} else { # no random effects, i.e. no varinx
prfun <- function(x){unlist(c(sapply(1:3,function(ii){do.call(priorMu, c(x[ii],priorPars[[1]],log=T))}),
sapply(inxDriv,function(ii){do.call(priorCov,c(x[ii],priorPars[[2]],log=T))})))}
}
probfun <- function(x){myll(x)$LogL+sum(unlist(prfun(x)))}
out <- list(probfun= probfun,likfun=myll,priorf = prfun,
npar = npar,test = 'dummy',
specfun = lspecfun,extfun=lextfun,sampfun=lsampfun,inx = alphinx,driverinx = inxDriv,
n_est=n_est,n_norm=n_norm,Obs=Obs,dts=dts,
date=date(),call=fullcall,modeltype=modeltype,
mmsamp=mmsamp,mmspec=mmspec_f,mmext=mmext_f,spec = spec,ext = ext,samp=samp,
pradel_probs = pradel_probs,origdata = origdata)
# Zsmp = Zsamp, Zspc = Zspec, Zext = Zext,dataSE = dataSE,
# inxDriv = inxDriv,RE = RE);
attr(out,"class")<-"CMR_model"
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.