R/DAMOCLES_bootstrap.R

Defines functions DAMOCLES_bootstrap

Documented in DAMOCLES_bootstrap

#' Phylogenetic community structure hypothesis test
#' 
#' This function computes the maximum likelihood estimates of colonisation and
#' local extinction rate for a given phylogeny and presence-absence data under
#' the DAMOCLES model.  These rate estimates are used to simulate null
#' communities under the DAMOCLES model.  Standardized effect size of mean
#' nearest taxon distance (mntd), mean phylogentic distance (mpd) and
#' loglikelihood are calculated For comparison, standardised effect sizes are
#' also calculated relative to a "Random-Draw" null model i.e. presence absence
#' randomised across tips
#' 
#' The output is a list of two dataframes. The first dataframe, summary_table,
#' contains the summary results. The second dataframe, null_community_data,
#' contains decsriptive statistics for each null community.
#' 
#' @param phy phylogeny in phylo format
#' @param pa presence-absence table.\cr The first column contains the labels of
#' the species (corresponding to the tip labels in the phylogeny.\cr The second
#' column contains the presence (1) or absence (0) of species in the local
#' community.
#' @param initparsopt The initial values of the parameters that must be
#' optimized
#' @param idparsopt The ids of the parameters that must be optimized, e.g. 1:2
#' for extinction rate, and offset of immigration rate The ids are defined as
#' follows: \cr id == 1 corresponds to mu (extinction rate) \cr id == 2
#' corresponds to gamma_0 (offset of immigration rate) \cr
#' @param parsfix The values of the parameters that should not be optimized.
#' See idparsfix.
#' @param idparsfix
#' 
#' The ids of the parameters that should not be optimized, e.g. c(1) if mu
#' should not be optimized, but only gamma_0. In that case idparsopt must be
#' c(2). The default is to fix the parameters not specified in idparsopt.
#' @param pars2 Vector of settings: \cr \code{pars2[1]} sets the relative
#' tolerance in the parameters \cr \cr \code{pars2[2]} sets the relative
#' tolerance in the function \cr \cr \code{pars2[3]} sets the absolute
#' tolerance in the parameters \cr \cr \code{pars2[4]} sets the maximum number
#' of iterations
#' @param pchoice sets which p-value to optimize and with which root state to
#' simulate (default pchoice = 0) \cr pchoice == 0 correspond to optimizing sum
#' of p_0f + p_1f, and simulating with an equal number of root states being 0
#' or 1 \cr pchoice == 1 correspond to optimizing p_0f, and simulating with
#' root state being 0\cr pchoice == 2 correspond to optimizing p_1f, and
#' simulating with root state being 1
#' @param runs the number null communities to generate.
#' @param estimate_pars Whether to estimate parameters on the simulated
#' datasets (default = FALSE).
#' @param conf.int The width of the conifdence intervals calculated on
#' bootstrapped parameter estimates
#' @return \item{summary_table}{ \code{mu} gives the maximum likelihood
#' estimate of mu and confidence intervals in brackets if estimate_pars = TRUE
#' \code{gamma_0} gives the maximum likelihood estimate of gamma_0 and
#' confidence intervals in brackets if bootstrap=TRUE \code{loglik} gives the
#' maximum loglikelihood \code{df} gives the number of estimated parameters,
#' i.e. degrees of feedom \code{conv} gives a message on convergence of
#' optimization; conv = 0 means convergence \code{n.obs} gives the number of
#' species locally present in the observed community \code{mntd.obs} gives the
#' MNTD of the observed community \code{mpd.obs} gives the MPD of the observed
#' community \code{runs} gives the number of null communities simulated
#' \code{mntd.mean.RD} mean of MNTD from null communities generated by a
#' "Random Draw" model \code{mntd.sd.RD} standard deviation of MNTD from null
#' communities generated by a "Random Draw" model \code{mntd.obs.z.RD}
#' standardized effect size of MNTD compared to null communities generated by a
#' "Random Draw" model (= -1*(mntd.obs - mntd.mean.RD)/ mntd.sd.RD)
#' \code{mntd.obs.rank.RD} rank of observed MNTD compared to null communities
#' generated by a "Random Draw" model \code{mntd.obs.q.RD} quantile of observed
#' MNTD vs. null communities (= mntd.obs.rank.RD /runs + 1) \code{mpd.mean.RD}
#' mean of MPD from null communities generated by a "Random Draw" model
#' \code{mpd.sd.RD} standard deviation of MPD from null communities generated
#' by a "Random Draw" model \code{mpd.obs.z.RD} standardized effect size of MPD
#' compared to null communities generated by a "Random Draw" model (=
#' -1*(mpd.obs - mpd.mean.RD)/ mpd.sd.RD) \code{mpd.obs.rank.RD} rank of
#' observed MPD compared to null communities generated by a "Random Draw" model
#' \code{mpd.obs.q.RD} quantile of observed MPD vs. null communities (=
#' mpd.obs.rank.RD /runs + 1) \code{n.mean.DAMOCLES} mean number of species
#' locally present in the null communities generated by DAMOCLES
#' \code{mntd.mean.DAMOCLES} mean of MNTD from null communities generated by
#' DAMOCLES \code{mntd.sd.DAMOCLES} standard deviation of MNTD from null
#' communities generated by DAMOCLES \code{mntd.obs.z.DAMOCLES} standardized
#' effect size of MNTD compared to null communities generated by DAMOCLES (=
#' -1*(mntd.obs - mntd.mean.DAMOCLES)/ mntd.sd.DAMOCLES)
#' \code{mntd.obs.rank.DAMOCLES} rank of observed MNTD compared to null
#' communities generated by DAMOCLES \code{mntd.obs.q.DAMOCLES} quantile of
#' observed MNTD vs. null communities (= mntd.obs.rank.DAMOCLES /runs + 1)
#' \code{mpd.mean.DAMOCLES} mean of MPD from null communities generated by
#' DAMOCLES \code{mpd.sd.DAMOCLES} standard deviation of MPD from null
#' communities generated by DAMOCLES \code{mpd.obs.z.DAMOCLES} standardized
#' effect size of MPD compared to null communities generated by DAMOCLES (=
#' -1*(mpd.obs - mpd.mean.DAMOCLES)/ mpd.sd.DAMOCLES)
#' \code{mpd.obs.rank.DAMOCLES} rank of observed MPD compared to null
#' communities generated by DAMOCLES \code{mpd.obs.q.DAMOCLES} quantile of
#' observed MPD vs. null communities (= mpd.obs.rank.DAMOCLES /runs + 1)
#' \code{loglik.mean.DAMOCLES} mean of loglikelihoods from null communities
#' generated by DAMOCLES \code{loglik.sd.DAMOCLES} standard deviation of
#' loglikelihoods from null communities generated by DAMOCLES
#' \code{loglik.obs.z.DAMOCLES} standardized effect size of loglikelihood
#' compared to null communities generated by DAMOCLES (= -1*(loglik.obs -
#' loglik.mean.DAMOCLES)/ loglik.sd.DAMOCLES) \code{loglik.obs.rank.DAMOCLES}
#' rank of observed loglikelihood compared to null communities generated by
#' DAMOCLES \code{loglik.obs.q.DAMOCLES} quantile of observed loglikelihoods
#' vs. null communities (= loglik.obs.rank.DAMOCLES /runs + 1) }
#' \item{null_community_data}{ \code{run} gives the simulation run
#' \code{root.state.print} gives the state of the ancestral species in the
#' local community assumed in the simulation, i.e. present (1) or absent (0)
#' \code{n} gives the number of species locally present in the observed
#' community \code{n.RD} gives the number of species locally present in the
#' null community generated by a "Random Draw" model \code{mntd.RD} gives the
#' MNTD of the null community generated by a "Random Draw" model \code{mpd.RD}
#' gives the MPD of the null community generated by a "Random Draw" model
#' \code{n.DAMOCLES} gives the number of species locally present in the null
#' community generated by DAMOCLES \code{mntd.DAMOCLES} gives the MNTD of the
#' null community generated by DAMOCLES \code{mpd.DAMOCLES} gives the MPD of
#' the null community generated by DAMOCLES \code{loglik.DAMOCLES} gives the
#' maximum loglikelihood for the null community generated by DAMOCLES
#' \code{mu.DAMOCLES} gives the maximum likelihood estimate of mu for the null
#' community generated by DAMOCLES \code{gamma_0.DAMOCLES} gives the maximum
#' likelihood estimate of gamma_0 for the null community generated by DAMOCLES
#' }
#' @author Rampal S. Etienne
#' @seealso \code{\link{DAMOCLES_ML}} \code{\link{DAMOCLES_sim}}
#' @references Pigot, A.L. & R.S. Etienne (2015). A new dynamic null model for
#' phylogenetic community structure. Ecology Letters 18: 153-163.
#' @keywords models
#' @export DAMOCLES_bootstrap
DAMOCLES_bootstrap = function(
   phy = ape::rcoal(10),
   pa = matrix(c(phy$tip.label,sample(c(0,1),ape::Ntip(phy),replace = T)),
      nrow = ape::Ntip(phy),ncol = 2),
   initparsopt = c(0.1,0.1),
   idparsopt = 1:length(initparsopt),
   parsfix = NULL,
   idparsfix = NULL,
   pars2 = c(1E-3,1E-4,1E-5,1000),
   pchoice = 0,
   runs = 999,
   estimate_pars = FALSE,
   conf.int = 0.95)
{
				
	# CALCULATE OSBERVED N, MNTD AND MPD
	
	obs.samp = matrix(as.numeric(pa[,2]),nrow = 1)
	colnames(obs.samp) = phy$tip.label

	n.obs = sum(obs.samp)
	mntd.obs = picante::mntd(obs.samp, stats::cophenetic(phy))
	mpd.obs = picante::mpd(obs.samp, stats::cophenetic(phy))
	
	# ESTIMATE COLONISATION AND LOCAL EXTINCTION RATES FROM EMPIRICAL DATA		
	
	pars = DAMOCLES_ML(phy = phy,
	                   pa = pa,
	                   initparsopt = initparsopt,
	                   idparsopt = idparsopt,
	                   parsfix = parsfix,
	                   idparsfix = idparsfix,
	                   pars2 = pars2,
	                   pchoice = pchoice)
	
  # SIMULATE NULL COMMUNITY UNDER THESE ESTIMATED RATES	
  
  runs2 = runs
  if(pchoice == 0)
  {
     root.state = c(0,1)
     # MAKE NUMBER OF RUNS EVEN AND THEN DIVIDE BY HALF
     runs2 = ((runs + runs %% 2))/2
  }
  if(pchoice == 1)
  {
     root.state = 0
  }
  if(pchoice == 2)	
  {
     root.state = 1
  }
  
	# SIMULATE NULL COMMUNITIES UNDER RANDOM DRAW MODEL AND DAMOCLES USING ESTIMATED PARAMETERS AND CALCULATE PHYLOGENETIC STRUCTURE
	nullCommStats = expand.grid(run = 1:runs2,root.state = root.state,n = sum(as.numeric(pa[,2])),n.RD = sum(as.numeric(pa[,2])),mntd.RD = NA,mpd.RD = NA,n.DAMOCLES = NA,mntd.DAMOCLES = NA,mpd.DAMOCLES = NA,loglik.DAMOCLES = NA,mu.DAMOCLES = NA,gamma_0.DAMOCLES = NA)

	for(run in 1:dim(nullCommStats)[1]){     
    	DAMOCLES_community = DAMOCLES_sim(phy,mu = pars[1],gamma_0 = pars[2],gamma_td = 0,sigma = 0,psiBranch = 0,psiTrait = 0,z = 0,phi = 0,traitOpt = 0,br0 = 0,br_td = 0,nTdim = 1,root.state = nullCommStats$root.state[run],root.trait.state = 0,plotit = FALSE,keepExtinct = FALSE)	
  		DAMOCLES.samp = matrix(as.numeric(DAMOCLES_community[[1]]$state),nrow = 1)
  		colnames(DAMOCLES.samp) = phy$tip.label
  		nullCommStats$n.DAMOCLES[run] = sum(DAMOCLES.samp)
  		nullCommStats$mntd.DAMOCLES[run] = picante::mntd(DAMOCLES.samp,stats::cophenetic(phy))
  		nullCommStats$mpd.DAMOCLES[run] = picante::mpd(DAMOCLES.samp,stats::cophenetic(phy))
  		RD.samp = matrix(as.numeric(sample(pa[,2])),nrow = 1)
  		colnames(RD.samp) = phy$tip.label
  		nullCommStats$mntd.RD[run] = picante::mntd(RD.samp,stats::cophenetic(phy))
  		nullCommStats$mpd.RD[run] = picante::mpd(RD.samp,stats::cophenetic(phy))
  		
  		if(estimate_pars == TRUE)
      {
	  	   DAMOCLES.pa = pa
	  	   DAMOCLES.pa[,2]= DAMOCLES_community[[1]]$state	
  		   parsNull = DAMOCLES_ML(phy,DAMOCLES.pa,initparsopt = c(pars[[1]],pars[[2]]),idparsopt = idparsopt,parsfix = parsfix,idparsfix = idparsfix,pars2 = pars2,pchoice = pchoice)
  		   nullCommStats$mu.DAMOCLES[run] = parsNull$mu
	       nullCommStats$gamma_0.DAMOCLES[run] = parsNull$gamma_0
		     nullCommStats$loglik.DAMOCLES[run] = parsNull$loglik
      }
  	}
	
	# CALCULATE MEAN AND STANDARD DEVIATION OF N, MNTD AND MPD FOR SIMULATED DATA  	
	n.mean.DAMOCLES = mean(nullCommStats$n.DAMOCLES)
	mntd.mean.DAMOCLES = mean(nullCommStats$mntd.DAMOCLES)
	mntd.sd.DAMOCLES = stats::sd(nullCommStats$mntd.DAMOCLES)
	mpd.mean.DAMOCLES = mean(nullCommStats$mpd.DAMOCLES)
	mpd.sd.DAMOCLES = stats::sd(nullCommStats$mpd.DAMOCLES)
	loglik.mean.DAMOCLES = mean(nullCommStats$loglik.DAMOCLES)
	loglik.sd.DAMOCLES = stats::sd(nullCommStats$loglik.DAMOCLES)
	
	mntd.mean.RD = mean(nullCommStats$mntd.RD)
	mntd.sd.RD = stats::sd(nullCommStats$mntd.RD)
	mpd.mean.RD = mean(nullCommStats$mpd.RD)
	mpd.sd.RD = stats::sd(nullCommStats$mpd.RD)
	
	# CALCULATE STANDARDISED EFFECTS SIZES OF OBSERVED DATA
	mntd.obs.z.RD = -1 * (mntd.obs - mntd.mean.RD)/mntd.sd.RD
	mntd.obs.rank.RD = rank(c(mntd.obs,nullCommStats$mntd.RD))[1]
	mntd.obs.q.RD = (mntd.obs.rank.RD/(dim(nullCommStats)[1] + 1))
	
	mpd.obs.z.RD = -1 * (mpd.obs - mpd.mean.RD)/mpd.sd.RD
	mpd.obs.rank.RD = rank(c(mpd.obs,nullCommStats$mpd.RD))[1]
	mpd.obs.q.RD = (mpd.obs.rank.RD/(dim(nullCommStats)[1] + 1))
		
	mntd.obs.z.DAMOCLES = -1 * (mntd.obs - mntd.mean.DAMOCLES)/mntd.sd.DAMOCLES
	mntd.obs.rank.DAMOCLES = rank(c(mntd.obs, nullCommStats$mntd.DAMOCLES))[1]
	mntd.obs.q.DAMOCLES = mntd.obs.rank.DAMOCLES/(dim(nullCommStats)[1] + 1)
	
	mpd.obs.z.DAMOCLES = -1 * (mpd.obs - mpd.mean.DAMOCLES)/mpd.sd.DAMOCLES
	mpd.obs.rank.DAMOCLES = rank(c(mpd.obs, nullCommStats$mpd.DAMOCLES))[1]
	mpd.obs.q.DAMOCLES = mpd.obs.rank.DAMOCLES/(dim(nullCommStats)[1] + 1)
	
	if(estimate_pars == TRUE)
  {
  	loglik.obs.z.DAMOCLES = (pars$loglik - loglik.mean.DAMOCLES)/loglik.sd.DAMOCLES
  	loglik.obs.rank.DAMOCLES = rank(c(pars$loglik, nullCommStats$loglik.DAMOCLES))[1]
  	loglik.obs.q.DAMOCLES = loglik.obs.rank.DAMOCLES/(dim(nullCommStats)[1] + 1)
  	mu = paste(pars$mu,"(",stats::quantile(nullCommStats$mu.DAMOCLES,(1 - conf.int)/2),":",stats::quantile(nullCommStats$mu.DAMOCLES,conf.int + (1 - conf.int)/2),")",sep = "")
  	gamma_0 = paste(pars$gamma_0,"(",stats::quantile(nullCommStats$gamma_0.DAMOCLES,(1 - conf.int)/2),":",stats::quantile(nullCommStats$gamma_0.DAMOCLES,conf.int + (1 - conf.int)/2),")",sep = "")
 	} else {
  	loglik.obs.z.DAMOCLES = NA
  	loglik.obs.rank.DAMOCLES = NA
  	loglik.obs.q.DAMOCLES = NA
  	mu = pars$mu
  	gamma_0 = pars$gamma_0
	}	
  if(pchoice == 0)	
  {
   	 root.state.print = paste(0,",",1,sep = "")
  } else {
     root.state.print = root.state
  }
	  	
	value = c(root.state.print,mu,gamma_0,pars$loglik,pars$df,pars$conv,n.obs,mntd.obs,mpd.obs,dim(nullCommStats)[1],
	mntd.mean.RD,mntd.sd.RD,mntd.obs.z.RD,mntd.obs.rank.RD,mntd.obs.q.RD,
	mpd.mean.RD,mpd.sd.RD,mpd.obs.z.RD,mpd.obs.rank.RD,mpd.obs.q.RD,
	n.mean.DAMOCLES,
	mntd.mean.DAMOCLES,mntd.sd.DAMOCLES,mntd.obs.z.DAMOCLES,mntd.obs.rank.DAMOCLES,mntd.obs.q.DAMOCLES,
	mpd.mean.DAMOCLES,mpd.sd.DAMOCLES,mpd.obs.z.DAMOCLES,mpd.obs.rank.DAMOCLES,mpd.obs.q.DAMOCLES,
	loglik.mean.DAMOCLES,loglik.sd.DAMOCLES,loglik.obs.z.DAMOCLES,loglik.obs.rank.DAMOCLES,loglik.obs.q.DAMOCLES)
	
	desc = c("Root_state","mu","gamma_0","loglik.obs","df","conv","n.obs","mntd.obs","mpd.obs","runs",
	"mntd.mean.RD","mntd.sd.RD","mntd.obs.z.RD","mntd.obs.rank.RD","mntd.obs.q.RD",
	"mpd.mean.RD","mpd.sd.RD","mpd.obs.z.RD","mpd.obs.rank.RD","mpd.obs.q.RD",
	"n.mean.DAMOCLES",
	"mntd.mean.DAMOCLES","mntd.sd.DAMOCLES","mntd.obs.z.DAMOCLES","mntd.obs.rank.DAMOCLES","mntd.obs.q.DAMOCLES",
	"mpd.mean.DAMOCLES","mpd.sd.DAMOCLES","mpd.obs.z.DAMOCLES","mpd.obs.rank.DAMOCLES","mpd.obs.q.DAMOCLES",
	"loglik.mean.DAMOCLES","loglik.sd.DAMOCLES","loglik.obs.z.DAMOCLES","loglik.obs.rank.DAMOCLES","loglik.obs.q.DAMOCLES")
		
	summaryResults = data.frame(desc,value)
	
	# SAVE SUMMARY RESULTS AND ALSO RESULTS FOR EACH NULL COMMUNITY
	results = list()
	results[[1]] = summaryResults
	results[[2]] = nullCommStats
  names(results) = c("summary_table","null_community_data")
	
	return(results)
}	

Try the DAMOCLES package in your browser

Any scripts or data that you put into this service are public.

DAMOCLES documentation built on Aug. 12, 2020, 5:08 p.m.