R/solowbeet2.R

#' Solow & Beet's extinction date model
#'
# "The trouble with poet is, how do you know it's deceased?" --\emph{Sweeney Todd}
#'
#' @description
#' The Solow & Beet method uses a Bayesian model to evaluate the extinction date of a species, using mixed certainty datsets. The \code{solowbeet} function can be used to implement model 1 (which assumes invalid sightings aren't generated until after a species is extinct) or model 2 (which assumes that valid and invalid sightings could be mixed before the true extinction date). Both models treat valid and invalid sightings as separate processes, and make inference on the likelihood of the extinction date based on what sighting rates would have generated the dataset in hand.
#'
#' @description 
#' The solowbeet function runs a full panel including plots, exporting plotable data (for use in a preferred graphics package) and two objects in $Results that allow hypothesis testing for extinction by the specified year T.bound. First is the Bayes factor which is calculated as p(t|E)/p(t|E') where t is the data and E is the hypothesis the species is extinct. Second is the posterior probability of extinction P(E|t), which (if the prior P(E)=P(E')=0.5 for simplicity's sake) is given as P(t|E)/{P(t|E)+P(t|E')}. The two are connected but the Bayes factor requires no prior probability of extinction. Both use a prior on P(T_E). See Carlson et al. manuscript for details. 
#' 
#' @references Solow, A., Smith, W., Burgman, M., Rout, T., Wintle, B. and Roberts, D., 2012. Uncertain Sightings and the Extinction of the Ivory-Billed Woodpecker. Conservation Biology, 26(1), pp.180-184.
#' @references Solow, A.R. and Beet, A.R., 2014. On uncertain sightings and inference about extinction. Conservation biology, 28(4), pp.1119-1123.
#'
#' @param DATA Input data should be a data frame with two columns: \code{year} and \code{sighting}. Year records the date of a sighting (though it could be any continuous units of time) and sighting records the quality of the sighting. A "1" is a certain sighting (guaranteed valid) while a "2" or anything higher is uncertain. Uncertain sightings are not necessarily invalid, and researchers may want to differentiate within uncertain sightings by quality. We typically use three categories: 1 is physical evidence or certain expert sightings, 2 are plausible expert sightings, and 3 are dubious or novice sightings without strong evidence.
#' @param inputs Should be an argument formatted as: \code{list(dataStr = "Vini vidivici", T = 1969, posteriorPrior = "Unif")}. The argument sets, in order: the name of the species, the T.bound (the outer bound the model runs to; MUST be later than (T_last + increment2*(T_last-T_first)), and the posterior prior. Options are uniform ("Unif"), linear ("Tri"), or negative exponential ("Exp")
#' @param modelnumber Which model do you want to implement - 1 or 2?
#' @param gamma Gamma sets the slope of the exponential decline if the exponential prior is selected. It defaults to six, from the original implementation with ivory-billed woodpeckers.
#' @param plots Four panel plot of sightings by certainty,  estimated sighting rates over time, priors, and likelihood of extinction date.
#' @param increment Used in numerical integration over omega.
#' @param increment2 Used to estimate posterior pdf. A smaller increment2 allows finer resolution likelihood plots and more precise estimates. We suggest 0.01 for single-species, non-spatial sightings; but the argument defaults to 0.05 for spatExtinct.
#' @param verbose Leave it turned off. Don't turn it on.
#' @keywords Solow \& Beet, bayesian model, extinction date estimator
#' @export
#' @examples
#'
#' data(thylacine)
#' solowbeet(thylacine,inputs=list(dataStr = 'Thylacinus cynocephalus', T=c(2017), posteriorPrior='Unif'),
#'          modelnumber=2,plots=TRUE)
#'



solowbeet2 <- function(data, inputs, modelnumber, gamma=6, plots=FALSE,
	increment=0.01, increment2=0.05, verbose=FALSE,
	testyear=NA){
  ## R conversion: added 'increment' and 'increment2' as arguments, and changed names
  ## 'increment' used in numerical integration over omega
  ## 'increment2' used to estimate posterior pdf
  ## Also added 'modelnumber' argument, rather than having it as part of 'inputs'
  ## R conversion: also added 'gamma' as an argument
  ## plot priors for tauE


  ## (1) Extract basic summary statistics from data
  if(verbose){
		print.noquote("--------------------------------------------------")
    print.noquote(paste("1. Setting up data...", date()))
	}

  DATA <- list(data=data)
  ## certian sighting coded as a 1 in data file
  ## examines data + plots
  indmc <- (data[,2] == 1) ## (**) select all certain sightings
  tMin <- min(data[,1]) ## (**) earliest sighting date. First date on record for real data
  if(sum(indmc) == 0){ ## no certain sightings
     tCert <- tMin ## if no certain sightings. does not mean we assume 1st sighting is certain
  }else{
	  tCert <- max(data[indmc,1]) ## (**) last date seen with certainty
  }

  DATA$tCert <- tCert
  DATA$tMin <- tMin
  n <- nrow(data) ## (**) total number of sightings
  DATA$N <- n

  ## (2) Map times onto [0,1] unit interval
  ## (evaluate p(t|TE))
  if(verbose){
		print.noquote(paste("2. Mapping times onto unit interval...", date()))
	}

  tL <- (tCert-tMin)/(inputs$T - tMin) ## (**) date of last certain sighting on unit interval
  tL <- round((1/increment2)*tL)*increment2 ## (**) makes sure that rates are calculated at
  ## same time points when using either original data or data with no sightings
  ## if all uncertain then tL = 0.
  tLV <- seq(from = (tL+increment), to = 1-(increment/2), by = increment2)
  #tLV <- (increment):increment2:1-(increment/2)
  dVec <- (data[,1]-tMin)/(inputs$T-tMin) #put all dates in unit interval

  ## (3) Empirical estimate of rate
  if(verbose){
  	print.noquote(paste("3. Empirical estimates of rate...", date()))
	}

tmp <- sub.estimate.rate(DATA,increment,inputs)
  rate <- tmp$rate
  rateV <- tmp$rateV

  ## (3) Select model to analyse data
  ## calls the likelihood function to obtain the integrand p(t|tauE)
  if(verbose){
  	print.noquote(paste("4. Calculate likelihood...", date()))
	}

  if((modelnumber != 1) & (modelnumber != 2)){
    error('modelnumber not defined')
  }else{
    p_t_tau_E <- likelihood.all(data = data,tLV = tLV,indmc = indmc,tL = tL,
			increment = increment, n = n, dVec = dVec,
			modelnumber=modelnumber)
  }

  if(is.null(p_t_tau_E)){ ## not extinct
	  Results <- -Inf
  }

  ## (5) Prior distributions
  if(verbose){
	  print.noquote(paste("5. Calculate prior...", date()))
	}

  prior <- as.list(NULL)
  prior$Exp <- (gamma * exp(-gamma*tLV)/(1-exp(-gamma))) ## truncated gamma
  prior$Unif <- rep(1, length(tLV))
  prior$Tri <- (-2 * tLV) + 2
	k <- match(inputs$posteriorPrior, names(prior))
  if(is.na(k)){
    error("No such prior!")
  }else{
    p_tau_E <- prior[[k]]
  }


 ## (6) Posterior distribution
  if(verbose){
	  print.noquote(paste("6. Calculate posterior...", date()))
	}

  ## posterior pdf of T_E, p(tau_e)
  ## posterior pdf, p_tauE_t <- (p_t_tau_E .* p_tau_E) / integral
  ## from bayes p(TauE|t) <- p(t|TauE)*p(tauE)/p(t)
  ## p(t|TauE) <- likelihood
  ## p(tauE) <- prior for extinction
  ## p(t) <- integral of p(t|TauE)*p(tauE)
  ## First part corresponds to Equation 3 in paper - checked
  f <- p_t_tau_E * p_tau_E ## calculate posterior distribution
	p_t_E <- sum(f) * increment2  ## integral of posterior distribution
	p_tauE_t <- f / p_t_E ## normalized posterior
	# p_t_notE <- prob not extinct = prob extinction at time 2010
	p_t_notE <- p_t_tau_E[length(p_t_tau_E)]

  ## (7) Bayes Factor calculation
  ## (corresponds to Equation 2 in the paper - checked)
  if(verbose){
  	print.noquote(paste("7. Calculate Bayes Factor...", date()))
	}

  BayesFactor <- p_t_E / p_t_notE

  ## (8) Plot results and output results
	if(verbose){
		print.noquote(paste("8. Graph and output results...", date()))
	}

  yrplot <- tMin + tLV * (inputs$T - tMin) ## V3: added 27 Jan 2017
  if(plots){
		layout(matrix(1:4, ncol=2))
		par(mar=c(4,4,1,1))
		plot.woodpecker.data(DATA, inputs$dataStr)
		plot.prior(tLV, prior, gamma)
		## plots p(t|tau_e) and rate of change
		plot.rate(rate = rate,rateV = rateV)
	  ## plots p(t|tau_e) and rate of change
	  plot.results(yrplot,integrand = p_t_tau_E)
  }
  
  #NEWCODE
  hell.df <- data.frame(yr=yrplot,post=f)
  p_test_E <- sum(hell.df$post[hell.df$yr<testyear]) * increment2
  p_test_notE <- sum(hell.df$post[hell.df$yr>=testyear]) * increment2
  bayes.test <- p_test_E/p_test_notE
  p.test <- p_test_notE/(p_test_E+p_test_notE)
  
  plotobj <- data.frame(year = yrplot, integrand = round(p_t_tau_E, 6)) ## V3: added 27 Jan 2017
  list(DATA = DATA, Results = c('Bayes factor'=BayesFactor,'Probability of persistence'=p_t_notE/( p_t_notE+p_t_E)), rate = rate, plotobj = plotobj,
       testresults = c('Bayes factor (test)'=bayes.test,'Probability of persistence (test)'=p.test))
}
cjcarlson/spatExtinct documentation built on May 25, 2019, 3:26 p.m.