R/Palaeodose_Computation.R

Defines functions Palaeodose_Computation

Documented in Palaeodose_Computation

#' Bayesian analysis for the palaeodose estimation of various samples
#'
#' This function computes the palaeodose (in Gy) of one or various samples according to the model developed in Combes et al (2015),
#' based on an output of \code{\link{Generate_DataFile}} or \code{\link{Generate_DataFile_MG}}
#' or both of them using \code{\link{combine_DataFiles}}.\cr
#' Samples, for which data is avalilable in several BIN files, can be analysed.\cr
#' Single-grain or Multi-grain OSL measurements can be analysed simultaneouly.
#'
#' @param DATA list of objects: LT, sLT, ITimes, dLab, ddot_env, regDose, J, K, Nb_measurement,
#' provided by \code{\link{Generate_DataFile}} or \code{\link{Generate_DataFile_MG}}.
#' \code{DATA} contains information for more than one sample.
#' @param SampleNames character vector: names of sample. The length of this vector is equal to \code{Nb_sample}.
#' @param Nb_sample integer: number of samples.
#' @param BinPerSample integer vector (with default): vector with the number of BIN files per sample.
#' The length of this vector is equal to \code{Nb_sample}.
#' \code{BinPerSample}[i] correponds to the number of BIN files for the sample whose number ID is equal to \code{i}.
#' For more information to fill this vector, we refer to detatils in \code{\link{Generate_DataFile}} or \code{\link{Generate_DataFile_MG}}.
#' @param SavePdf boolean (with default): if TRUE save graph in pdf file named \code{OutputFileName} in folder \code{OutputFilePath}.
#' @param OutputFileName character (with default): name of the pdf files that will be generated by the function.
#' @param OutputFilePath character (with default): path to the pdf files that will be generated by the function.
#' @param SaveEstimates boolean (with default): if TRUE save Bayes estimates and credible interval at level 68% and 95%,
#' in a csv table named \code{OutputFileName} in folder \code{OutputFilePath}.
#' @param OutputTableName character (with default): name of the table that will be generated by the function if \code{SaveEstimates}=TRUE.
#' @param OutputTablePath character (with default): path to the table that will be generated by the function if \code{SaveEstimates}=TRUE.
#' @param LIN_fit logical (with default): if TRUE (default) allows a linear component,
#' on top of the (default) saturating exponential curve, for the fitting of dose response curves.
#' Please see details for more informations on the proposed dose response curves.
#' @param Origin_fit logical (with default): if TRUE, forces the dose response curves to pass through the origin.
#' Please see details for more informations on the proposed growth curves.
#' @param distribution character (with default): type of distribution that defines
#' how individual equivalent dose values are distributed around the palaeodose.
#' Allowed inputs are \bold{"cauchy"}, \bold{"gaussian"}, \bold{"lognormal_A"} and \bold{"lognormal_M"}.
#' @param Iter integer (with default): number of iterations for the MCMC computation (for more information see \code{\link{jags.model}}).
#' @param t integer (with default): 1 every \code{t} iterations of the MCMC is considered for sampling the posterior distribution
#' (for more information see \code{\link{jags.model}}).
#' @param n.chains integer (with default): number of independent chains for the model (for more information see \code{\link{jags.model}}).
#'
#' @details
#'
#' \bold{** Option on growth curves **}
#'
#' As for \code{\link{Age_Computation}} and \code{\link{AgeS_Computation}}, the user can choose from 4 dose response curves:
#' \itemize{
#'   \item \bold{Saturating exponential plus linear growth} (\code{PalaeodosesMultiBF_EXPLIN}):
#'
#'   for all \code{x} in IR+, \code{f(x)=a(1-exp(-x/b))+cx+d}; select
#'   \itemize{
#'     \item \code{LIN_fit=TRUE}
#'     \item \code{Origin_fit=FALSE}
#'   }
#'   \item \bold{Saturating exponential growth} (\code{PalaeodosesMultiBF_EXP}):
#'
#'   for all \code{x} in IR+, \code{f(x)=a(1-exp(-x/b))+d}; select
#'   \itemize{
#'     \item \code{LIN_fit=FALSE}
#'     \item \code{Origin_fit=FALSE}
#'   }
#'   \item \bold{Saturating exponential plus linear growth and fitting through the origin} (\code{PalaeodosesMultiBF_EXPLINZO}):
#'
#'   for all \code{x} in IR+, \code{f(x)=a(1-exp(-x/b))+cx}; select
#'   \itemize{
#'     \item \code{LIN_fit=TRUE}
#'     \item \code{Origin_fit=TRUE}
#'   }
#'   \item \bold{Saturating exponential growth and fitting through the origin} (\code{PalaeodosesMultiBF_EXPZO}):
#'
#'   for all \code{x} in IR+, \code{f(x)=a(1-exp(-x/b))}; select
#'   \itemize{
#'     \item \code{LIN_fit=FALSE}
#'     \item \code{Origin_fit=TRUE}
#'   }
#' }
#'
#' \bold{** Option on equivalent dose distribution around the palaeodose **}
#'
#' The use can choose between :
#' \itemize{
#'   \item \code{cauchy}: a Cauchy distribution with location parameter equal to the palaeodose of the sample
#'   \item \code{gaussian}: a Gaussian distribution with mean equal to the palaeodose of the sample
#'   \item \code{lognormal_A}: a log-normal distribution with mean or \bold{A}verage equal to the palaeodose of the sample
#'   \item \code{lognormal_M}: a log-normal distribution with \bold{M}edian equal to the palaeodose of the sample
#' }
#'
#' @return
#' \bold{NUMERICAL OUTPUT}\cr
#'
#' \enumerate{
#'  \item \bold{A list containing the following objects:}
#'  \itemize{
#'   \item \bold{Sampling} that corresponds to a sample of the posterior distributions
#'  of palaeodose and equivalent dose dispersion parameters (both in Gy).
#'   \item \bold{Model_GrowthCurve}, stating which dose response fitting option was chosen;
#'   \item \bold{Distribution}, stating which distribution was chosen to model the dispersion of
#'  individual equivalent dose values around the palaeodose of the sample.
#'  }
#'  \item\bold{The Gelman and Rubin test of convergency}: prints the result of the Gelman and Rubin test of convergency for palaeodose and equivalent dose dispersion parameters for each sample.
#' A result close to one is expected.\cr
#' In addition, the user must visually assess the convergency of the trajectories by looking at the pdf file
#' generated by the function (see \bold{PLOT OUTPUT} for more informations).\cr
#' If both convergencies (Gelman and Rubin test and plot checking) are satisfactory,
#' the user can consider the printed estimates as valid. Otherwise, the user may try increasing the number of MCMC interations
#' (\code{Iter}) to reach convergency.
#'  \item\bold{Credible intervals and Bayes estimates}: prints the Bayes esitmates, the credible intervals at 95% and 68% for
#' the palaeodose and equivalent dose dispersion parameters for each sample.
#' }
#'
#' \bold{PLOT OUTPUT}
#'
#' \enumerate{
#'  \item\bold{MCMC trajectories}
#' A graph with the MCMC trajectories and posterior distributions of the palaeodose and equivalent dose dispersion parameters
#' is displayed, there is one page per sample. \cr
#' The first line of the figure correponds to the palaeodose parameter and the second to the equivalent dose dispersion parameter.
#' On each line, the plot on the left represents the MCMC trajectories, and the one on the right the posterior distribution of the parameter.
#'  \item \bold{Summary of palaeodose estimates}: plot credible intervals and Bayes estimate of each sample palaeodose on a same graph.
#' }
#' To give result in a publication, we recommend to give the Bayes estimate of the parameters as well as the credible interval at 95% or 68%.
#'
#' @author Claire Christophe, Sebastian Kreutzer, Anne Philippe, Guillaume Guérin
#'
#' @note Please note that the initial values for all MCMC are currently all the same for all chains since we rely on the automatic
#' initial value generation of JAGS. This is not optimal and will be changed in future. However, it does not affect the quality
#' of the age estimates if the chains have converged.
#'
#' @seealso
#' \code{\link{Generate_DataFile}}, \code{\link{Generate_DataFile_MG}}, \code{\link{combine_DataFiles}},
#' \code{\link{rjags}}, \code{\link{plot_MCMC}},
#' \code{\link{Age_Computation}}, \code{\link{AgeS_Computation}}
#'
#' @examples
#' ## Load data
#' data(DATA1,envir = environment())
#' ## Palaeodose computation of samples GDB3
#' P=Palaeodose_Computation(DATA=DATA1,Nb_sample=1,SampleNames=c("GDB5"),Iter=100)
#'
#' @references
#' Combes, B., Philippe, A., Lanos, P., Mercier, N., Tribolo, C., Guerin, G., Guibert, P., Lahaye, C., 2015.
#' A Bayesian central equivalent dose model for optically stimulated luminescence dating.
#' Quaternary Geochronology 28, 62-70. doi:10.1016/j.quageo.2015.04.001
#'
#' @export
Palaeodose_Computation<-function(
  DATA,
  SampleNames,
  Nb_sample,
  BinPerSample = rep(1, Nb_sample),
  SavePdf = FALSE,
  OutputFileName = c('MCMCplot'),
  OutputFilePath = c(''),
  SaveEstimates = FALSE,
  OutputTableName = c("DATA"),
  OutputTablePath = c(''),
  LIN_fit = TRUE,
  Origin_fit = FALSE,
  distribution = c("cauchy"),
  Iter = 50000,
  t = 5,
  n.chains = 3
){

  PriorPalaeodose=c(0,400)

  #--Index preparation
  CSBinPerSample=cumsum(BinPerSample)
  LengthSample=c()
  for(ns in 1:Nb_sample){
    LengthSample=c(LengthSample,length(DATA$LT[[ns]][,1]))
  }
  CSLengthSample=c()
  CSLengthSample=c(0,cumsum(LengthSample))
  index2=c(0,cumsum(DATA$J))

  #--- File preparation
  LT=matrix(data=0,nrow=sum(DATA$J),ncol=(max(DATA$K)+1))
  sLT=matrix(data=0,nrow=sum(DATA$J),ncol=(max(DATA$K)+1))
  IrrT=matrix(data=0,nrow=sum(DATA$J),ncol=(max(DATA$K)))
  for(ns in 1:Nb_sample){
    LT[seq(CSLengthSample[ns]+1,CSLengthSample[ns+1],1),1:length(DATA$LT[[ns]][1,])]<-DATA$LT[[ns]]
    sLT[seq(CSLengthSample[ns]+1,CSLengthSample[ns+1],1),1:length(DATA$sLT[[ns]][1,])]<-DATA$sLT[[ns]]
    IrrT[seq(CSLengthSample[ns]+1,CSLengthSample[ns+1],1),1:length(DATA$ITimes[[ns]][1,])]<-DATA$ITimes[[ns]]
  }

  #--- BUG file selection
  Model_Palaeodose<-0
  data(Model_Palaeodose,envir = environment())

  if(LIN_fit==TRUE){
    cLIN=c('LIN')
  }else{cLIN=c()}
  if(Origin_fit==TRUE){
    cO=c("ZO")
  }else{cO=c()}
  Model_GrowthCurve=c(paste("PalaeodosesMultiBF_EXP",cLIN,cO,sep=""))

  #--- JAGS run
  dataList = list('N'= LT,'sN'=sLT,"IT"=IrrT,
                  "sDlab"=DATA$dLab[1,],
                  'J'=DATA$J,
                  'K'=DATA$K,
                  "I"=Nb_sample,
                  "index"=index2,
                  "xbound"=PriorPalaeodose,
                  "BinPerSample"=BinPerSample,
                  "CSBinPerSample"=CSBinPerSample)

  ##set connection
  con <- textConnection(Model_Palaeodose[[Model_GrowthCurve]][[distribution]])

  jags <-
    rjags::jags.model(
      file =con,
      data = dataList,
      n.chains = n.chains,
      n.adapt = Iter
    )

  ##close text connection
  close(con)

  update(jags,Iter)
  echantillon =  rjags::coda.samples(jags,c("D","sD"),min(Iter,10000),thin=t)

  sample=echantillon[[1]]
  for(i in 2:n.chains){
    sample=rbind(sample,echantillon[[i]])
  }

  ##MCMC plot
  if(SavePdf){
    pdf(file=paste(OutputFilePath,OutputFileName[1],'.pdf',sep=""))
  }

  plot_MCMC(
    echantillon,
    sample_names = SampleNames)

  if(SavePdf){
  dev.off()
  }

  ##--- Graphical interpretation, and print result

  ##- Gelman and Rubin test of convergency of the MCMC
  CV=gelman.diag(echantillon,multivariate=FALSE)
  cat(paste("\n\n>> Results of the Gelman and Rubin criterion of convergence <<\n"))
  for(i in 1:Nb_sample){
    cat("----------------------------------------------\n")
    cat(paste(" Sample name: ", SampleNames[i],"\n"))
    cat("---------------------\n")
    cat(paste("\t\t", "Point estimate", "Uppers confidence interval\n"))
    cat(paste(paste("D_",SampleNames[i],sep=""),"\t",round(CV$psrf[i,1],2),"\t\t",round(CV$psrf[i,2],2),"\n"))
    cat(paste(paste("sD_",SampleNames[i],sep=""),"\t",round(CV$psrf[(Nb_sample+i),1],2),"\t\t",round(CV$psrf[(Nb_sample+i),2],2),"\n"))
  }

  cat("\n\n---------------------------------------------------------------------------------------------------\n")
  cat(" *** WARNING: The following information are only valid if the MCMC chains have converged  ***\n")
  cat("---------------------------------------------------------------------------------------------------\n\n")
  # Matrix of results
  rnames=rep(NA,2*Nb_sample)
  for(i in 1:Nb_sample){
    rnames[i]=c(paste("D_",SampleNames[i],sep=""))
    rnames[Nb_sample+i]=c(paste("sD_",SampleNames[i],sep=""))
  }
  R=matrix(data=NA,ncol=8,nrow=2*Nb_sample,
           dimnames=list(rnames,c("lower bound at 95%","lower bound at 68%","Bayes estimate","upper bound at 68%","upper bound at 95%",
                                  "","Convergencies: Point estimate","Convergencies: uppers confidence interval")))

  ##- Bayes estimate and credible interval
  cat(paste("\n\n>> Bayes estimates of Age, Palaeodose and its dispersion for each sample and credible interval <<\n"))
  PalaeodosePlot95=matrix(data=NA,nrow=Nb_sample,ncol=3)
  PalaeodosePlot68=matrix(data=NA,nrow=Nb_sample,ncol=3)
  PalaeodosePlotMoy=rep(0,Nb_sample)
  for(i in 1:Nb_sample){
    cat("----------------------------------------------\n")
    cat(paste(" Sample name: ", SampleNames[i],"\n"))
    cat("---------------------\n")

    cat(paste("Parameter", "\t","Bayes estimate","\t"," Credible interval \n"))
    cat(paste(paste("D_",SampleNames[i],sep=""),"\t",round(mean(sample[,i]),3),'\n'))
    cat("\t\t\t\t\t\t lower bound \t upper bound\n")
    HPD_95=CredibleInterval(sample[,i],0.95)
    HPD_68=CredibleInterval(sample[,i],0.68)
    cat("\t\t\t\t at level 95% \t",round(c(HPD_95[2]),2),"\t\t",round(c(HPD_95[3]),2),"\n")
    cat("\t\t\t\t at level 68% \t",round(c(HPD_68[2]),2),"\t\t",round(c(HPD_68[3]),2),"\n")
    PalaeodosePlot95[i,]=HPD_95
    PalaeodosePlot68[i,]=HPD_68
    PalaeodosePlotMoy[i]=round(mean(sample[,i]),2)
    R[i,3]=round(mean(sample[,i]),3)
    R[i,c(1,5)]=round(HPD_95[2:3],3)
    R[i,c(2,4)]=round(HPD_68[2:3],3)


    cat(paste("\nParameter", "\t","Bayes estimate","\t"," Credible interval \n"))
    cat(paste(paste("sD_",SampleNames[i],sep=""),"\t",round(mean(sample[,(Nb_sample+i)]),3),'\n'))
    cat("\t\t\t\t\t\t lower bound \t upper bound\n")
    HPD_95=CredibleInterval(sample[,(Nb_sample+i)],0.95)
    HPD_68=CredibleInterval(sample[,(Nb_sample+i)],0.68)
    cat("\t\t\t\t at level 95% \t",round(c(HPD_95[2]),2),"\t\t",round(c(HPD_95[3]),2),"\n")
    cat("\t\t\t\t at level 68% \t",round(c(HPD_68[2]),2),"\t\t",round(c(HPD_68[3]),2),"\n")

    R[Nb_sample+i,3]=round(mean(sample[,(Nb_sample+i)]),3)
    R[Nb_sample+i,c(1,5)]=round(HPD_95[2:3],3)
    R[Nb_sample+i,c(2,4)]=round(HPD_68[2:3],3)

  }
  R[,c(7,8)]=round(CV$psrf,2)

  cat("\n----------------------------------------------\n")

  if(SaveEstimates==TRUE){
    write.csv(R,file=c(paste(OutputTablePath,"Estimates",OutputTableName,".csv",sep="")))
  }

  Info=list("Sampling"=echantillon,"Model_GrowthCurve"=Model_GrowthCurve, "Distribution"=distribution)
  return(Info)

}
R-Lum/BayLum documentation built on April 19, 2024, 9:33 a.m.