R/RUI.R

Defines functions getNames getParNames violinPlot densityPlot tracePlot readMCMC BaM

Documented in BaM densityPlot getNames getParNames readMCMC tracePlot violinPlot

#***************************************************************************----
# R User Interface: main functions  ----

#*******************************************************************************
#' Run BaM
#'
#' Run BaM.exe
#'
#' @param mod model object, the model to be calibrated
#' @param data dataset object, calibration data
#' @param remnant list of remnantErrorModel objects.
#'     WARNING: make sure you use a list of length mod$nY (even if mod$nY=1!)
#' @param mcmc mcmcOptions object, MCMC simulation number and options
#' @param cook mcmcCooking object, properties of MCMC cooking (burn and slice)
#' @param summary mcmcSummary object, properties of MCMC summary
#' @param residuals residualOptions object, properties of residual analysis
#' @param pred list of prediction objects, properties of prediction experiments
#' @param doCalib Logical, do Calibration? (mcmc+cooking+summary+residuals)
#' @param doPred Logical, do Prediction?
#' @param na.value numeric, value used for NAs when writing the dataset in BaM format
#' @param run Logical, run BaM? if FALSE, just write config files.
#' @param preClean Logical, start by cleaning up workspace?
#'  Be careful, this will delete all files in the workspace, including old results!
#' @param workspace Character, directory where config and result files are stored.
#' @param dir.exe Character, directory where BaM executable stands.
#' @param name.exe Character, name of the executable without extension ('BaM' by default).
#' @param predMaster_fname Character, name of configuration file pointing to all prediction experiments.
#' @return Nothing: just write config files and runs the executable.
#' @examples
#' # Fitting a rating curve - see https://github.com/BaM-tools/RBaM
#' workspace=tempdir()
#' D=dataset(X=SauzeGaugings['H'],Y=SauzeGaugings['Q'],Yu=SauzeGaugings['uQ'],data.dir=workspace)
#' # Parameters of the low flow section control: activation stage k, coefficient a and exponent c
#' k1=parameter(name='k1',init=-0.5,prior.dist='Uniform',prior.par=c(-1.5,0))
#' a1=parameter(name='a1',init=50,prior.dist='LogNormal',prior.par=c(log(50),1))
#' c1=parameter(name='c1',init=1.5,prior.dist='Gaussian',prior.par=c(1.5,0.05))
#' # Parameters of the high flow channel control: activation stage k, coefficient a and exponent c
#' k2=parameter(name='k2',init=1,prior.dist='Gaussian',prior.par=c(1,1))
#' a2=parameter(name='a2',init=100,prior.dist='LogNormal',prior.par=c(log(100),1))
#' c2=parameter(name='c2',init=1.67,prior.dist='Gaussian',prior.par=c(1.67,0.05))
#' # Define control matrix: columns are controls, rows are stage ranges.
#' controlMatrix=rbind(c(1,0),c(0,1))
#' # Stitch it all together into a model object
#' M=model(ID='BaRatin',
#'         nX=1,nY=1, # number of input/output variables
#'         par=list(k1,a1,c1,k2,a2,c2), # list of model parameters
#'         xtra=xtraModelInfo(object=controlMatrix)) # use xtraModelInfo() to pass the control matrix
#' # Call BaM to write configuration files. To actually run BaM, use run=TRUE,
#' # but BaM executable needs to be downloaded first (use downloadBaM())
#' BaM(mod=M,data=D,run=FALSE,workspace=workspace)
#' @export
#' @importFrom utils write.table
BaM <- function(mod,data,
                remnant=rep(list(remnantErrorModel()),mod$nY),
                mcmc=mcmcOptions(),cook=mcmcCooking(),summary=mcmcSummary(),
                residuals=residualOptions(),pred=NULL,
                doCalib=TRUE,doPred=FALSE,na.value=-9999,
                run=TRUE,preClean=FALSE,
                workspace=file.path(getwd(),'BaM_workspace'),
                dir.exe=file.path(find.package('RBaM'),'bin'),name.exe='BaM',
                predMaster_fname="Config_Pred_Master.txt"
){
  #oooooooooooooooooooooooooooooooooooooooooo
  # Preliminaries
  if(preClean){file.remove(list.files(workspace,full.names=T))}
  # check length(remnantErrorModel)==mod$nY

  #oooooooooooooooooooooooooooooooooooooooooo
  # Write config files
  quickWrite(toString(mod),workspace,mod$fname)
  npar=length(mod$par)
  if(npar>0){
    for (i in 1:npar){
      p=mod$par[[i]]
      if(is.parameter_VAR(p)){
        writeConfig.parameter_VAR(workspace,paste0('Config_',p$name,'_VAR.txt'),p)
      }
    }
  }
  writeConfig.xtra(workspace,mod)
  quickWrite(toString(data),workspace,data$fname)
  quickWrite(toString(mcmc),workspace,mcmc$fname)
  quickWrite(toString(cook),workspace,cook$fname)
  quickWrite(toString(summary),workspace,summary$fname)
  quickWrite(toString(residuals),workspace,residuals$fname)
  for(i in 1:length(remnant)){
    quickWrite(toString(remnant[[i]]),workspace,remnant[[i]]$fname)
  }
  #oooooooooooooooooooooooooooooooooooooooooo
  # Predictions
  if(!is.null(pred)){
    # Individual prediction experiments
    if(is.prediction(pred)){ # a single prediction experiment
      npred=1
      quickWrite(toString(pred),workspace,pred$fname)
      writePredInputs(pred)
      parSamples=pred$parSamples
    } else { # a list of several prediction experiments
      npred=length(pred)
      parSamples=pred[[1]]$parSamples
      for(i in 1:npred){
        # Verify that all prediction experiments use the same parSamples
        # (possibly the MCMC-generated one when parSamples==NULL)
        if(!identical(pred[[i]]$parSamples,parSamples)){
          stop("All prediction experiments should use the same `parSamples` (possibly the MCMC-generated one when `parSamples`==NULL)",call.=FALSE)
        }
        quickWrite(toString(pred[[i]]),workspace,pred[[i]]$fname)
        writePredInputs(pred[[i]])
      }
    }
    # Master prediction file
    expFiles=getNames(pred,name='fname')
    if(any(duplicated(expFiles))){
      warning('Duplicated prediction experiments.')
    }
    val <- vector(mode='list',length=npred+1)
    val[[1]] <- npred
    for(i in 1:npred){val[[i+1]] <- expFiles[i]}
    comment <- c('Number of prediction experiments',
                 paste0('Config file for experiment ',1:npred))
    txt <- toString_engine(val,comment)
    quickWrite(txt=txt,dir=workspace,fname=predMaster_fname)
  }
  runO <- runOptions(doMCMC=doCalib,doSummary=doCalib,
                  doResiduals=doCalib,doPrediction=doPred)
  quickWrite(toString(runO),workspace,runO$fname)

  #oooooooooooooooooooooooooooooooooooooooooo
  # Write data file
  foo=data$data
  foo[is.na(foo)] <- na.value
  utils::write.table(foo,sep='\t',quote=FALSE,
                     file=data$data.file,row.names=FALSE)

  #oooooooooooooooooooooooooooooooooooooooooo
  # General controller
  rem.txt <- c()
  for(i in 1:length(remnant)){rem.txt <- c(rem.txt,remnant[[i]]$fname)}
  val <- list(paste0(workspace,.Platform$file.sep),
              runO$fname,mod$fname,mod$xtra$fname,
              data$fname,rem.txt,mcmc$fname,cook$fname,
              summary$fname,residuals$fname,predMaster_fname)
  comment <- c('Workspace','Config file: run options','Config file: model',
               'Config file: xtra model information','Config file: Data',
               'Config file: Remnant sigma (as many files as there are output variables separated by commas)',
               'Config file: MCMC','Config file: cooking of MCMC samples',
               'Config file: summary of MCMC samples','Config file: residual analysis',
               'Config file: prediction experiments (master file)')
  txt <- toString_engine(val,comment)
  quickWrite(txt=txt,dir=dir.exe,fname="Config_BaM.txt")

  #oooooooooooooooooooooooooooooooooooooooooo
  # Run exe

  # If a prediction provides its own parSamples, need to backup-overwrite-restore cooked MCMC file
  if(!is.null(pred)){
    if(!is.null(parSamples)){
      # backup
      invisible(file.copy(from=file.path(workspace,cook$result.fname),
                          to=file.path(workspace,paste0('BACKUP_',cook$result.fname))))
      # overwrite
      utils::write.table(parSamples,file.path(workspace,cook$result.fname),row.names=FALSE)
    }
  }

  res=0
  if(run){
    ok=foundBaM(exedir=dir.exe,exename=name.exe)
    if(!ok){
      message(paste0('BaM executable was not found in folder: ',
                     dir.exe,'. Call function downloadBaM() to download it.'))
      return()
    }
    res=try(runExe(exedir=dir.exe,exename=name.exe))
  }
  if(res!=0){stop('BaM executable crashed with error code: ',res,call.=FALSE)}

  # If a prediction provides its own parSamples, need to cleanup
  if(!is.null(pred)){
    if(!is.null(parSamples)){
      # restore
      invisible(file.copy(to=file.path(workspace,cook$result.fname),
                          from=file.path(workspace,paste0('BACKUP_',cook$result.fname)),
                          overwrite=TRUE))
      # cleanup
      invisible(file.remove(file.path(workspace,paste0('BACKUP_',cook$result.fname))))
    }
  }
}

#*******************************************************************************
#' MCMC Reader
#'
#' Read raw MCMC samples, return cooked (burnt & sliced) ones
#'
#' @param file Character, full path to MCMC file.
#' @param burnFactor Numeric, burn factor. 0.1 means the first 10% iterations
#'    are discarded.
#' @param slimFactor Integer, slim factor. 10 means that only one iteration
#'    every 10 is kept.
#' @param sep Character, separator used in MCMC file.
#' @param reportFile Character, full path to pdf report file, not created if NULL
#' @param panelPerCol Integer, max number of panels per column
#' @param panelHeight Numeric, height of each panel
#' @param panelWidth Numeric, width of each panel
#' @return A data frame containing the cooked mcmc samples.
#' @examples
#'# Create Monte Carlo samples and write them to file
#' n=4000
#' sim=data.frame(p1=rnorm(n),p2=rlnorm(n),p3=runif(n))
#' workspace=tempdir()
#' write.table(sim,file=file.path(workspace,'MCMC.txt'),row.names=FALSE)
#' # Read file, burn the first half and keep every other row
#' M=readMCMC(file=file.path(workspace,'MCMC.txt'),burnFactor=0.5,slimFactor=2)
#' dim(M)
#' @export
readMCMC <- function(file='Results_Cooking.txt',burnFactor=0,slimFactor=1,sep='',
                     reportFile=NULL,
                     panelPerCol=10,panelHeight=3,panelWidth=23/panelPerCol){
  # read file
  raw <- utils::read.table(file,header=T,sep=sep)
  n <- NROW(raw);p <- NCOL(raw)
  keep <- seq(max(floor(n*burnFactor),1),n,slimFactor)
  cooked <- raw[keep,]
  if(!is.null(reportFile)){
    if (requireNamespace("gridExtra", quietly = TRUE)) {
      ncol <- ifelse(p>=panelPerCol,panelPerCol,p)
      grDevices::pdf(file=reportFile,width=ncol*panelWidth,
          height=ceiling(p/panelPerCol)*panelHeight,useDingbats=FALSE)
      gList <- vector("list",p)
      colors <- rep('black',p)
      for(i in 1:p){
        gList[[i]] <- tracePlot(sim=raw[,i],ylab=names(raw)[i],
                                keep=keep,col=colors[i])
      }
      gridExtra::grid.arrange(grobs=gList,ncol=ncol)
      grDevices::dev.off()
    } else {
      warning(paste('Package gridExtra is required for reporting.',
              'Report file is not created'))
    }
  }
  return(cooked)
}

#***************************************************************************----
# Plotting functions  ----

#*******************************************************************************
#' MCMC reporting
#'
#' 2DO (adapt from STooDs): Generate pdf report files summarizing mcmc samples
#'

#*******************************************************************************
#' tracePlot
#'
#' returns a trace plot ggplot (or a list thereof if several columns in sim)
#'
#' @param sim vector or matrix or data frame, MCMC simulations
#' @param ylab Character, label of y-axis to be used if sim has no names
#' @param keep Integer vector, indices of samples to be kept in cooked MCMC sample
#' @param col Color
#' @param psize Numeric, point size
#' @return A ggplot (or a list thereof if several columns in sim)
#' @examples
#' # Create Monte Carlo samples
#' n=1000
#' sim=data.frame(p1=rnorm(n),p2=rlnorm(n),p3=runif(n))
#' # create trace plot for each component
#' figures=tracePlot(sim)
#' @export
#' @import ggplot2
#' @importFrom rlang .data
tracePlot <- function(sim,ylab='values',keep=NULL,col='black',psize=0.5){
  p <- NCOL(sim)
  g <- vector("list",p)
  yl <- names(sim)
  if(is.null(yl)) {yl <- rep(ylab,p)}
  for(i in 1:p){
    DF <- data.frame(x=1:NROW(sim),y=as.data.frame(sim)[,i])
    if(is.null(keep)){lcol <- col} else {lcol <- 'lightgray'}
    g[[i]] <- ggplot()+
      scale_x_continuous('Iteration')+
      scale_y_continuous(yl[i])+
      geom_line(data=DF,aes(x=.data$x,y=.data$y),col=lcol)
    if(!is.null(keep)){
      g[[i]] <- g[[i]] + geom_point(data=DF[keep,],aes(x=.data$x,y=.data$y),col=col,size=psize) +
        geom_vline(xintercept=keep[1],col='red')
    }
    g[[i]] <- g[[i]] + theme_bw()
  }
  if(p==1){return(g[[1]])} else {return(g)}
}

#*******************************************************************************
#' densityPlot
#'
#' returns a histogram+density ggplot (or a list thereof if several columns in sim)
#'
#' @param sim vector or matrix or data frame, MCMC simulations
#' @param xlab Character, label of x-axis to be used if sim has no names
#' @param col Color
#' @return A ggplot (or a list thereof if several columns in sim)
#' @examples
#' # Create Monte Carlo samples
#' n=1000
#' sim=data.frame(p1=rnorm(n),p2=rlnorm(n),p3=runif(n))
#' # create density plot for each component
#' figures=densityPlot(sim)
#' @export
#' @import ggplot2
#' @importFrom rlang .data
#' @importFrom stats density
densityPlot <- function(sim,xlab='values',col='black'){
  p <- NCOL(sim)
  g <- vector("list",p)
  xl <- names(sim)
  if(is.null(xl)) {xl <- rep(xlab,p)}
  for(i in 1:p){
    DF <- data.frame(val=as.data.frame(sim)[,i])
    g[[i]] <- ggplot(DF,aes(.data$val))+
      scale_x_continuous(xl[i])+
      scale_y_continuous('posterior pdf')+
      geom_histogram(aes(y=after_stat(density)),fill=NA,col='black',
                     binwidth=(max(DF$val)-min(DF$val))/30)+
      geom_density(fill=col,alpha=0.5,col=NA)

    g[[i]] <- g[[i]] + theme_bw()
  }
  if(p==1){return(g[[1]])} else {return(g)}
}

#*******************************************************************************
#' violinPlot
#'
#' returns a violinplot ggplot
#'
#' @param sim vector or matrix or data frame, MCMC simulations
#' @param ylab Character, label of y-axis
#' @param col Color
#' @return A ggplot
#' @examples
#' # Create Monte Carlo samples
#' n=1000
#' sim=data.frame(p1=rnorm(n),p2=rlnorm(n),p3=runif(n))
#' # create violin plot comparing all components
#' figure=violinPlot(sim)
#' @export
#' @import ggplot2
#' @importFrom rlang .data
violinPlot <- function(sim,ylab='values',col='black'){
  if (requireNamespace("tidyr",quietly=TRUE)) {
    DF=tidyr::gather(as.data.frame(sim))
    DF$key2 <- stats::reorder(DF$key, 1:NROW(DF)) # avoids violins being re-ordered alphabetically
    g <- ggplot(DF,aes(x=.data$key2,y=.data$value))+
      geom_violin(fill=col)+
      scale_x_discrete('Index',labels=1:NCOL(sim))+
      scale_y_continuous(ylab)
    g <- g + theme_bw()
    return(g)
  } else {
    warning('Package tidyr is required to create this plot.')
  }
}

#***************************************************************************----
# Catalogue ----

#*******************************************************************************
#' Get parameter names
#'
#' Get parameter names for a distribution d
#'
#' @param d Character (possibly vector), distribution (possibly distributions)
#' @return A character vector with parameter names.
#' @examples
#' parnames <- getParNames('GEV')
#' npar <- length(getParNames('Gumbel'))
#' @export
getParNames<-function(d){
  names=c()
  for(i in 1:length(d)){
    name=switch(d[i],
                'Gaussian'=c('mean','sd'),
                'Uniform'=c('lowBound','highBound'),
                'Triangle'=c('peak','lowBound','highBound'),
                'LogNormal'=c('meanlog','sdlog'),
                'LogNormal3'=c('threshold','meanlogexcess','sdlogexcess'),
                'Exponential'=c('threshold','scale'),
                'GPD'=c('threshold','scale','shape'),
                'Gumbel'=c('location','scale'),
                'GEV'=c('location','scale','shape'),
                'GEV_min'=c('location','scale','shape'),
                'Inverse_Chi2'=c('dof','scale'),
                'PearsonIII'=c('location','scale','shape'),
                'Geometric'=c('prob'),
                'Poisson'=c('rate'),
                'Bernoulli'=c('prob'),
                'Binomial'=c('prob','ntrials'),
                'NegBinomial'=c('prob','nfails'),
                'FlatPrior'=c(),
                'FlatPrior+'=c(),
                'FlatPrior-'=c(),
                'FIX'=c(),
                'VAR'=c(),
                NA)
    names=c(names,name)
  }
  return(names)
}

#***************************************************************************----
# Misc. ----

#*******************************************************************************
#' Get object names
#'
#' getNames from an object or a list of objects having a $name field (e.g. parameters)
#' @param loo List Of Objects
#' @param name character, string denoting the name field
#' @return A character vector containing names
#' @examples
#' pars <- list(parameter(name='par1',init=0),
#'              parameter(name='par2',init=0),
#'              parameter(name='Third parameter',init=0))
#' getNames(pars)
#' @export
getNames<-function(loo,name='name'){
  if(is.null(loo)) {return(NULL)}
  if(!is.null(loo[[name]])){return(loo[[name]])}
  n=length(loo)
  if(n==0){return(NULL)}
  txt=vector("character",n)
  for(i in 1:n){
    foo=tryCatch(loo[[i]][[name]],error=function(e){NaN})
    if(is.null(foo)){txt[i]=NaN} else {txt[i]=foo}
  }
  return(txt)
}
BaM-tools/RBaM documentation built on April 11, 2025, 10:01 p.m.