R/core.R

Defines functions est_sbetel eval_sbetel init_sbetel

Documented in est_sbetel eval_sbetel init_sbetel

#'@useDynLib sbetel, .registration = TRUE
#'@importFrom Rcpp evalCpp

#' @title Initialize a smoothed BETEL model
#' @description Initialize a smoothed BETEL model by providing the model defining 
#' function \code{g()}, the prior generating function \code{prior_fun()} and the 
#' data \code{y} to be used for the estimation of the model. Alternatively 
#' ready-made functions can be used (e.g. by setting \code{g = "var"} and 
#' \code{prior_fun() = "var"}). \code{init_sbetel()} returns 
#' a list that defines the model and can be passed on either to \code{eval_sbetel()} or 
#' \code{est_sbetel()}.
#' 
#' #' @param y A data matrix. Input for \code{g()} below. E.g. for \code{g = "var"} this
#' would be a \code{T x k} matrix with \code{T} observations of \code{k} variables.
#' @param g A function of the form \code{g(th, y, args)} defining the model.
#' Inputs should include a numerical parameter vector \code{th}, a data matrix 
#' \code{y} and optional further arguments through a list called \code{args}.
#' Output shoud be a numerical matrix with its columns corresponding to the sample
#' moment conditions of the model. Defaults to \code{"var"} in which case a 
#' ready-made function for vector autoregressive models is used. For more, 
#' see details.
#' @param prior_fun A function that defines the prior data generating process. 
#' The only inputs should be the model generated by \code{init_sbetel()}. Further 
#' arguments to this function should be passed through \code{model$args}. 
#' Output should be a list with to elements called \code{yy} for dependent 
#' variables and \code{xx} for explanatory variables or a data matrix with the 
#' same number of columns as \code{y}, depending on the model.
#' @param bw Integer \code{(>= 0)}. The bandwidth parameter controlling smoothing 
#' of the moment conditions. Defaults to "auto" in which case the optimal parameter 
#' value is chosen according to \code{bwAndrews()}. For highly autocorrelated moment 
#' conditions higher values of \code{bw} are needed, while for independent data this 
#' should be set to zero, in which case no smoothing takes place. For more, see details.
#' @param initial A function that returns an initial estimate for the parameters 
#' and their covariance matrix as a list with two elements called \code{th} and \code{cov}. 
#' By default OLS estimates are used and should be used whenever possible.
#' @param args A list containing optional further arguments to \code{g()} and 
#' \code{prior_fun()}. For a var model, the lag length can be set for example 
#' by \code{args = list(p = 5)}. For more, see details.
#' @param verbose Logical. Defaults to \code{TRUE} in which case messages are produced.
#' @return \code{init_sbetel()} returns a list that defines a sbetel model 
#' and can be passed on to \code{eval_sbetel()} or \code{est_sbetel()}.
#' @details TBA
#' @examples
#'\dontrun{
#' model <- init_sbetel(y = y, bw = 1, args = list(p = 2))
#'}
#' @export
init_sbetel <- function(y,
                        g = c("var", "svar")[1],
                        prior_fun = c("var", "svar")[1],
                        bw = "auto",
                        initial = NULL,
                        args = list(),
                        verbose = TRUE
                        ) {
  
  if(is.character(g)) {
    
    if(g == "var") {
      g <- sbetel:::g_var
      type <- "var"
      if(verbose == TRUE) cat("Initializing reduced form vectorautoregressive model (g = 'var')... \n")
      
      if(is.null(args$constant)) args$constant <- TRUE
      
      if(is.null(args$p)) {
        args$p <- 1
        if(verbose == TRUE) cat("Lag length ('args$p') not selected. Defaults to 1. \n")
      }
      
      args$xy <- sbetel:::build_xy_var(y, args$p)
      
      if(is.null(initial)) initial <- sbetel:::initial_var
      if(verbose == TRUE) cat("OLS estimates will be used as initial values. \n")
      
      if(bw == "auto") {
        g_gmm <- function(th, x) {
          g(th = th, y = y, args = args)
        }
        if(verbose == TRUE) cat("Choosing the optimal bandwidth parameter... \n")
        gmm_fs <- gmm::gmm(g_gmm, y, t0 = initial(args$xy)$th, type = "twoStep", 
                           wmatrix = "ident", optfct = "nlminb")
        bw <- sandwich::bwAndrews(gmm_fs, kernel = "Bartlett", prewhite = 0)
        bw <- floor(bw/2)
        if(verbose == TRUE) cat(paste0("bw = ", bw, "\n"))
      }
      
      if(is.character(prior_fun)) {
        if(prior_fun == "var") {
          prior_fun <- sbetel:::prior_fun_var
          if(is.null(args$stat)) {
            args$stat <- rep(0, ncol(y))
            if(verbose == TRUE) cat("Prior mean for first own lags (args$stat) not selected. Defaults to 0. \n")
          } 
        } else {
          stop("For g = 'var', 'prior_fun' needs to be either a function or a character string 'var'.")
        }
      }
      
    } else if(g == "svar") {
      g <- sbetel:::g_svar
      type <- "svar"
      if(verbose == TRUE) cat("Initializing structural vectorautoregressive model (g = 'svar')... \n")
      
      if(is.null(args$constant)) args$constant <- TRUE
      
      if(is.null(args$p)) {
        args$p <- 1
        if(verbose == TRUE) cat("Lag length ('args$p') not selected. Defaults to 1. \n")
      }
      
      args$xy <- sbetel:::build_xy_var(y, args$p)
      
      if(is.null(args$stat)) {
        args$stat <- rep(0, ncol(y))
        if(verbose == TRUE) cat("Prior mean for first own lags (args$stat) not selected. Defaults to 0. \n")
      } 
      args$epsilon <- rep(NA, ncol(y))
      for(i in 1:ncol(y)) args$epsilon[i] <- sqrt(ar(y[,i], aic = F, order.max = 1)$var.pred)
      if(is.null(args$B0)) {
        args$B0 <- diag(args$epsilon)
        if(verbose == TRUE) cat("B0 not provided. Defaults as diag(epsilon). \n")
      } else if(length(args$B0) == 1) { 
        cat(paste0("Off-diagonal for B0 provided (", args$B0, "), diagonal set according to epsilon. \n"))
        B0 <- matrix(args$B0, ncol = ncol(y), nrow = ncol(y))
        diag(B0) <- args$epsilon
        args$B0 <- B0
      } else if(length(args$B0) == ncol(y)^2){
        if(sum(is.na(diag(args$B0))) == ncol(args$B0)) diag(args$B0) <- args$epsilon
        if(verbose == TRUE) cat("Diagonal of B0 set to epsilon. \n")
      } else {
        stop("B0 misspecified.")
      }
      
      if(is.null(initial)) initial <- sbetel:::initial_svar
      if(verbose == TRUE) cat("GMM estimates will be used as initial values. \n")
      
      if(is.null(args$wmatrix)) args$wmatrix <- "optimal"
      if(is.null(args$gmm_verbose)) args$gmm_verbose <- FALSE
      if(bw == "auto") {
        if(verbose == TRUE) cat("Choosing the optimal bandwidth parameter via GMM... \n")
        init_gmm <- initial(xy = args$xy, args = args, bw = TRUE, verbose = args$gmm_verbose)
        bw <- init_gmm$bw
        if(verbose == TRUE) cat(paste0("bw = ", bw, "\n"))
      }
      
      if(is.character(prior_fun)) {
        if(prior_fun == "svar") {
          prior_fun <- sbetel:::prior_fun_svar
        } else {
          stop("For g = 'svar', 'prior_fun' needs to be either a function or a character string 'svar'.")
        }
      }
      
    } else {
      stop("Argument 'g' needs to be either function or character string in c('var', 'svar')")
    }

  } else {
    type <- "custom"
    stop("Custom models are not yet implemented in 'sbetel'. Sorry!")
  }
  
  #Collects the model parameters etc.
  model <- list(y = y,
                g = g,
                prior_fun = prior_fun,
                bw = bw,
                initial = initial,
                args = args,
                type = type)
  model
}

#' @title Evaluate a smoothed BETEL density
#' @description Evaluate a (unnormalized) smoothed BETEL density at parameter
#' values \code{th} given the \code{model} (list) generated by \code{init_sbetel()} 
#' and the data \code{xy}. 
#' A wrapper for c++ implementation.
#' 
#' @param th A numerical vector of parameter values.
#' @param model A list returned by \code{init_sbetel()} defining the model.
#' @param xy The data for which the density is coniditoned on. Defaults to the 
#' data passed to \code{init_sbetel()} without the prior.
#' @param td The number of rows of prior data, i.e. the number of rows from 
#' the beginning of the data that should not be smoothed.
#' @param itermax Maximum number of Newton-Rhapson iterations within the evaluation 
#' of the likelihood. Defaults to 20 which should be more than enough.
#' @return \code{eval_sbetel()} returns a numerical value that is the unnormalized 
#' density at \code{th} conditional on \code{xy}.
#' @examples
#'\dontrun{
#' eval_sbetel(th = model$initial(model$args$xy)$th, model = model, xy = model$args$xy)
#'}
#' @export
eval_sbetel <- function(th, 
                        model, 
                        xy = model$args$xy, 
                        td = 0,
                        itermax = 20) {
  args <- model$args
  args$xy <- xy
  sbetel:::etel_rcpp(th = th, 
                     g = model$g, 
                     y = model$y, 
                     bw = model$bw, 
                     td = td,
                     itermax = itermax,
                     args = args
  )
}

#' @title Estimate a smoothed BETEL model
#' @description Estimate a smoothed BETEL model by generating a sample
#' from the parameter posterior density with a RWMH algorithm. Subchains 
#' are used for integrating over the stochastic prior defined by the prior 
#' data generating process \code{prior_fun()}.
#' 
#' @param model A list returned by \code{init_sbetel()} defining the model.
#' @param chain_length The length of one subchain.
#' @param chain_number The number of subchains. The total (not independent) 
#' size of the sample equals \code{chain_length x chain_number}.
#' @param tune A parameter that controls the step size of the RWMH algorithm.
#' Should be chosen to accomplish an acceptance rate of approximately 20-25%. 
#' \code{sbetel:::auto_tune(model, type = "posterior")} might be of help.
#' @param type Should be either "posterior", "likelihood" or "prior". 
#' Defaults to "posterior".
#' @param burn The length of the burn-in period. Should be enough for correlation 
#' of the subchain from its initial value to vanish.
#' @param parallel Specifies the number of logical processors used for parallel 
#' processing of the subchains. Defaults to 1 in which case no parallel processing 
#' is used. \code{parallel::detectCores()} can be used to find out the number 
#' of logical processors on your machine.
#' @param itermax Maximum number of Newton-Rhapson iterations within the evaluation 
#' of the likelihood. Defaults to 20 which should be more than enough.
#' @param backup \code{NULL} or a character string that names the directory in which the 
#' backups of the subchains are saved as they are completed. Defaults to \code{NULL} 
#' in which case no backups are created.
#' @param verbose Logical. Defaults to \code{TRUE} in which case messages are produced.
#' @return \code{est_sbetel()} returns a list containing the output of the 
#' RWMH algorithm.
#' @details TBA
#' @examples
#' \dontrun{
#' output <- est_sbetel(model, tune = 0.1)
#' }
#' @export
est_sbetel <- function(model,
                       chain_length = 100,
                       chain_number = 1,
                       tune,
                       type = c("posterior", "likelihood", "prior")[1],
                       burn = 50,
                       parallel = 1,
                       itermax = 20,
                       trys = 1,
                       backup = NULL,
                       verbose = TRUE) {
  
  starttime <- Sys.time()
  
  if(!is.null(backup)) {
    if(!is.character(backup)) stop("'backup' needs to be NULL or character string.")
    dir.create(backup, showWarnings = FALSE)
    wd <- getwd()
  }
  wd <- getwd()
  
  if(verbose == TRUE) cat(paste0("Estimating the ", type, " with RWMH algorithm... \n"))
  
  if(parallel == 1) {
    
    if(verbose == TRUE) cat(paste0("No parallelization, using only one core. \n"))
    subchains <- list()
    for(i in 1:chain_number) {
      if(verbose == TRUE) cat(paste0("Subchain ", i, "/", chain_number, "\n"))
      if(verbose == TRUE) progressbar <- TRUE else progressbar <- FALSE
      subchains[[i]] <- sbetel:::run_chain(chain_name = i,
                                           type = type,
                                           model = model,
                                           chain_length = chain_length,
                                           tune = tune,
                                           burn = burn,
                                           backup = backup,
                                           itermax = itermax,
                                           trys = trys,
                                           progressbar = progressbar)
    }
    
  } else {
    
    if(verbose == TRUE) cat(paste0("Subchains run in parallel using ", parallel," cores. \n"))
    if(verbose == TRUE) cat(paste0("With parallel procesing the progress can be tracked \n", "by using the 'backup' argument. \n"))
    
    cl <- parallel::makeCluster(parallel)
    parallel::clusterExport(cl, 
                            list("type", "model", "chain_length",
                                 "tune", "burn", "backup", 
                                 "trys", "itermax", "wd"),
                            envir = environment())
    subchains <- tryCatch({
      parallel::parLapply(cl,
                          1:chain_number,
                          function(i) {
                            sbetel:::run_chain(chain_name = i,
                                               type = type,
                                               model = model,
                                               chain_length = chain_length,
                                               tune = tune,
                                               burn = burn,
                                               backup = backup,
                                               itermax = itermax,
                                               wd = wd,
                                               trys = trys)
                          })
    }, error = function(e) {
      parallel::stopCluster(cl)
      stop(e)
    })
    parallel::stopCluster(cl)
    
  } 
  
  #Collect the chains and likelihoods
  allchains <- matrix(NA, 
                      ncol = ncol(subchains[[1]]$chain),
                      nrow = chain_length*chain_number
  )
  likelihoods <- c()
  for(i in 1:length(subchains)) {
    rows <- (i*chain_length+1):((i+1)*chain_length)-chain_length
    allchains[rows,] <- subchains[[i]]$chain
    burn <- length(subchains[[i]]$likelihoods) - nrow(subchains[[i]]$chain)
    if(burn > 0) {
      likelihoods <- c(likelihoods, subchains[[i]]$likelihoods[-c(1:burn)])
    } else {
      likelihoods <- c(likelihoods, subchains[[i]]$likelihoods)
    }
  }

  #Average acceptance rate
  accrates <- rep(NA, chain_number)
  for(i in 1:chain_number) accrates[i] <- subchains[[i]]$accrate
  
  totaltime <- Sys.time() - starttime
  if(verbose == TRUE) cat(paste0("Average acceptance rate: ", round(mean(accrates), 3), "\n"))
  if(verbose == TRUE) cat(paste0("Total time: ", round(totaltime, 3), " ", attributes(totaltime)$units, "\n"))
  
  #Collect the output
  ret <- list(sample = allchains,
              subchains = subchains,
              accrates = accrates,
              likelihoods = likelihoods,
              totaltime = totaltime,
              tune = tune,
              itermax = itermax,
              trys = trys,
              type = type)
  ret
}
jetroant/sbetel documentation built on Oct. 20, 2020, 1:11 a.m.