R/estimate.R

#' Estimates the Bayesian probability of informed trading
#' 
#' @description
#' Calling [estimate_pin()] on trades data estimates the probability of 
#' informed trading from the compressed EKOP model presented in Grammig, 
#' Theissen and Zehnder (2015). 
#' 
#' @details 
#' Internally this function uses the `finmix` package for estimation of the 
#' finite mixture parameters. The `finmix` package performs MCMC sampling for 
#' the parameters of the compressed EKOP model and computes three parameter 
#' estimates namely
#' * Maximum a posterior (MAP) the mode of the joint posterior of parameters 
#'   and indicators,
#' * Bayesian maximum likelihood (BML) the mode of the joint posterior of 
#'   parameters and indicators in case of a flat prior distribution, 
#' * Identified ergodic average (IEAVG) is the ergodic average over the MCMC
#'   parameter traces after relabeling the parameters.
#' 
#' ## Relabeling
#' The MCMC sampling uses a socalled random permutation algorithm to force the 
#' labels of the two distributions in the mixture to switch randomly between 
#' iterations. Label switching occurs naturally in estimation of finite mixture 
#' distributions and is documented in the literature. Random permutation tries 
#' not to eliminate label switching, but to bring it into a more controlled 
#' setting. As a result each MCMC parameter trace contains parameter values 
#' of the other trace. For this reason re-labeling is performed after sampling. 
#' 
#' The `finmix` package offers three different re-labeling algorithms: 
#' * `kmeans` re-labels parameters by applying K-means clustering to the 
#'   merged component parameters,
#' * `Stephens1997a` applies the algorithm suggested by Stephens (1997a) that 
#'   tries to make the posterior marginal distributions of parameters uni-modal,
#' * `Stephens1997b` uses the algorithm presented in Stephens (1997b) that 
#'   tries to re-label parameter values by minimizing the Kullback-Leibler 
#'   distance between a parameter clustering via assignment probabilities and 
#'   the classification probabilities defined by Poisson distributions. 
#' 
#' To offer the user more flexibility the function can also return the MCMC 
#' samples for further inspection. Setting `return_mcmc` to `TRUE` (default) 
#' returns also the MCMC traces. 
#' 
#' @param pin_data A `vector` of trades data (number of trades per day).
#' @param return_mcmc A logical indicating, if MCMC parameter samples should 
#'   be returned in addition to the final PIN estimates.
#' @param with_stephens A logical indicating, if in addition to `kmeans` re-
#' labeling also re-labeling via the algorithms from Stephens (1997a) and 
#' Stephens (1997b) should be applied.
#' @return A `data.frame()` with PIN estimates, if `return_mcmc` is `FALSE`. 
#'   Otherwise, a `list` containing the `data.frame` of PIN estimates together 
#'   with a `finmix::mcmcoutput` (see \code{\link[finmix]{mcmcoutput-class}}) 
#'   object containing the MCMC traces of the component paramaters.
#' @export
#' @import finmix
#' 
#' @examples 
#' # Simulate trades data
#' trades_data <- simulate_ekop()
#' # Estimate the Bayesian PIN.
#' estimate_pin(trades_data$Trades)
#' 
#' @seealso 
#' * \code{\link[finmix]{mixturemcmc}} for performing MCMC sampling
#' * \code{\link[finmix]{mcmcestimate}} for estimating parameters from MCMC samples
#' * [compute_bayespin()] for computing the Bayesian PIN from Bayesian 
#'   estimates of the component parameters of the compressed EKOP model
#'
#' @references 
#' * Easley, D., Kiefer, N., O’Hara, M., Paperman, J., 1996. Liquidity, 
#'   information, and infrequently traded stocks. Journal of Finance 51, 
#'   1405–1436.
#' * Grammig, J., Theissen, E., Zehnder, L.S., 2015. Bayesian Estimation of the 
#'   Probability of Informed Trading. Conference on Financial Econometrics & 
#'   Empirical Asset Pricing 2016, Lancaster University
#' * Stephens, M., 1997a. Discussion 'On Bayesian analysis of mixtures with an
#'   unknown number of components' (by S. Richardson and P.J. Green), J. R. 
#'   Statist. Soc., B, 59, 768-769.
#' * Stephens, M., 1997b. Bayesian methods for mixtures of normal 
#'   distributions. DPhil. Thesis. University of Oxford, Oxford.
"estimate_pin" <- function(pin_data, return_mcmc=FALSE, with_stephens=FALSE)
  {
    # Create `fdata` object with dimension `r=1`.
    mcmc_fdata <- finmix::fdata(matrix(pin_data), type='discrete', r=1)
    
    # Create a finite mixture model of two Poisson distributions.
    mcmc_model <- finmix::model(dist='poisson', K=2, r=1)
    
    # Define the prior distribution as a conditional conjugate
    # prior (a Gamma distribution) with a hierarchical prior 
    # (also Gamma).
    mcmc_prior <- finmix::priordefine(fdata=mcmc_fdata, model=mcmc_model)
    
    # Set up the MCMC algorithm (a Gibbs sampler) with 10,000 iterations
    # and a burnin of 1,000. Allow random permutation of labels and do 
    # not store the indicators `S`.
    mcmc_mcmc <- finmix::mcmc(burnin=1000, M=10000, ranperm=TRUE, 
                             storeS=FALSE, storepost=FALSE)
    if (with_stephens) mcmc_mcmc@storepost <- TRUE
    
    # Generate starting parameters for the indicators `S`.
    (mcmc_fdata~mcmc_model~mcmc_mcmc) %=% finmix::mcmcstart(fdata=mcmc_fdata, 
                                                            model=mcmc_model, 
                                                            mcmc_mcmc)
    
    # Start the MCMC sampling.
    mcmc_results <- finmix::mixturemcmc(fdata=mcmc_fdata, model=mcmc_model, 
                                       prior=mcmc_prior, mcmc=mcmc_mcmc)
    
    # Estimate the parameters. 
    mcmc_parest    <- finmix::mcmcestimate(mcmc_results)
    # Calculate the PIN estimates.
    pin_estimates <- compute_bayespin(mcmc_parest)
    if (with_stephens) 
    {
      # Relabel with Stephens1997a.
      tryCatch(
        {
          mcmc_parest_st1997a <- finmix::mcmcestimate(mcmc_results, 
                                                      method='Stephens1997a')
          pin_est_st1997a    <- compute_bayespin(mcmc_parest_st1997a)[1,9:12]
          colnames(pin_est_st1997a) <- c('alpha', 'epsilon', 'mu', 'pin')
          row.names(pin_est_st1997a) <- "IEAVG_St1997a"
          pin_estimates      <- rbind(pin_estimates, pin_est_st1997a)
        },
        error= function(err)
        {
          print(err)
          pin_estimates <- rbind(pin_estimates, rep(NA, 4))
          row.names(pin_estimates)[4] <- 'IEAVG_St1997a'
        },
        warning=function(warn) {}
      )
      # Relabel with Stephens1997b.
      tryCatch(
        {
          # Stephens1997b needs the data to draw conclusion about the labels.
          mcmc_parest_st1997b <- finmix::mcmcestimate(mcmc_results, 
                                                      method='Stephens1997b', 
                                                      fdata=mcmc_fdata)
          pin_est_st1997b    <- compute_bayespin(mcmc_parest_st1997b)[1,9:12]
          colnames(pin_est_st1997b) <- c('alpha', 'epsilon', 'mu', 'pin')
          row.names(pin_est_st1997b) <- c('IEAVG_St1997b')
          pin_estimates      <- rbind(pin_estimates, pin_est_st1997b)
        },
        error = function(err)
        {
          print(err)
          pin_estimates <- rbind(pin_estimates, rep(NA, 4))
          row.names(pin_estimates)[4] <- 'IEAVG_St1997b'
        },
        warning=function(warn) {}
      )
    }
    
    if (return_mcmc) 
    {
      return(list("pin_estimates"=pin_estimates, "mcmc_results"=mcmc_results))
    } else 
    {
      return(pin_estimates)
    }
}

#' Calculates estimates of the probability of informed trading
#' 
#' @description 
#' Calling [compute_bayespin()] calculates the probability of informed trading
#' (PIN) from the paper of Easley et al. (1996). The input argument is an
#' `mcmcest` (see \code{\link[finmix]{mcmcest-class}}) object from the `finmix`
#' package containing all parameters estimates from the finite mixture
#' distribution of the compressed EKOP model in Grammig, Theissen and Zehnder
#' (2015).
#' 
#' @param mcmcest An `mcmcest` object of the `finmix` package containing 
#'   all estimated parameters from the finite mixture distribution of the 
#'   compressed EKOP model.
#' @return A `data.frame` with estimated PINs from the maximum a posterior, the 
#'   Bayesian maximum likelihood and the identified ergodic average parameter
#'   estimates of the underlying finite mixture distribution of the compressed 
#'   EKOP model.
#' @export
#'
#' @seealso 
#' * \code{\link[finmix]{mcmcest-class}} for the definition of the `mcmcest` class union
#' * [estimate_pin()] for estimating the PIN with the Bayesian approach 
#'   described in Grammig, Theissen and Zehnder (2015)
#' * [compute_pin()] for calculating the PIN for provided parameters
#' 
#' @references 
#' * Easley, D., Kiefer, N., O’Hara, M., Paperman, J., 1996. Liquidity, 
#'   information, and infrequently traded stocks. Journal of Finance 51, 
#'   1405–1436.
#' * Grammig, J., Theissen, E., Zehnder, L.S., 2015. Bayesian Estimation of the 
#'   Probability of Informed Trading. Conference on Financial Econometrics & 
#'   Empirical Asset Pricing 2016, Lancaster University
"compute_bayespin" <- function(mcmcest)
{
  # Calculate PIN
  ## Get the order of the Poisson parameters as the larger one
  ## will be (mu + epsilon). 
  ordered_map   <- sort.int(mcmcest@map$par$lambda, index.return=TRUE, 
                            decreasing=TRUE)
  ordered_bml   <- sort.int(mcmcest@bml$par$lambda, index.return=TRUE, 
                            decreasing=TRUE)
  ordered_ieavg <- sort.int(mcmcest@ieavg$par$lambda, index.return=TRUE, 
                            decreasing=TRUE)
  
  ## Note that we use T = 390 minutes.
  ## Furthermore, note that lambda_2 = 2 * epsilon * T.
  epsilon_map   <- ordered_map$x[2] / (2 * 390)
  epsilon_bml   <- ordered_bml$x[2] / (2 * 390)
  epsilon_ieavg <- ordered_ieavg$x[2] / (2 * 390)

  ## Note that lambda_1 = (2 * epsilon + mu) * T
  mu_map   <- ordered_map$x[1] / 390 - 2 * epsilon_map
  mu_bml   <- ordered_bml$x[1] / 390 - 2 * epsilon_bml
  mu_ieavg <- ordered_ieavg$x[1] / 390 - 2 * epsilon_ieavg
  
  ## Get the weight for the first component. 
  alpha_map   <- mcmcest@map$weight[ordered_map$ix[1]]
  alpha_bml   <- mcmcest@bml$weight[ordered_bml$ix[1]]
  alpha_ieavg <- mcmcest@ieavg$weight[ordered_ieavg$ix[1]]
  
  ## Calculate the PIN
  pin_map   <- compute_pin(alpha_map, epsilon_map, mu_map)
  pin_bml   <- compute_pin(alpha_bml, epsilon_bml, mu_bml)
  pin_ieavg <- compute_pin(alpha_ieavg, epsilon_ieavg, mu_ieavg)

  estimates <- data.frame('alpha'   = c(alpha_map, alpha_bml, alpha_ieavg), 
                          'epsilon' = c(epsilon_map, epsilon_bml, 
                                        epsilon_ieavg),
                          'mu'      = c(mu_map, mu_bml, mu_ieavg),
                          'pin'     = c(pin_map, pin_bml, pin_ieavg),
                          row.names = c('MAP', 'BML', 'IEAVG'))

  return(estimates)
}

#' Computes the probability of informed trading from ML estimates
#' 
#' @description
#' Calling [compute_mlpin()] returns the probability of informed trading as 
#' presented in the paper of Easley et al. (1996). 
#' 
#' The estimation procedure in [estimate_mlekop()] uses the logits of the 
#' rates `alpha` and `delta` to ensure a more stable optimization. To arrive
#' at the original parameter estimates the logistic transformation from 
#' [logistic()] is applied. 
#' 
#' @param par A vector containing the parameter estimates from the optimization 
#'   procedure. 
#' @return A double holding the PIN estimate.
#' @export
#'
#' @examples 
#' # Simulate trades data.
#' trades_data <- simulate_ekop()
#' # Estimate the EKOP model.
#' pin_optml <- estimate_mlekop(trades_data, methodLik="approx", 
#'                              fnLik="compute_ekop_orig_lik")
#' # Estimate the PIN from the parameter estimates.
#' compute_mlpin(pin_optml$par)
#' 
#' @seealso
#' * [estimate_mlekop()] for the calling function.
#' 
#' @references 
#' * Easley, D., Kiefer, N., O’Hara, M., Paperman, J., 1996. Liquidity, 
#'   information, and infrequently traded stocks. Journal of Finance 51, 
#'   1405–1436.
"compute_mlpin" <- function(par)
{
  # Transform logits.
  alpha   <- logistic(par[1])
  epsilon <- par[2]
  if (length(par) == 4)
  {
    # Original EKOP model.
    delta <- logistic(par[3])
    mu    <- par[4]
    estimates  <- data.frame('alpha'   = c(alpha), 
                             'epsilon' = c(epsilon),
                             'delta'   = c(delta),
                             'mu'      = c(mu),
                             row.names = c('ML'))
  }
  else {
    mu   <- par[3]
    estimates  <- data.frame('alpha'   = c(alpha), 
                             'epsilon' = c(epsilon),
                             'mu'      = c(mu),
                             row.names = c('ML'))
  }
  pin <- data.frame('pin' = compute_pin(alpha, epsilon, mu))
  
  estimates            <- cbind(estimates, pin)
  
  return(estimates)
}
#' Computes the probability of informed trading
#' 
#' @description 
#' Calling [compute_pin()] computes the probability of informed trading (PIN)
#' from the EKOP model for provided parameters. See Easley et al. (1996) for
#' details.
#' 
#' @param alpha A double specifying the probability of an information event. 
#' @param epsilon A double specifying the arrival rate of uninformed traders.
#' @param mu A double specifying the arrival rate of informed traders.
#' @return The probability of informed trading.
#' @export
#' 
#' @examples 
#' compute_pin(.2, .25, .01)
#' 
#' @seealso
#' * [compute_bayespin()] for computing the PIN from bayesian parameter 
#'   estimates
#' 
#' @references 
#' * Easley, D., Kiefer, N., O’Hara, M., Paperman, J., 1996. Liquidity, 
#'   information, and infrequently traded stocks. Journal of Finance 51, 
#'   1405–1436. 
"compute_pin" <- function(alpha, epsilon, mu)
{
  pin <- alpha * mu / ( alpha * mu + 2 * epsilon )
  return(pin)
}

#' Estimates the probability of informed trading with maximum likelihood
#' 
#' @description 
#' Calling [estimate_mlekop()] estimates the parameters from the model of Easley
#' et al. (1996, EKOP) together with the probability of informed trading (PIN).
#' As described in the EKOP model trade data for buy and sell trades are needed,
#' respectively. Note that in contrast the compressed EKOP model needs only data
#' for the number of trades per day (see the function references below).
#' 
#' @details 
#' ## Maximum Likelihood functions
#' There exist different likelihood variants that can be used in maximum 
#' likelihood estimation of the EKOP model.
#' * `compute_ekop_orig_lik` uses the likelihood function proposed in the original 
#'   paper of Easley et al. (1996).
#' * `compute_ekop_lik` uses a likelihood that is modified in regard to deal 
#'   better with computational overflow. This likelihood function had been 
#'   presented in slightly modified version by Easley et al. (2002).
#' Furthermore, to deal with undefined function values like `NaN` or `inf` an
#' approximation method can be chosen by the argument `methodLik`. Choosing
#' `"approx"` then approximates values of `NaN`, `-inf` or `inf` by values
#' `1e+6`, `-1e+6`, and `1e+6`, respectively and basically ignores the
#' occurrence of undefined values.
#' 
#' ## Optimization algorithms 
#' Optimization is performed gradient-based by using
#' the \code{\link[stats]{optim}} function. The algorithm used is the `L-BFGS-B`
#' that allows for parameter restrictions. This is necessary because we use for
#' the probabilities `alpha` and `delta` the logistic transformation
#' `exp()/(1+exp())` to produce valid probabilities in optimization.
#' 
#' In case that the gradient-based algorithm does not converge in between 100
#' steps, a gradient-free optimization is applied. For gradient-free
#' optimization \code{\link[dfoptim]{nmkb}}, a bounded `Nelder-Mead` algorithm
#' is used. Derivative-free optimization is only performed after gradient-based
#' optimization did not converge and the argument `grad_free` is `TRUE`.
#' 
#' The argument `fnscale` can be used to scale the likelihood function in case 
#' of very large values (e.g. very large volumes) that might lead to number 
#' overflow during computation. Note, `fnscale` must always be negative as the 
#' likelihood function should be maximized.  
#' 
#' @param data A `data.frame` containing the number of buyer- and
#'   seller-initiated trades. The data must be ordered in columns beginning with
#'   the number of mis-specified buys, mis-specified sells, number of buys,
#'   number of sells, and finally the sum of trades per day. See for an example
#'   [simulate_ekop()].
#' @param startpar A vector containing start parameters for maximum likelihood 
#'   estimation. These must be starting values for the logit of alpha, epsilon,
#'   the logit of delta, and mu. If no starting values are provided the function 
#'   chooses the values (0, mean(trades)x.75/2, 0, mean(trades)x.25/2). 
#' @param T A double specifying the minutes of a trading day.
#' @param methodLik A character specifying, if undefined function values in 
#'   optimization should be approximated by large defined values (`1e+6`).
#'   This can help to make maximum likelihood estimation more stable. 
#' @param fnLik A character specifying which likelihood function to use. Either
#'   the original function by Easley et al. (1996) or the slightly modified 
#'   variant of Easley et al. (2002) can be used. The latter one is known to 
#'   also work better with large trading volumes. 
#' @param fnscale A negatve double specifying a scaling factor for the
#'   likelihood function. This can in some cases help when the algorithm does
#'   not reach convergence or suffer from number overflow.
#' @param trace An integer specifying which level of tracing should be used. 
#'   see `?optim` for more details. 
#' @param grad_free A logical indicating if gradient-free optimization should 
#'   be used when gradient descent did not converge. If `TRUE` the optimization 
#'   procedure \code{\link[dfoptim]{nmkb}} is used. 
#' @param return_opt A logical indicating, if in addition to the PIN estimates 
#'   also the results from the optimization procedure should be returned. If 
#'   `TRUE` a list is returned with an element `pin_estimates` holding the 
#'   PIN estimates and an element `opt_results` holding the output of the 
#'   optimization procedure.
#' @param opt_out (Deprecated) A logical indicating if only the output of the 
#'   optimization procedure should be returned. Some older applications still 
#'   rely on this output. In the next version this feature will be removed. 
#'   Note that the default value is `TRUE`.  
#' @return A `list` with all components as returned by 
#'   \code{\link[stats]{optim}} or \code{\link[dfoptim]{nmkb}}.
#' @export
#' @importFrom stats optim
#' @importFrom dfoptim nmkb
#'
#' @examples 
#' # Simulate data from the EKOP model. 
#' trades_data <- simulate_ekop()
#' # Estimate the EKOP model by maximum likelihood.
#' pin_estml <- estimate_mlekop(trades_data, methodLik="approx", 
#'                              fnLik="compute_ekop_orig_lik", opt_out=FALSE)
#'                    
#' @seealso
#' * [estimate_pin()] for estimating the PIN with a Bayesian approach that 
#'   needs only the total number of trades
#' * [estimate_compml()] for estimating the PIN with the compressed EKOP model 
#'   that needs only the total number of trades
#' * [compute_ekop_lik()] for the implementation of the likelihood function of 
#'   the paper of Easley et al. (2002)
#' * [compute_ekop_orig_lik()] for the implementation of the likelihood function 
#'   of the paper of Easley et al. (1996)
#'   
#' @references 
#' * Easley, D., Kiefer, N., O’Hara, M., Paperman, J., 1996. Liquidity, 
#'   information, and infrequently traded stocks. Journal of Finance 51, 
#'   1405–1436.
#' * Easley, David, Hvidkjaer, Soeren, and O’Hara, Maureen (2002). 
#'   “Is Information Risk a Determinant of Asset Returns?” In: The Journal of 
#'   Finance 57.5, pp. 2185–2221. DOI: 10.1111/1540-6261.00493.
"estimate_mlekop" <- function(data, startpar, T = 390, 
                              methodLik = c("precise", "approx"),
                              fnLik = c("compute_ekop_lik", 
                                        "compute_ekop_orig_lik"), 
                              fnscale=-1, trace=0, grad_free=TRUE,
                              return_opt=FALSE, opt_out = TRUE) 
{
  fnLik <- match.arg(fnLik)
  if (missing(startpar)) {
    if (trace > 0)
    {
      cat("Using default starting values...\n")
    }
    tmp         <- mean(data[, 5], na.rm = TRUE)/T
    startpar 	  <- c(0, tmp * 0.75/2, 0, tmp * 0.25/2)
  }
  
  ## optimization settings ##
  optim_fn 	    <- match.fun(fnLik)
  optim_Method 	<- "L-BFGS-B"
  optim_lower 	<- c(-1e+2, 1e-6, -1e+2, 1e-6)
  optim_upper 	<- c(1e+2, 1e+6, 1e+2, 1e+6)
  optim_fnscale <- fnscale 
  optim_maxit 	<- 200
  optim_ctrl	  <- list(fnscale=optim_fnscale, maxit=optim_maxit, trace=trace)
  
  optim_res <- optim(par=startpar, fn=optim_fn, data=data, T=T, 
                     methodLik=methodLik, method=optim_Method, 
                     lower=optim_lower, upper=optim_upper, 
                     control=optim_ctrl, hessian=TRUE)
  
  conv_msg  <- "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH" 
  
  if ((optim_res$convergence > 0 || conv_msg != optim_res$message) && grad_free)
  {
    dfoptim_ctrl  <- list(maximize=TRUE)
    # Bounded Nelder-Mead needs lower absolute bounds for the 
    # alpha and delta , otherwise the log-likelihood becomes NaN.
    dfoptim_lower <- c(-1e+1, 1e-6, -1e+1, 1e-6)
    dfoptim_upper <- c(1e+1, 1e+6, 1e+1, 1e+6)
    optim_res     <- dfoptim::nmkb(par=startpar, fn=optim_fn, data=data, T=T, 
                                   methodLik=methodLik, lower=dfoptim_lower,
                                   upper=dfoptim_upper, control=dfoptim_ctrl)
  }
  
  # If the old version of the function is expected 
  # return immediately. 
  if (opt_out) return(optim_res)
  
  # Compute the PIN from the parameter estimates.
  pin_estimates <- compute_mlpin(optim_res$par)
  pin_estimates
  if (return_opt) {
    return(list(pin_estimates=pin_estimates, opt_results=optim_res))
  } else
  {
    return(pin_estimates)
  }
}

#' Estimated the probability of informed trading of the compressed EKOP model
#' 
#' @description 
#' Calling [estimate_compml()] estimates the probability of informed trading 
#' (PIN) from the model of Jackson (2007) using maximum likelihood (ML). As 
#' described in the paper by Jackson (2007) and similar to the Bayesian 
#' estimation used in Grammig et al. (2015) this model needs only the number 
#' of trades per day provided in the parameter `data`.
#' 
#' @details
#' ## Optimization algorithms 
#' Optimization is performed gradient-based by using the
#' \code{\link[stats]{optim}} function. The algorithm used is the `L-BFGS-B`
#' that allows for parameter restrictions. This is necessary because we use for
#' the probability `alpha` the logistic transformation `exp()/(1+exp())` to
#' produce valid probabilities in optimization.
#' 
#' In case that the gradient-based algorithm does not converge in between 100
#' steps, a gradient-free optimization is applied. For gradient-free
#' optimization \code{\link[dfoptim]{nmkb}}, a bounded `Nelder-Mead` algorithm
#' is used. Derivative-free optimization is only performed after gradient-based
#' optimization did not converge and the argument `grad_free` is `TRUE`.
#' 
#' The argument `fnscale` can be used to scale the likelihood function in case 
#' of very large values (e.g. very large volumes) that might lead to number 
#' overflow during computation. Note, `fnscale` must always be negative as the 
#' likelihood function should be maximized.
#'
#' @param data A `vector` containing the number of trades per day.
#' @param startpar A vector containing start parameters for maximum likelihood
#'   estimation. These must be starting values for the logit of alpha, epsilon,
#'   and mu. If no starting values are provided the function chooses the values
#'   `(0, mean(trades)x.75/2, mean(trades)x.25/2)`.
#' @param T A double specifying the minutes of a trading day.
#' @param methodLik A character specifying, if undefined function values in 
#'   optimization should be approximated by large defined values (`1e+6`).
#'   This can help to make maximum likelihood estimation more stable. 
#' @param fnscale A negatve double specifying a scaling factor for the
#'   likelihood function. This can in some cases help when the algorithm does
#'   not reach convergence or suffer from number overflow.
#' @param trace An integer specifying which level of tracing should be used. 
#'   see `?optim` for more details. 
#' @param grad_free A logical indicating if gradient-free optimization should 
#'   be used when gradient descent did not converge. If `TRUE` the optimization 
#'   procedure \code{\link[dfoptim]{nmkb}} is used. 
#' @param return_opt A logical indicating, if in addition to the PIN estimates 
#'   also the results from the optimization procedure should be returned. If 
#'   `TRUE` a list is returned with an element `pin_estimates` holding the 
#'   PIN estimates and an element `opt_results` holding the output of the 
#'   optimization procedure.
#' @param opt_out (Deprecated) A logical indicating if only the output of the 
#'   optimization procedure should be returned. Some older applications still 
#'   rely on this output. In the next version this feature will be removed. 
#'   Note that the default value is `TRUE`.  
#' @return A `list` with all components as returned by 
#'   \code{\link[stats]{optim}} or \code{\link[dfoptim]{nmkb}}.
#' @export
#' @importFrom stats optim
#' @importFrom dfoptim nmkb
#'
#' @examples 
#' # Simulate data from the EKOP model. 
#' trades_data <- simulate_ekop()
#' # Estimate the EKOP model by maximum likelihood.
#' pin_estml <- estimate_compml(trades_data$Trades, methodLik="approx", 
#'                              opt_out=FALSE)
#'                    
#' @seealso
#' * [estimate_pin()] for estimating the PIN with a Bayesian approach that 
#'   needs only the total number of trades
#' * [estimate_mlekop()] for estimating the PIN with the original EKOP model 
#'   that needs the numbers of buyer- and seller-initiated trades per day
#' * [compute_comp_lik()] for the implementation of the likelihood function of 
#'   the paper of Jackson (2007).
#'   
#' @references
#' * Jackson, D., 2007. Infering trader behavior from transaction data: A trade 
#'   count model. Journal of Computational and Graphical Statistics 12, 55-79.
#' * Grammig, J., Theissen, E., Zehnder, L.S., 2015. Bayesian Estimation of the 
#'   Probability of Informed Trading. Conference on Financial Econometrics & 
#'   Empirical Asset Pricing 2016, Lancaster University
"estimate_compml" <- function(data, startpar, T = 390, 
                              methodLik = c("precise", "approx"),
                              fnscale=-1, trace=0, grad_free=TRUE,
                              return_opt=FALSE, opt_out=TRUE) 
{
  if (missing(startpar)) {
    if (trace > 0)
    {
      cat("Using default starting values...\n")
    }
    tmp         <- mean(data, na.rm = TRUE)/T
    startpar 	  <- c(0, tmp * 0.75/2, tmp * 0.25/2)
  }
  
  ## optimization settings ##
  optim_fn 	    <- compute_comp_lik
  optim_Method 	<- "L-BFGS-B"
  optim_lower 	<- c(-1e+6, 1e-6, 1e-6)
  optim_upper 	<- c(1e+2, 1e+6, 1e+6)
  optim_fnscale <- fnscale 
  optim_maxit 	<- 200
  optim_ctrl	  <- list(fnscale=optim_fnscale, maxit=optim_maxit, trace=trace)
  
  optim_res <- optim(par=startpar, fn=optim_fn, data=data, T=T, 
                     methodLik=methodLik, method=optim_Method, 
                     lower=optim_lower, upper=optim_upper, 
                     control=optim_ctrl, hessian=TRUE)
  
  conv_msg  <- "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
  
  if ((optim_res$convergence > 0 || conv_msg != optim_res$message) && grad_free)
  {
    dfoptim_ctrl  <- list(maximize=TRUE)
    dfoptim_lower <- c(-1e+1, 1e-6, 1e-6)
    dfoptim_upper <- c(1e+1, 1e+6, 1e+6)
    optim_res     <- dfoptim::nmkb(par=startpar, fn=optim_fn, data=data, T=T, 
                                   methodLik=methodLik, lower=dfoptim_lower,
                                   upper=dfoptim_upper, control=dfoptim_ctrl)
  }
  
  # If the old version of the function is expected 
  # return immediately. 
  if (opt_out) return(optim_res)
  
  # Compute the PIN from the parameter estimates.
  pin_estimates <- compute_mlpin(optim_res$par)
  if (return_opt) {
    return(list(pin_estimates=pin_estimates, opt_results=optim_res))
  } else
  {
    return(pin_estimates)
  }
}
simonsays1980/bayespin documentation built on Dec. 23, 2021, 2:25 a.m.