R/pin_max.R

Defines functions pin_est_core

Documented in pin_est_core

#' Optimization in PIN models
#'
#' Core function for optimization routines
#'
#' Grants the most control over optimization procedure.
#' User-friendly wrappers for estimation with trading data of arbitrary length and quarterly data
#' are implemented with \code{\link{pin_est}} and \code{\link{qpin}}, respectively.
#' \code{\link{nlminb}} function in the \pkg{stats} package is used for maximization.
#' Vectors for \code{numbuys} and \code{numsells} need to have same length. \cr \cr
#' Confidence intervals for the probability of informed trading are calculated via Monte-Carlo-Simulation
#' if \code{confint = TRUE}. Settings of the confidence interval simulation can be specified via a named list for \code{ci_control}.
#' Valid list names are \code{n}, \code{seed}, \code{level} and \code{ncores} which pass
#' number of simulation runs (defaults to 10000), seed for RNG (defaults to \code{seed = NULL}),
#' confidence level (defaults to 0.95) and number of cpu cores utilized (defaults to 1).
#'
#' @inheritParams pin_ll
#' @param init_vals \emph{numeric}: matrix of initial values: either generated by \code{\link{initial_vals}} or
#'                   user-defined matrix with five columns for which colnames must consist of:
#'                   \code{alpha}, \code{delta}, \code{epsilon_b}, \code{epsilon_s}, \code{mu}
#' @param factorization \emph{character}: factorization of likelihood function: either 'EHO' or 'Lin_Ke', defaults to: 'Lin_Ke'
#' @param lower \emph{numeric}: lower bounds for optimization, must have length of 5
#' @param upper \emph{numeric}: upper bounds for optimization, must have length of 5
#' @param num_best_res Number of optimization runs for which results should be returned, either numeric or 'all',
#'                     only relevant if \code{init_vals} = 'Grid', defaults to 1
#' @param only_converged \emph{logical}: Return only results for which the likelihood converged?
#'                       Defaults to \code{TRUE}
#' @param nlminb_control \emph{list}: Control list for \code{\link[stats]{nlminb}}
#' @param confint \emph{logical}: Compute confidence intervals for PIN?
#'                 Defaults to \code{FALSE}
#' @param ci_control \emph{list}: see \strong{Details}
#' @param posterior \emph{logical}: Should posterior probabilities for conditions of trading days be computed?
#'
#' @seealso \code{\link{nlminb}},
#'          \code{\link{initial_vals}}
#'          \code{\link{pin_est}}
#'          \code{\link{qpin}}
#'          \code{\link{pin_confint}}
#'
#' @importFrom parallel detectCores
#' @importFrom stats nlminb
#'
#' @references
#' Easley, David et al. (2002) \cr
#' Is Information Risk a Determinant of Asset Returns? \cr
#' \emph{The Journal of Finance}, Volume 57, Number 5, pp. 2185 - 2221 \cr
#' \doi{10.1111/1540-6261.00493}
#'
#' Easley, David et al. (1996) \cr
#' Liquidity, Information, and Infrequently Traded Stocks\cr
#' \emph{The Journal of Finance}, Volume 51, Number 4, pp. 1405 - 1436 \cr
#' \doi{10.1111/j.1540-6261.1996.tb04074.x}
#'
#' Easley, David et al. (2010) \cr
#' Factoring Information into Returns \cr
#' \emph{Journal of Financial and Quantitative Analysis}, Volume 45, Issue 2, pp. 293 - 309 \cr
#' \doi{10.1017/S0022109010000074}
#'
#' Ersan, Oguz and Alici, Asli (2016) \cr
#' An unbiased computation methodology for estimating the probability of informed trading (PIN) \cr
#' \emph{Journal of International Financial Markets, Institutions and Money}, Volume 43, pp. 74 - 94 \cr
#' \doi{10.1016/j.intfin.2016.04.001}
#'
#' Gan, Quan et al. (2015) \cr
#' A faster estimation method for the probability of informed trading
#' using hierarchical agglomerative clustering \cr
#' \emph{Quantitative Finance}, Volume 15, Issue 11, pp. 1805 - 1821 \cr
#' \doi{10.1080/14697688.2015.1023336}
#'
#' Lin, Hsiou-Wei William and Ke, Wen-Chyan (2011) \cr
#' A computing bias in estimating the probability of informed trading \cr
#' \emph{Journal of Financial Markets}, Volume 14, Issue 4, pp. 625 - 640 \cr
#' \doi{10.1016/j.finmar.2011.03.001}
#'
#' Revolution Analytics and Steve Weston (2015) \cr
#' doParallel: Foreach Parallel Adaptor for the 'parallel' Package \cr
#' \emph{R package version 1.0.10}
#'
#' Revolution Analytics and Steve Weston (2015) \cr
#' foreach: Provides Foreach Looping Construct for R \cr
#' \emph{R package version 1.4.3}

#' Yan, Yuxing and Zhang, Shaojun (2012) \cr
#' An improved estimation method and empirical properties of the probability of informed trading \cr
#' \emph{Journal of Banking & Finance}, Volume 36, Issue 2, pp. 454 - 467 \cr
#' \doi{10.1016/j.jbankfin.2011.08.003}
#'
#' @return
#' If \code{num_best_res} = 1, a list with following elements is returned:
#' \describe{
#' \item{Results}{Matrix containing the parameter estimates as well as their estimated standard errors,
#'  t-values and p-values.}
#'  \item{ll}{Value of likelihood function returned by \code{nlminb}}
#'  \item{pin}{Estimated probability of informed trading}
#'  \item{conv}{Convergence code for nlminb optimization}
#'  \item{message}{Convergence message returned by the nlminb optimizer}
#'  \item{iterations}{Number of iterations until convergence of nlminb optimizer}
#'  \item{init_vals}{Vector of initial values}
#'  \item{confint}{If \code{confint = TRUE}; confidence interval for the probability of informed trading}
#'  }
#'
#'  If \code{num_best_res} > 1, a named list of lists is returned. Each component of the outer list
#'  is again a list structured as shown above.
#'  Naming scheme for the outer list is 'Best1',..., 'Best\code{num_best_res}'.
#'
#' @examples
#' # Loading simulated data for frequently traded stock
#'
#' data("BSfrequent")
#'
#' # Generate several matrices of initial values utilizing all methods implemented
#'
#' inits_grid <- initial_vals(numbuys = BSfrequent[,"Buys"],
#'                            numsells = BSfrequent[,"Sells"],
#'                            method = "Grid")
#'
#' inits_hac <- initial_vals(numbuys = BSfrequent[,"Buys"],
#'                           numsells = BSfrequent[,"Sells"],
#'                           method = "HAC")
#'
#' inits_hac_ref <- initial_vals(numbuys = BSfrequent[,"Buys"],
#'                               numsells = BSfrequent[,"Sells"],
#'                               method = "HAC_Ref")
#'
#' # Optimization with different matrices of initial values
#'
#' pin_core_grid <- pin_est_core(numbuys = BSfrequent[,"Buys"],
#'                               numsells = BSfrequent[,"Sells"],
#'                               factorization = "Lin_Ke", init_vals = inits_grid,
#'                               lower = rep(0,5), upper = c(1,1, rep(Inf,3)),
#'                               num_best_res = 5)
#'
#' pin_core_hac <- pin_est_core(numbuys = BSfrequent[,"Buys"],
#'                              numsells = BSfrequent[,"Sells"],
#'                              factorization = "Lin_Ke", init_vals = inits_hac,
#'                              lower = rep(0,5), upper = c(1,1, rep(Inf,3)))
#'
#' pin_core_hac_ref <- pin_est_core(numbuys = BSfrequent[,"Buys"],
#'                                  numsells = BSfrequent[,"Sells"],
#'                                  factorization = "Lin_Ke", init_vals = inits_hac_ref,
#'                                  lower = rep(0,5), upper = c(1,1, rep(Inf,3)))
#'
#' \dontrun{
#' pin_core_hac <- pin_est_core(numbuys = BSfrequent[,"Buys"],
#'                              numsells = BSfrequent[,"Sells"],
#'                              factorization = "Lin_Ke", init_vals = inits_hac,
#'                              lower = rep(0,5), upper = c(1,1, rep(Inf,3)),
#'                              confint = TRUE)
#' }
#' @export pin_est_core

pin_est_core <- function(numbuys = NULL, numsells = NULL,
                         factorization = "Lin_Ke",
                         init_vals = NULL, lower = rep(0,5), upper = c(1,1,rep(Inf,3)),
                         num_best_res = 1, only_converged = TRUE,
                         nlminb_control = list(),
                         confint = FALSE, ci_control = list(), posterior = TRUE) {
  if(is.null(init_vals)) stop("No initial values provided!")
  if(is.null(lower) || is.null(upper)) stop("Lower or upper bounds missing!")
  if(length(numbuys) != length(numsells)) stop("Unequal lengths for 'numbuys' and 'numsells'")

  factr <- match.arg(factorization, choices = c("Lin_Ke", "EHO"))

  mat <- matrix(data = NA, nrow = nrow(init_vals), ncol = ncol(init_vals) + 4)
  colnames(mat) <- c(colnames(init_vals), "loglike", "PIN", "Convergence", "Iterations")

  opt_message <- numeric(nrow(init_vals))

  fn <- function(x) pin_ll(param = x,
                           numbuys = numbuys, numsells = numsells,
                           factorization = factr)

  par_names <- c("alpha", "delta", "epsilon_b", "epsilon_s", "mu")

  if(nrow(mat) == 1) num_best_res <- 1

  ci_con <- list(n = 10000, seed = NULL, level = 0.95, ncores = 1)
  names_ci <- names(ci_con)

  ci_con[(nam_ci <- names(ci_control))] <- ci_control

  if (length(noNms <- nam_ci[!nam_ci %in% names_ci]))
    warning("unknown names in control: ", paste(noNms, collapse = ", "))
  if(ci_con$ncores < 1)
    stop("Set valid number of cpu cores for 'ncores'")

  nlminb_con <- list(eval.max = 1000,
                     iter.max = 500,
                     trace = 0,
                     abs.tol = 0,
                     rel.tol = 1e-10,
                     x.tol = 1.5e-8,
                     xf.tol = 2.2e-14,
                     step.min = 1, step.max = 1,
                     sing.tol = 1e-10)
  names_nlminb <- names(nlminb_con)
  nlminb_con[(nam_nlminb <- names(nlminb_control))] <- nlminb_control

  if (length(noNms_nlminb <- nam_nlminb[!nam_nlminb %in% names_nlminb]))
    warning("unknown names in control: ", paste(noNms_nlminb, collapse = ", "))

  for(i in 1:nrow(mat)) {
    tmp <- nlminb(start = init_vals[i,], objective = function(x) -fn(x),
                  lower = lower, upper = upper,
                  control = nlminb_con)

    mat[i, par_names] <- tmp$par
    mat[i,"loglike"] <- -tmp$objective
    mat[i,"PIN"] <- pin_calc(param = tmp$par)
    mat[i,"Convergence"] <- as.integer(tmp$convergence)
    mat[i,"Iterations"] <- as.integer(tmp$iterations)
    opt_message[i] <- tmp$message
  }
  if(nrow(mat) > 1) {
    mat <- mat[order(mat[,"loglike"], decreasing = TRUE),]
    start_vals <- init_vals[order(mat[,"loglike"], decreasing = TRUE),]
    if(only_converged) {
      # Convergence code 0 means succesful optimization
      converged <- as.logical(!mat[,"Convergence"])
      mat <- mat[converged,]
      start_vals <- start_vals[converged,]
    }
  }
  else start_vals <- matrix(init_vals[1,], nrow = 1)
  # num_best_res sets the number of returned parameter sets
  if(num_best_res == "all") num_best_res <- nrow(mat)

  if(num_best_res > 1) {
    mat_list <- vector("list", num_best_res)
    names(mat_list) <- paste0("Best", 1:num_best_res)
    for(i in 1:num_best_res) {
      mat_list[[paste0("Best",i)]] <- vector("list", 7)
      names(mat_list[[paste0("Best",i)]]) <- c("Results",
                                               "ll",
                                               "pin",
                                               "conv",
                                               "message",
                                               "iterations",
                                               "init_vals")
      if(confint) mat_list[[paste0("Best",i)]][["confint"]] <- numeric(2)

      tmp_res <- summary_car(param = mat[i,par_names],
                             numbuys = numbuys, numsells = numsells,
                             factorization = factorization,
                             lower = lower, upper = upper)
      if(!is.null(tmp_res)) {
        mat_list[[paste0("Best",i)]][["Results"]] <- tmp_res
      } else {
        mat_list[[paste0("Best",i)]][["Results"]] <- matrix(data = NA, ncol = 4, nrow = 5)
        colnames(mat_list[[paste0("Best",i)]][["Results"]]) <- c("Estimate", "Std. error", "t value", "Pr(> t)")
        rownames(mat_list[[paste0("Best",i)]][["Results"]]) <- c("alpha", "delta", "epsilon_b", "epsilon_s", "mu")
        mat_list[[paste0("Best",i)]][["Results"]][,"Estimate"] <- mat[i,par_names]
      }

      mat_list[[paste0("Best",i)]][["ll"]] <- mat[i,"loglike"]
      mat_list[[paste0("Best",i)]][["pin"]] <- mat[i,"PIN"]
      mat_list[[paste0("Best",i)]][["conv"]] <- mat[i,"Convergence"]
      mat_list[[paste0("Best",i)]][["message"]] <- opt_message[i]
      mat_list[[paste0("Best",i)]][["iterations"]] <- mat[i,"Iterations"]
      mat_list[[paste0("Best",i)]][["init_vals"]] <- start_vals[i,]
      if(confint) {
        mat_list[[paste0("Best",i)]][["confint"]] <- pin_confint(param = mat[i,par_names],
                                                                 numbuys = numbuys, numsells = numsells,
                                                                 lower = lower, upper = upper,
                                                                 n = ci_con$n, seed = ci_con$seed,
                                                                 level = ci_con$level, ncores = ci_con$ncores)
      }
      if(posterior) {
        mat_list[[paste0("Best",i)]][["posterior"]] <- posterior(param = mat[i,par_names],
                                                                 numbuys = numbuys, numsells = numsells)
      }
    }
  } else {
    mat_list <- vector("list", 7)
    names(mat_list) <- c("Results",
                         "ll",
                         "pin",
                         "conv",
                         "message",
                         "iterations",
                         "init_vals")
    if(confint) mat_list[["confint"]] <- numeric(2)

    tmp_res <- summary_car(param = mat[1,par_names],
                           numbuys = numbuys, numsells = numsells,
                           factorization = factorization,
                           lower = lower, upper = upper)
    if(!is.null(tmp_res)) {
      mat_list[["Results"]] <- tmp_res
    } else {
      mat_list[["Results"]] <- matrix(data = NA, ncol = 4, nrow = 5)
      colnames(mat_list[["Results"]]) <- c("Estimate", "Std. error", "t value", "Pr(> t)")
      rownames(mat_list[["Results"]]) <- c("alpha", "delta", "epsilon_b", "epsilon_s", "mu")
      mat_list[["Results"]][,"Estimate"] <- mat[i,par_names]
    }

    mat_list[["ll"]] <- mat[1,"loglike"]
    mat_list[["pin"]] <- mat[1,"PIN"]
    mat_list[["conv"]] <- mat[1,"Convergence"]
    mat_list[["message"]] <- opt_message[1]
    mat_list[["iterations"]] <- mat[1,"Iterations"]
    mat_list[["init_vals"]] <- start_vals[1,]
    names(mat_list[["init_vals"]]) <- c("alpha", "delta", "epsilon_b", "epsilon_s", "mu")
    if(confint) {
      mat_list[["confint"]] <- pin_confint(param = mat[1,par_names],
                                           numbuys = numbuys, numsells = numsells,
                                           lower = lower, upper = upper,
                                           n = ci_con$n, seed = ci_con$seed,
                                           level = ci_con$level, ncores = ci_con$ncores)
    }
    if(posterior) {
      mat_list[["posterior"]] <- posterior(param = mat[1,par_names],
                                           numbuys = numbuys, numsells = numsells)
    }
  }
  mat_list
}
anre005/pinbasic documentation built on May 6, 2022, 4:40 a.m.