trexreg: Truncated Expectation Regression

#' Double-Truncated Expectation Regression
#'
#' TBA
#'
#' @export
dter <- function(...) UseMethod('dter')

dter.default <- function(...) {
  stop('This method should not be called directly.
       Use the formula interface instead.')
}

dter.formula <- function(formula, data = parent.frame(), lq, uq, early_stopping = 10, ...) {
  chkDots(...)
  if(is.matrix(data)) {
    data <- as.data.frame(data)
  }

  # Preparation steps
  formula <- Formula::Formula(formula)
  formula <- dter.check_formula(formula)
  mf <- stats::model.frame(formula = formula, data = data, na.action = stats::na.pass)
  terms <- attr(mf, 'terms')

  # Extract data
  xm <- stats::model.matrix(formula, mf, rhs=1)
  xl <- stats::model.matrix(formula, mf, rhs=2)
  xu <- stats::model.matrix(formula, mf, rhs=3)
  y <- stats::model.response(mf)

  # Fit model
  fit <- dter.fit(xm = xm, xl = xl, xu = xu, y = y,
                  lq = lq, uq = uq, early_stopping = early_stopping)

  # Store multiple objects
  fit$formula <- formula
  fit$call <- match.call()
  fit$terms <- terms
  fit$xlevels <- stats::.getXlevels(terms, mf)
  fit$data <- data
  fit$y <- y
  fit$xm <- xm
  fit$xl <- xl
  fit$xu <- xu
  fit$lq <- lq
  fit$uq <- uq

  fit
}

#' @export
dter.fit <- function(xm, xl, xu, y, lq, uq, early_stopping, ...) {
  chkDots(...)
  xm <- check.x(xm)
  xl <- check.x(xl)
  xu <- check.x(xu)
  y <- check.y(xm, y)
  fit <- do.dter.fit(xm = xm, xl = xl, xu = xu, y = y,
                     lq = lq, uq = uq, early_stopping = early_stopping)
  class(fit) <- c('ter', 'dter')

  fit
}

do.dter.fit <- function(xm, xl, xu, y, lq, uq, early_stopping) {
  k1 <- ncol(xm)
  k2 <- ncol(xl)
  k3 <- ncol(xu)

  # Get starting values
  par0 <- dter.starting_values(xm = xm, xl = xl, xu = xu, y = y,
                               lq = lq, uq = uq)

  # Optimize the model
  fun <- function(par) dter.loss(par = par, xm = xm, xl = xl, xu = xu, y = y, lq = lq, uq = uq)
  fit <- try(stats::optim(par = par0, fn = fun, method = 'Nelder-Mead'), silent = FALSE)

  # Counts the iterations without decrease of the loss
  counter <- 0

  while (counter < early_stopping) {
    # Perturbe b
    part <- fit$par + stats::rnorm(length(fit$par), sd=1)

    # Fit the model with the perturbed parameters
    tmp_fit <- try(stats::optim(par = part, fn = fun, method = 'Nelder-Mead'),
                   silent = FALSE)

    # Replace the fit if the new loss is smaller than the old. Otherwise increase the counter.
    if (!inherits(tmp_fit, 'try-error')) {
      if (tmp_fit$value < fit$value) {
        fit <- tmp_fit
        counter <- 0
      } else {
        counter <- counter + 1
      }
    }
  }

  # Retun results
  list(
    coefficients  = list(
      coef_mean=fit$par[(k1+k2+1):(k1+k2+k3)],
      coef_lq=fit$par[1:k1],
      coef_uq=fit$par[(k1+1):(k1+k2)]
    ),
    fitted.values = 0,
    loss          = fit$value
  )
}

#' @title dter starting values
#' @description TBA
#' @inheritParams dter.fit
#' @return TBA
#' @keywords internal
#' @export
dter.starting_values <- function(y, xm, xl, xu, lq, uq) {
  # First stage: quantile regressions
  fit_rqa <- quantreg::rq(y ~ xl-1, tau = lq)
  fit_rqb <- quantreg::rq(y ~ xu-1, tau = uq)
  qa <- fit_rqa$fitted.values
  qb <- fit_rqb$fitted.values
  par_qa <- fit_rqa$coefficients
  par_qb <- fit_rqb$coefficients

  # Second stage: weighted least squares regression
  fit_m <- stats::lm(y ~ xm-1, weights = (qa <= y) * (y <= qb))
  par_m <- fit_m$coefficients

  # Stack coefficients and return them
  unname(c(par_qa, par_qb, par_m))
}

vcov.dter <- function(object, ...) {

  mab <- ter.lin_mod(par = object$coefficients$coef_mean, x = object$xm)
  qa <- ter.lin_mod(par = object$coefficients$coef_lq, x = object$xl)
  qb <- ter.lin_mod(par = object$coefficients$coef_uq, x = object$xu)

  gmab <- cbind(
    object$xm,
    matrix(0, nrow(object$xl), ncol(object$xl)),
    matrix(0, nrow(object$xu), ncol(object$xu))
  )

  gqa <- cbind(
    matrix(0, nrow(object$xm), ncol(object$xm)),
    object$xl,
    matrix(0, nrow(object$xu), ncol(object$xu))
  )

  gqb <- cbind(
    matrix(0, nrow(object$xm), ncol(object$xm)),
    matrix(0, nrow(object$xl), ncol(object$xl)),
    object$xu
  )

  p <- psi_m(y = object$y, qa = qa, qb = qb, mab = mab,
             gqa = gqa, gqb = gqb, gmab = gmab,
             lq = object$lq, uq = object$uq, return_mean = FALSE)

  n <- nrow(p)
  c <- n^{-1/3}
  Sigma <- crossprod(p) / n

  idx_lq <- (abs(object$y - qa) < c) * 1
  idx_uq <- (abs(object$y - qb) < c) * 1

  k <- ncol(object$xm) + ncol(object$xl) + ncol(object$xu)
  lq <- object$lq
  uq <- object$uq

  Lambda <- matrix(0, k, k)
  for (i in 1:n) {
    tmp <- 1/(2*c) * idx_lq[i] * tcrossprod(gqa[i,]) * (g1_prime(qa[i], lq, uq) - phi_prime(mab[i]) / (uq-lq)) +
      1/(2*c) * idx_uq[i] * tcrossprod(gqb[i,]) * (g2_prime(qb[i]) + phi_prime(mab[i]) / (uq-lq)) +
      tcrossprod(gmab[i,]) * phi_prime2(mab[i,])
    Lambda <- Lambda + tmp
  }
  Lambda <- Lambda / n
  vcov <- solve(Lambda) %*% Sigma %*% solve(Lambda) / n
  # colnames(vcov) <- rownames(vcov) <- c(colnames(object$xm),
  #                                       colnames(object$xl),
  #                                       colnames(object$xu))

  vcov
}