trexreg: Truncated Expectation Regression

#' Upper-Truncated Expectation Regression
#'
#' TBA
#'
#' @export
uter <- function(...) UseMethod('uter')

uter.default <- function(...) {
  stop('This method should not be called directly.
       Use the formula interface instead.')
}

uter.formula <- function(formula, data = parent.frame(), uq, early_stopping=10, ...) {
  chkDots(...)
  if(is.matrix(data)) {
    data <- as.data.frame(data)
  }

  # Preparation steps
  formula <- Formula::Formula(formula)
  formula <- uter.check_formula(formula)
  mf <- stats::model.frame(formula = formula, data = data, na.action = stats::na.pass)
  terms <- attr(mf, 'terms')

  # Extract data
  xm <- stats::model.matrix(formula, mf, rhs=1)
  xu <- stats::model.matrix(formula, mf, rhs=2)
  y <- stats::model.response(mf)

  # Fit model
  fit <- uter.fit(xm = xm, xu = xu, y = y,
                  uq = uq, early_stopping = early_stopping)

  # Store multiple objects
  fit$formula <- formula
  fit$call <- match.call()
  fit$terms <- terms
  fit$xlevels <- stats::.getXlevels(terms, mf)
  fit$data <- data
  fit$y <- y
  fit$xm <- xm
  fit$xu <- xu
  fit$lq <- 0
  fit$uq <- uq

  fit
}

#' @export
uter.fit <- function(xm, xu, y, uq, early_stopping, ...) {
  chkDots(...)
  xm <- check.x(xm)
  xu <- check.x(xu)
  y <- check.y(xm, y)
  fit <- do.uter.fit(xm = xm, xu = xu, y = y,
                     uq = uq, early_stopping = early_stopping)
  class(fit) <- c('ter', 'uter')

  fit
}

do.uter.fit <- function(xm, xu, y, uq, early_stopping) {
  # Transform the dependent variable
  max_y <- max(y)
  y <- y - max_y

  # Get starting values
  par0 <- uter.starting_values(xm = xm, xu = xu, y = y, uq = uq)

  # Optimize the model
  fun <- function(par) uter.loss(par = par, xm = xm, xu = xu, y = y, uq = uq)
  fit <- try(stats::optim(par = par0, fn = fun, method = 'Nelder-Mead'), silent = FALSE)

  # Counts the iterations without decrease of the loss
  counter <- 0

  while (counter < early_stopping) {
    # Perturbe b
    k1 <- ncol(xm)
    k2 <- ncol(xu)
    part <- fit$par + stats::rnorm(k1+k2, sd=1)

    # Ensure that x'be < 0 by moving the es intercept
    max_xe <- max(xu %*% part[1:k1])
    part[1] <- part[1] - (max_xe + 0.1) * (max_xe > 0)

    # Fit the model with the perturbed parameters
    tmp_fit <- try(stats::optim(par = part, fn = fun, method = 'Nelder-Mead'), silent = FALSE)

    # Replace the fit if the new loss is smaller than the old. Otherwise increase the counter.
    if (!inherits(tmp_fit, 'try-error')) {
      if (tmp_fit$value < fit$value) {
        fit <- tmp_fit
        counter <- 0
      } else {
        counter <- counter + 1
      }
    }
  }

  # Store relevant parameters
  par <- fit$par
  value <- fit$value

  # Undo the transformation
  y <- y + max_y
  par[1] <- par[1] + max_y
  par[k1+1] <- par[k1+1] + max_y
  par0[1] <- par0[1] + max_y
  par0[k1+1] <- par0[k1+1] + max_y

  coef <- par[(k1+1):(k1+k2)]
  #fitted.values <- as.numeric(x1 %*% par[(k1+1):(k1+k2)])
  fitted.values <- 0
  loss <- value

  # Retun results
  list(
    coefficients  = list(
      coef_mean=par[(k1+1):(k1+k2)],
      coef_lq=NA,
      coef_uq=par[1:k1]
    ),
    fitted.values = fitted.values,
    loss          = loss
  )
}

#' @title lter starting values
#' @description TBA
#' @inheritParams lter.fit
#' @return TBA
#' @keywords internal
#' @export
uter.starting_values <- function(xm, xu, y, uq) {
  # First stage: quantile regression
  fit_rq <- quantreg::rq(y ~ xu-1, tau = uq)
  q <- fit_rq$fitted.values
  par_q <- fit_rq$coefficients

  # Second stage: weighted least squares regression
  fit <- stats::lm(y ~ xm-1, weights = (y <= q) * 1)
  par_m <- fit$coefficients

  # Stack coefficients and return them
  unname(c(par_q, par_m))
}

vcov.uter <- function(object, ...) {

  mb <- ter.lin_mod(par = object$coefficients$coef_mean, x = object$xm)
  qb <- ter.lin_mod(par = object$coefficients$coef_uq, x = object$xu)

  gmb <- cbind(
    object$xm,
    matrix(0, nrow(object$xu), ncol(object$xu))
  )

  gqb <- cbind(
    matrix(0, nrow(object$xm), ncol(object$xm)),
    object$xu
  )

  p <- psi_e(y = object$y, qb = qb, gqb = gqb,
             mb = mb, gmb = gmb, uq = object$uq, return_mean = FALSE)

  n <- nrow(p)
  c <- n^{-1/3}
  Sigma <- crossprod(p) / n

  idx <- (abs(object$y - qb) < c) * 1

  k <- ncol(object$xm) + ncol(object$xu)
  uq <- object$uq

  Lambda <- matrix(0, k, k)
  for (i in 1:n) {
    tmp <- 1/(2*c) * idx[i] * tcrossprod(gqb[i,]) * (-1/mb[i]) / uq + tcrossprod(gmb[i,]) * 1/mb[i]^2
    Lambda <- Lambda + tmp
  }
  Lambda <- Lambda / n
  vcov <- solve(Lambda) %*% Sigma %*% solve(Lambda) / n
  # colnames(vcov) <- rownames(vcov) <- c(colnames(object$xm),
  #                                       colnames(object$xu))

  vcov
}