Nothing
#' Linear Model by Maximum Likelihood (with optional heteroskedasticity)
#'
#' @title Linear Regression via MLE
#'
#' @description
#' \code{lm.mle} fits a linear regression model by maximum likelihood, allowing
#' for optional multiplicative heteroskedasticity in the disturbance variance via
#' a log-linear specification provided through \code{ln.var.v}.
#'
#' @param formula
#' an object of class \code{formula} specifying the regression:
#' typically \code{y ~ x1 + ...}, where \code{y} is the dependent variable and
#' the \code{x}'s are regressors.
#'
#' @param data
#' an optional \code{data.frame} containing the variables referenced in
#' \code{formula}. If not found in \code{data}, variables are taken from
#' \code{environment(formula)}.
#'
#' @param subset
#' an optional logical or numeric vector specifying the subset of observations
#' to be used in estimation.
#'
#' @param ln.var.v
#' optional one-sided formula; e.g. \code{ln.var.v ~ z1 + z2}. When provided,
#' the error variance is modeled as \eqn{\log(\sigma_i^2) = w_i^\top \gamma_v}.
#' If \code{NULL}, the variance is homoskedastic.
#'
#' @param technique
#' character vector specifying the preferred optimization routine(s) in order of
#' preference. Recognized keywords (for future implementation) include \code{"bfgs"}
#' \code{"bhhh"}, \code{"nm"} (Nelder–Mead), \code{"bfgs"},
#' and \code{"cg"}. Default is \code{"bfgs"}. This scaffold records but does not
#' execute the chosen routine.
#'
#' @param lmtol
#' numeric. Convergence tolerance based on scaled gradient (when applicable).
#' Default \code{1e-5}.
#'
#' @param reltol
#' numeric. Relative convergence tolerance for likelihood maximization.
#' Default \code{1e-12}.
#'
#' @param maxit
#' integer. Maximum number of iterations for the optimizer. Default \code{199}.
#'
#' @param optim.report
#' integer. Verbosity level for reporting progress (if implemented). Default \code{1}.
#'
#' @param optim.trace
#' integer. Trace level for optimization (if implemented). Default \code{1}.
#'
#' @param print.level
#' integer. Printing level for summaries. Default \code{0}.
#'
#' @param digits
#' integer. Number of digits for printing. Default \code{4}.
#'
#'
#' @param only.data
#' logical. If \code{TRUE}, returns only constructed data/matrices without
#' estimation. Default \code{FALSE}.
#'
#' @param ...
#' additional arguments reserved for future methods (e.g., bounds, penalties).
#'
#' @details
#' This function fits a maximum-likelihood linear model.
#'
#' The model is
#' \deqn{y_i = x_i^\top \beta + \varepsilon_i,\quad \varepsilon_i \sim \mathcal{N}(0, \sigma_i^2).}
#' When \code{ln.var.v} is supplied, the variance follows
#' \deqn{\log(\sigma_i^2) = w_i^\top \gamma_v,}
#' otherwise \eqn{\sigma_i^2 = \sigma^2} is constant (homoskedastic).
#'
#' This function:
#' \itemize{
#' \item Builds the model frame and \code{X}, \code{y}.
#' \item Builds \code{Zv} for the log-variance index when \code{ln.var.v} is provided.
#' \item Returns a structured object with placeholders for \code{coef}, \code{vcov}, \code{loglik}.
#' }
#' Insert your MLE engine to estimate \eqn{\beta}, and (optionally) \eqn{\sigma^2} or
#' \eqn{\gamma_v}; compute standard errors via AIM/OPG as required by \code{vcetype}.
#'
#' @return A list of class \code{"snreg"} containing:
#' \item{\code{par}}{Numeric vector of MLE parameter estimates.}
#' \item{\code{value}}{Maximized log-likelihood.}
#' \item{\code{ll}}{Maximized log-likelihood (alias).}
#' \item{\code{counts}}{Number of function evaluations (from \code{optim}).}
#' \item{\code{convergence}}{Convergence code from \code{optim}.}
#' \item{\code{message}}{Message returned by \code{optim}.}
#' \item{\code{hessian}}{Observed Hessian matrix at optimum.}
#' \item{\code{coef}}{Named coefficient vector; equal to \code{par}.}
#' \item{\code{vcov}}{Variance–covariance matrix \code{solve(-hessian)}.}
#' \item{\code{sds}}{Standard errors: \code{sqrt(diag(vcov))}.}
#' \item{\code{ctab}}{Coefficient table with columns: \code{Estimate},
#' \code{Std.Err}, \code{Z value}, \code{Pr(>z)}.}
#' \item{\code{esample}}{Logical vector: observations used in estimation.}
#' \item{\code{n}}{Number of observations in estimation sample.}
#'
#' The object inherits the default
#' \code{optim} components and is assigned class \code{"snreg"}.
#'
#'
#' @seealso
#' \code{\link{snsf}}, \code{\link{snreg}}
#'
#' @examples
#'
#' library(snreg)
#'
#' data("banks07")
#' head(banks07)
#'
#' # Translog cost function specification
#'
#' spe.tl <- log(TC) ~ (log(Y1) + log(Y2) + log(W1) + log(W2))^2 +
#' I(0.5 * log(Y1)^2) + I(0.5 * log(Y2)^2) +
#' I(0.5 * log(W1)^2) + I(0.5 * log(W2)^2)
#'
#' # Specification 1: homoskedastic noise (ln.var.v = NULL)
#'
#' formSV <- NULL # variance equation; constant variance
#'
#' m1 <- lm.mle(
#' formula = spe.tl,
#' data = banks07,
#' ln.var.v = formSV
#' )
#'
#' summary(m1)
#'
#' # Specification 2: heteroskedastic
#'
#' formSV <- ~ log(TA) # variance equation; heteroskedastic noise (variance depends on TA)
#'
#' m2 <- lm.mle(
#' formula = spe.tl,
#' data = banks07,
#' ln.var.v = formSV
#' )
#'
#' summary(m2)
#'
#' @keywords regression maximum-likelihood heteroskedasticity
#' @export
lm.mle <- function (
formula, data, subset,
ln.var.v = NULL,
technique = c('bfgs'),#,'bhhh','nm', 'bfgs', 'cg'),
lmtol = 1e-5,
reltol = 1e-12,
maxit = 199,
optim.report = 1,
optim.trace = 10,
print.level = 0,
digits = 4,
only.data = FALSE,
...){
# handle technique
technique <- technique[1]
if( !technique %in% c('nr','bhhh','nm', 'bfgs','cg') ){
stop("'technique' is invalid")
}
if(technique == 'nm') technique <- 'Nelder-Mead'
if(technique == 'bfgs') technique <- 'BFGS'
if(technique == 'cg') technique <- 'CG'
mf0 <- match.call(expand.dots = FALSE, call = sys.call(sys.parent(n = 0)))
if(is.null(ln.var.v)){
ln.var.v <- ~ 1
ksv <- 1
# cat.print(ksv)
} else {
ksv <- 17
}
form1 <- as.Formula(formula, ln.var.v)
data.order <- as.numeric(rownames(data)) # seq_len(nrow(data))
mf <- mf0
mf$formula <- form1 #formula( form )
# cat("05\n")
m <- match(c("formula", "data", "subset"), names(mf), 0L)
# cat("06\n")
# cat.print(m)
mf <- mf[c(1L, m)]
# cat("07\n")
# cat.print(mf)
mf[[1L]] <- as.name("model.frame")
# cat("08\n")
# cat.print(mf)
# cat.print(needed.frame)
# mf <- eval(mf, sys.frame(sys.parent(n = needed.frame+2)))
mf <- eval(mf, parent.frame())
# cat.print(mf)
# cat("09\n")
# cat.print(mf)
mt <- attr(mf, "terms")
x <- as.matrix(model.matrix(mt, mf))
# print(x)
# now get the names in the entire data
# esample <- seq_len( nrow(data) ) %in% as.numeric(rownames(x))
esample <- rownames(data) %in% rownames(x)
# cat("10\n")
# cat.print(mt)
X <- model.matrix(mt, mf)
# cat("11\n")
# cat.print(X)
esample.nu <- as.numeric(rownames(X))
esample <- data.order %in% esample.nu
Y <- as.matrix(model.part(form1, data = mf, lhs = 1, drop = FALSE))
# print(length(Y))
# print(Y)
X <- as.matrix( model.matrix(formula(form1, lhs = 0, rhs = 1), data = mf))
# print(X)
n <- nrow(Y)
k <- ncol(X)
# ksv: noise, scale/variance
# zsv
if(ksv == 1){
zsv <- matrix(1, nrow = sum(esample), ncol = 1)
}
else {
zsv <- as.matrix( model.matrix(formula(form1, lhs = 0, rhs = 2), data = mf))
ksv <- ncol(zsv)
}
# from `sf`
colnames(X)[1] <- "(Intercept)"
colnames(zsv)[1] <- "(Intercept)"
abbr.length <- 34
names_x <- abbreviate(colnames(X), abbr.length+6, strict = TRUE, dot = FALSE, method = "both.sides")
# names_zu <- abbreviate(colnames(Zu), 9, strict = TRUE, dot = FALSE)
# names_zv <- abbreviate(colnames(Zv), 9, strict = TRUE, dot = FALSE)
# Zu_colnames <- abbreviate(colnames(Zu), abbr.length, strict = TRUE, dot = FALSE, method = "both.sides")
names_zv <- paste0("lnVARv0i_", abbreviate(colnames(zsv), abbr.length, strict = TRUE, dot = FALSE, method = "both.sides"))
coef.names.full <- c(
names_x,
paste("lnVARv0i_",c("(Intercept)", names_zv[-1]),"", sep = "")
)
# cat.print(coef.names.full)
coef.names.full <- c(
names_x, names_zv
)
# starting values ---------------------------------------------------------
time.05 <- proc.time()
tymch2 <- lm(Y ~ X - 1)
olsres <- resid(tymch2)
if(ksv==1){
sv.initial <- log(mean(olsres^2))
} else {
sv.initial <- c(log(mean(olsres^2)), rep(0,ksv-1))
}
theta0 <- c( coef(tymch2), sv.initial)
names(theta0) <- coef.names.full
if(print.level >= 3){ cat.print(theta0) }
if(print.level >= 2){
max.name.length <- max(nchar(names(theta0) ))
est.rez.left <- floor( (max.name.length+42-33) / 2 )
est.rez.right <- max.name.length+42-33 - est.rez.left
cat("\n",rep("-", est.rez.left)," Regression with normal errors: ",rep("-", est.rez.right),"\n\n", sep ="")
}
# mytrace <- ifelse(print.level < 2, 0, 1)
if(print.level <= 2){
trace <- trace1 <- 0
} else {
trace1 <- optim.trace
}
tymch <- optim(
par = theta0, fn = .ll.lm.mle, gr = .gr.lm.mle, y = Y, x = X, zsv = zsv, k = k, ksv = ksv,
method = technique, control = list(fnscale = -1, trace = trace1, REPORT = optim.report, maxit = 10000),
hessian = TRUE)
tymch$gradient <- .gr.lm.mle(theta=tymch$par, y = Y, x = X, zsv = zsv, k = k, ksv = ksv)
tymch$coef <- tymch$par
tymch$resid <- Y - as.vector( X %*% tymch$par[1:ncol(X)] )
# tymch$vcov <- tryCatch(solve(-tymch$hessian), tol = .Machine$double.xmin * 10, error = function(e) e )
# Compute vcov with tryCatch
tymch$vcov <- tryCatch(
solve(-tymch$hessian),
error = function(e) e,
tol = .Machine$double.xmin * 10
)
# If no error, compute gHg
if (!inherits(tymch$vcov, "error")) {
# print(tymch$gradient)
# print(tymch$hessian)
tymch$gHg <- tymch$gradient %*% tymch$vcov %*% tymch$gradient
tymch$lmtol <- lmtol
}
# cat.print(tymch1)
# table with coefs
# tymch$par <- c(tymch$par, sv = unname(sqrt(exp(tymch$par[k+1]))) )
# tymch$sds <- suppressWarnings( sqrt(diag(solve(-tymch$hessian))) )
# tymch$sds <- c(tymch$sds, tymch$sds[k+1]*exp(tymch$par[k+1]))
tymch$sds <- suppressWarnings( sqrt(diag(solve(-tymch$hessian))) )
time.06 <- proc.time()
est.time.sec <- (time.06-time.05)[3]
names(est.time.sec) <- "sec"
if(print.level >= 2){
.timing(est.time.sec, "\nLog likelihood maximization completed in\n")
# cat("___________________________________________________\n")
}
tymch$ctab <- cbind(tymch$par, tymch$sds, tymch$par/tymch$sds,2*(1-pnorm(abs(-tymch$par/tymch$sds))))
colnames(tymch$ctab) <- c("Estimate", "Std.Err", "Z value", "Pr(>z)")
if(print.level >= 1){
printCoefmat(tymch$ctab)
cat("",rep("_", max.name.length+42-1),"", "\n", sep = "")
}
# cat.print(tymch1)
# return(tymch1)
# cat('Auxiliary parameters \n')
# Auxiliary parameters
# tymch $ resid <- as.vector(unname(eps0))
# shat2 <- var( tymch$resid )
# tymch $ shat2 <- shat2
# tymch $ shat <- sqrt(shat2)
# tymch $ RSS <- crossprod(eps0)
# tymch $ aic <- log((n-1)/n*shat2)+1+2*(k.all+1)/n
# tymch $ bic <- log((n-1)/n*shat2)+1+(k.all+1)*log(n)/n
# tymch $ aic <- 2*k.all - 2*tymch$ll
# tymch $ bic <- log(n)*k.all - 2*tymch$ll
# tymch $ Mallows <- tymch $ RSS/shat2 - n + 2*k.all
# tymch $ coef <- tymch$par
tymch $ esample <- esample
tymch$ll <- unname(tymch$value)
# tymch $ sv <- as.vector(unname(sv))
tymch $ n <- n
tymch $ K <- ncol(X)
tymch $ Ksv <- ksv
# if(distribution == "e"){
# tymch$ su <- 1/as.vector(unname(lam))
# } else {
# tymch$ su <- unname(su)
# }
# tymch$ skewness <- as.vector(unname(zsk %*% tymch$par[(k + ksv + 1) :(k + ksv + ksk)]))
# tymch$ u <- as.vector(unname(u))
# tymch$ eff <- exp(-tymch$ u)
# cat.print(tymch$coef)
# cat.print(tymch$par)
# cat.print(tymch$vcov)
# cat.print(coef.names.full)
tymch$esttime <- est.time.sec
names(tymch$coef) <- names(tymch$par) <- colnames(tymch$vcov) <- rownames(tymch$vcov) <-
coef.names.full
class(tymch) <- c("snreg","lm.mle")
return(tymch)
}
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.