Nothing
#' Fitting Ordinal Probit Switching Regression Models
#'
#' High-level formula interface to the workhorse [`opsr.fit`].
#'
#' @param formula an object of class `"Formula" "formula"`: A symbolic description
#' of the model to be fitted. The details of model specification are given under
#' 'Details'.
#' @param data an optional data frame, list or environment (or object coercible by
#' [`as.data.frame`] to a data frame) containing the variables in the model. If
#' not found in `data`, the variables are taken from `environment(formula)`,
#' typically the environment from which `opsr` is called.
#' @param subset an optional vector specifying a subset of observations to be used
#' in the fitting process. (See additional details in the 'Details' section of
#' the [`model.frame`] documentation.).
#' @param weights an optional vector of weights to be used in the fitting process.
#' Should be `NULL` or a numeric vector. If non-NULL, then observation-specific
#' log-likelihood contributions are multiplied by their corresponding weight
#' before summing.
#' @param na.action a function which indicates what should happen when the data
#' contain `NA`s. The default is set by the `na.action` setting of [`options`],
#' and is [`na.fail`] if that is unset. The 'factory-fresh' default is [`na.omit`].
#' Another possible value is `NULL`, no action. Value [`na.exclude`] can be useful.
#' @param start a numeric vector with the starting values (passed to [`maxLik::maxLik`]).
#' If no starting values are provided, reasonable values are auto-generated via
#' the Heckman 2-step procedure [`opsr_2step`]. The structure of `start` has to
#' conform with `opsr`'s expectations. See [`opsr_check_start`] for further details.
#' @param fixed parameters to be treated as constants at their `start` values. If
#' present, it is treated as an index vector of `start` parameters (passed to
#' [`maxLik::maxLik`]).
#' @param method maximzation method (passed to [`maxLik::maxLik`]).
#' @param iterlim maximum number of iterations (passed to [`maxLik::maxLik`]).
#' @param printLevel larger number prints more working information (passed to [`maxLik::maxLik`]).
#' @param nThreads number of threads to be used. Do not pass higher number than
#' number of ordinal outcomes. See also [`opsr_check_omp`] and [`opsr_max_threads`].
#' @param .get2step if `TRUE`, returns starting values as generated by [`opsr_2step`]. Will
#' not proceed with the maximum likelihood estimation.
#' @param .useR if `TRUE` usese [`loglik_R`]. Go grab a coffe.
#' @param .censorRho if `TRUE`, rho starting values are censored to lie in the
#' interval \[-0.85, 0.85\].
#' @param ... further arguments passed to [`maxLik::maxLik`].
#'
#' @return An object of class `"opsr" "maxLik" "maxim"`.
#'
#' @details
#' Models for `opsr` are specified symbolically. A typical model has the form
#' `ys | yo ~ terms_s | terms_o1 | terms_o2 | ...`. `ys` is the ordered (numeric)
#' response vector (starting from 1, in integer-increasing fashion). For the `terms`
#' specification the rules of the regular formula interface apply (see also [stats::lm]).
#' The intercept in the `terms_s` (selection process) is excluded automatically
#' (no need to specify `-1`). If the user wants to specify the same process for
#' all continuous outcomes, two processes are enough (`ys | yo ~ terms_s | terms_o`).
#' Note that the model is poorly identifiable if `terms_s == terms_o` (same regressors
#' are used in selection and outcome processes).
#'
#' @example R/examples/ex-basic_workflow.R
#'
#' @export
opsr <- function(formula, data, subset, weights, na.action, start = NULL,
fixed = NULL, method = "BFGS", iterlim = 1000, printLevel = 2,
nThreads = 1, .get2step = FALSE, .useR = FALSE, .censorRho = TRUE, ...) {
start_time <- Sys.time()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0)
mf <- mf[c(1, m)]
f <- Formula::Formula(formula)
mf[[1]] <- as.name("model.frame")
mf$formula <- f
mf <- eval(mf, parent.frame())
## prep args for opsr.fit()
l <- length(f)
nRes <- l[1]
if (nRes != 2) {
stop("formula accepts two responses (selection and continuous outcome).",
" However, ", nRes, " were specified.")
}
nParts <- l[2]
Z <- Formula::model.part(f, data = mf, lhs = 1, drop = TRUE)
if (is.factor(Z)) {
stop("Selection outcome has to be numeric (and not a 'factor').")
}
Y <- Formula::model.part(f, data = mf, lhs = 2, drop = TRUE)
nReg <- length(unique(Z))
nObs <- length(Y)
if (any(sort(unique(Z)) != 1:max(Z))) {
stop("Selection outcome must be ordered starting from 1 in increasing fashion",
" without any gaps. However, unique levels are ", unique(Z))
}
if (nParts != 2 && nParts != nReg + 1) { # +1 for W (selection)
stop("formula parts must be of length ", nReg + 1, " or 2 (if the same",
" specification is used for all continuous outcomes. However, ", nParts,
" were specified.")
}
w <- as.vector(model.weights(mf))
if (!is.null(w) && !is.numeric(w)) {
stop("'weights' must be a numeric vector")
}
if (is.null(w)) {
w <- rep(1, length(Y))
}
## reorder weights to match with shuffling in opsr.fit() where we compute
## likelihood values for all elements Z == 1, then Z == 2, etc. and then
## stack them
weights <- w # keep a copy to attach to output
w <- w[order(Z)]
W <- model.matrix(update(f, ~ . -1), mf, rhs = 1) # no intercept (identification threshold)!
Ws <- lapply(seq_len(nReg), function(i) {
as.matrix(W[Z == i, ])
})
Xs <- lapply(seq_len(nReg), function(i) {
## if the same outcome equation applies
rhs <- ifelse(nParts == 2, 2, i + 1) # first is for selection process
X <- model.matrix(f, mf, rhs = rhs)
x_mat <- as.matrix(X[Z == i, ])
intercept_only <- length(attr(stats::terms(f, rhs = rhs), "term.labels")) == 2
if (intercept_only) colnames(x_mat) <- "(Intercept)"
x_mat
})
Ys <- lapply(seq_len(nReg), function(i) {
Y[Z == i]
})
if (.get2step) {
return(opsr_2step(W, Xs, Z, Ys))
}
## check or generate starting values (theta)
if (!is.null(start)) {
start <- opsr_check_start(start, W, Xs)
} else {
start <- opsr_2step(W, Xs, Z, Ys)
## censor rho to [-0.85, 0.85]
if (.censorRho) {
rho <- grepl("^rho", names(start))
start[rho] <- censor(start[rho], lower = -0.85, upper = 0.85)
}
}
fit <- opsr.fit(Ws, Xs, Ys, start, fixed, w,
method, iterlim, printLevel, nThreads, .useR, ...)
runtime <- Sys.time() - start_time
## return also some other useful information
fit$call <- match.call()
fit$formula <- f
fit$loglik <- function(theta) fit$objectiveFn(theta)[order(Z)] ## ll func with correct order
fit$runtime <- runtime
fit$start <- start
fit$nReg <- nReg
fit$nObs <- c(Total = nObs, stats::setNames(c(table(Z)), paste0("o", seq_len(nReg))))
fit$nParams <- length(fit$estimate) - sum(fit$fixed)
fit$df <- fit$nObs[["Total"]] - fit$nParams
fit$nParts <- nParts
fit$weights <- weights
class(fit) <- c("opsr", class(fit))
fit
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.