#' Support vector regression and inference under l1 regularization
#'
#' This function performs the support vector regression under l1
#' regularization. A regression rankscore test is performed to
#' conduct inference and construct confidence intervals.
#'
#' @param formula Regression formula, e.g., \code{y ~ x1 + x2}.
#' @param data Data frame.
#' @param epsilon Scalar defining soft-thresholding rule.
#' @param lambda Scalar parameter to scale l1 penalty.
#' @param h Scalar determining the quantiles of a standard normal
#' distribution used to define the bandwidth for nonparametric
#' density estimation. If no argument is provided, then errors are
#' assumed to be homoskedastic, and error density estimation is
#' not performed. See
#' \href{https://econpapers.repec.org/paper/attwimass/8818.htm}{Powell
#' (1991)} and
#' \href{https://yuehaob.github.io/assets/svml1.pdf}{Bai
#' et. al. (2021)} for details.
#' @param kappa Scalar that directly scales the bandwidth for
#' nonparametric density estimation. If no argument is provided,
#' then errors are assumed to be homoskedastic, and error density
#' estimation is not performed. See
#' \href{https://econpapers.repec.org/paper/attwimass/8818.htm}{Powell
#' (1991)} and
#' \href{https://yuehaob.github.io/assets/svml1.pdf}{Bai
#' et. al. (2021)} for details.
#' @param solver Character, name of the linear programming package in
#' R used to obtain the bounds on the treatment effect. The
#' function supports \code{'gurobi'}, \code{'cplexapi'},
#' \code{'lpsolveapi'}. The name of the solver should be provided
#' with quotation marks.
#' @param inference Boolean, determines whether or not p-values and
#' confidence intervals will be estimated. Set to \code{TRUE} by
#' default.
#' @param confidence.level Scalar between 0 and 1. Determines the
#' confidence level for estimating confidence intervals.
#' @param confidence.iter Integer determining maximum number of
#' iterations performed when estimating the confidence interval.
#' Set to 25 by default.
#' @param confidence.tol Scalar between 0 and 1. Determines the
#' tolerance for the confidence level when estimating the
#' confidence interval.
#' @param confidence.same Scalar between 0 and 1. Sets the tolerance
#' for determining when the confidence interval estimate may no
#' longer be improved.
#' @return A list containing the coefficient estimates from the
#' regression, the p-values, and the confidence intervals.
#' @export
l1svr <- function(formula, data, epsilon, lambda,
h = NULL, kappa = NULL,
solver = 'gurobi',
inference = TRUE,
confidence.level = NULL,
confidence.iter = 25,
confidence.tol = 1e-3,
confidence.same = 1e-06) {
origCall <- match.call()
## Check that the solver is valid
solver <- tolower(solver)
if (! solver %in% c("gurobi",
"cplexapi",
"lpsolveapi")) {
stop(gsub("\\s+", " ",
paste0("Estimator is incompatible with linear
programming package '", solver,"'. Please
install one of the
following linear programming packages instead:
gurobi (version 7.5-1 or later);
cplexAPI (version 1.3.3 or later);
lpSolveAPI (version 5.5.2.0 or later).")),
call. = FALSE)
}
## Check that the user actually has the solver
missingSolver <- FALSE
if (solver == 'gurobi') {
if (!requireNamespace("gurobi", quietly = TRUE)) {
missingSolver <- TRUE
}
}
if (solver == 'lpsolveapi') {
if (!requireNamespace("lpSolveAPI", quietly = TRUE)) {
missingSolver <- TRUE
}
}
if (solver == 'cplexapi') {
if (!requireNamespace("cplexAPI", quietly = TRUE)) {
missingSolver <- TRUE
}
}
if (missingSolver) {
stop(gsub("\\s+", " ",
paste0("The solver '", solver, "' is not installed.
Please install one of the following packages required
for estimation:
gurobi (version 7.5-1 or later);
cplexAPI (version 1.3.3 or later);
lpSolveAPI (version 5.5.2.0 or later).")),
call. = FALSE)
}
## Check that a response variable is included in the formula
tmpTerms <- terms(formula)
if (attr(tmpTerms, 'response') == 0) {
stop('Dependent variable not declared in formula.',
call. = FALSE)
}
## Check whether there is intercept
tmpInt <- as.logical(attr(tmpTerms, 'intercept'))
## Determine whether confidence intervals should be constructed
if (!is.null(confidence.level)) {
confidence <- TRUE
if (!is.numeric(confidence.level)) {
stop(gsub('\\s+', ' ',
"The argument 'confidence.level' must be numeric and
lie between 0 and 1."),
call. = FALSE)
}
if (confidence.level <= 0 | confidence.level >= 1) {
stop(gsub('\\s+', ' ',
"The argument 'confidence.level' must
lie between 0 and 1."),
call. = FALSE)
}
} else {
confidence <- FALSE
}
if (!confidence) {
if (hasArg(confidence.iter) |
hasArg(confidence.tol) |
hasArg(confidence.same)) {
warning(gsub('\\s+', ' ',
"To enable estimation of confidence intervals,
set 'confidence.level' to a value between 0 and 1.
Otherwise, the arguments 'confidence.iter',
'confidence.tol', and 'confidence.same' are
ignored."),
call. = FALSE)
}
}
## Determine heteroskedasticity. Heteroskedasticity is assumed as
## long as the user inputs an argument into either h or kappa.
if (!is.null(h) | !is.null(kappa)) {
heteroskedastic <- TRUE
} else {
heteroskedastic <- FALSE
}
if (heteroskedastic) {
if (is.null(h) | is.null(kappa)) {
stop(gsub('\\s+', ' ',
"If the errors are homoskedastic, then both arguments
'h' and 'kappa' must not be provided.
If the errors are heteroskedastic, then
both arguments 'h' and 'kappa' must be provided."),
call. = FALSE)
}
if (!(is.numeric(h) && h > 0) | !(is.numeric(kappa) && kappa > 0)) {
stop(gsub('\\s+', ' ',
"Arguments 'h' and 'kappa'
must be strictly positive scalars."),
call. = FALSE)
}
}
if (!inference && (heteroskedastic == TRUE |
confidence == TRUE)) {
warning(gsub('\\s+', ' ',
"Arguments 'h', 'kappa', and those beginning with
'confidence.' are all ignored if
'inference = FALSE'."),
call. = FALSE)
heteroskedastic <- FALSE
confidence <- FALSE
}
## Construct design matrix
tmpMf <- model.frame(formula = formula, data = data)
tmpMt <- terms(x = formula, data = data, rhs = 1)
fullX <- model.matrix(tmpMt, tmpMf)
n <- nrow(fullX)
Y <- as.vector(tmpMf[, all.vars(formula)[1]])
origNames <- colnames(fullX)
## Remove vector of constants/intercept from design matrix
if (tmpInt) {
X <- matrix(fullX[, -1], ncol = ncol(fullX) - 1)
colnames(X) <- origNames[-1]
} else {
X <- fullX
}
## Estimate coefficients
coefEstimates <- svmRegress(Y, X, epsilon, lambda, tmpInt, solver)
coefEstimates$N <- n
coefEstimates$call <- origCall
class(coefEstimates) <- 'l1svr'
## Generate output without inference
if (!inference) {
return(coefEstimates)
} else {
## Conduct inference
## Obtain residuals
U <- Y - fullX %*% coefEstimates$coef
## Conduct inference for each term
pVec <- NULL
for (i in 1:ncol(fullX)) {
if (tmpInt) {
if (i == 1) {
tmpP <- svmInference(Xr = X, Zr = rep(1, n), Y = Y, U = U,
epsilon = epsilon, lambda = lambda,
solver = solver,
intercept = FALSE,
nullGamma = 0,
heteroskedastic = heteroskedastic,
hc = h, kappa = kappa)
pVec <- c(pVec, tmpP)
} else {
Xr <- matrix(X[, -(i - 1)], ncol = ncol(X) - 1)
colnames(Xr) <- colnames(X)[-(i - 1)]
tmpP <- svmInference(Xr = Xr, Zr = X[, (i - 1)],
Y = Y, U = U,
epsilon = epsilon, lambda = lambda,
solver = solver,
intercept = TRUE, nullGamma = 0,
heteroskedastic = heteroskedastic,
hc = h, kappa = kappa)
pVec <- c(pVec, tmpP)
}
} else {
Xr <- matrix(X[, -i], ncol = ncol(X) - 1)
colnames(Xr) <- colnames(X)[-i]
tmpP <- svmInference(Xr = Xr, Zr = X[, i],
Y = Y, U = U,
epsilon = epsilon, lambda = lambda,
solver = solver,
intercept = FALSE, nullGamma = 0,
heteroskedastic = heteroskedastic,
hc = h, kappa = kappa)
pVec <- c(pVec, tmpP)
}
}
names(pVec) <- origNames
coefEstimates$pvalues <- pVec
if (confidence) {
confidenceMatLb <- NULL
confidenceMatUb <- NULL
## ## Below is code for an alternative method for
## ## determining the confidence intervals.
## if (optimAlt) {
## args <- list(f = ciIterOptim,
## target = 1 - confidence.level,
## intercept = tmpInt, Y = Y,
## X = X, U = U, epsilon = epsilon,
## lambda = lambda,
## heteroskedastic = heteroskedastic, h = h,
## kappa = kappa)
## }
args <- list(FUN = ciIter,
target = 1 - confidence.level,
iter.max = confidence.iter,
tol = confidence.tol,
tol.same = confidence.same,
intercept = tmpInt, Y = Y,
X = X, U = U, epsilon = epsilon,
lambda = lambda,
heteroskedastic = heteroskedastic,
hc = h,
kappa = kappa)
cat('\n')
for (i in 1:ncol(fullX)) {
cat(paste0('Estimating confidence interval for ',
colnames(fullX)[i], '...\n'))
## ## Below is code for an alternative method for
## ## determining the confidence intervals.
## if (optimAlt) {
## args$index <- i
## ## Lower bound
## t0 <- Sys.time()
## args$interval <-
## c(coefEstimates$coef[i] - 5 * abs(coefEstimates$coef[i]),
## coefEstimates$coef[i])
## lb <- do.call(optimize, args)
## ## Upper bound
## t1 <- Sys.time()
## args$interval <-
## c(coefEstimates$coef[i],
## coefEstimates$coef[i] + 5 * abs(coefEstimates$coef[i]))
## ub <- do.call(optimize, args)
## }
## Find lower bound
args$index <- i
args$init <- coefEstimates$coef[i]
args$left <- TRUE
t0 <- Sys.time()
args$increment <- abs(coefEstimates$coef[i])
lb <- do.call(gridSearch, args)
## Find the upper bound
t1 <- Sys.time()
args$left <- FALSE
ub <- do.call(gridSearch, args)
## Store results
confidenceMatLb <- rbind(confidenceMatLb, lb)
confidenceMatUb <- rbind(confidenceMatUb, ub)
}
cat('\n')
confidenceMatLb <- data.frame(confidenceMatLb)
confidenceMatUb <- data.frame(confidenceMatUb)
confidenceMatLb$status <- factor(confidenceMatLb$status,
levels = c(1, 2, 3),
labels = c('Optimal',
'Iter. limit',
'Prec. limit'))
confidenceMatUb$status <- factor(confidenceMatUb$status,
levels = c(1, 2, 3),
labels = c('Optimal',
'Iter. limit',
'Prec. limit'))
rownames(confidenceMatLb) <- colnames(fullX)
rownames(confidenceMatUb) <- colnames(fullX)
coefEstimates$ci <- list(lower = confidenceMatLb,
upper = confidenceMatUb,
level = confidence.level,
tol = confidence.tol,
heteroskedastic = heteroskedastic)
if ('Iter. limit' %in% confidenceMatLb$status |
'Iter. limit' %in% confidenceMatUb$status) {
warning(gsub('\\s+', ' ',
'Estimation for some confidence intervals was
terminated,
iteration maximum reached.'),
call. = FALSE)
}
if ('Prec. limit' %in% confidenceMatLb$status |
'Prec. limit' %in% confidenceMatUb$status) {
warning(gsub('\\s+', ' ',
'Estimation for some confidence intervals was
terminated,
precision limit reached.'),
call. = FALSE)
}
}
return(coefEstimates)
}
}
#' l1-SVR regression function
#'
#' This function carries out an SVR regression under l1 penalty.
#'
#' @param Y Vector, dependent variable.
#' @param X 2d array, independent variables, excluding column of
#' constants for the intercept.
#' @param epsilon Parameter defining soft-thresholding rule.
#' @param lambda Parameter to scale l1 penalty.
#' @param intercept Boolean, set to \code{TRUE} if an intercept
#' should be included in the regression.
#' @param solver character, name of the linear programming package in
#' R used to obtain the bounds on the treatment effect. The
#' function supports \code{'gurobi'}, \code{'cplexapi'},
#' \code{'lpsolveapi'}. The name of the solver should be provided
#' with quotation marks.
#' @return A list including the coefficient estimates, and
#' solutions to the primal and dual problem that can be used to
#' identify the support vectors and recover the coefficient
#' estimates.
svmRegress <- function(Y, X, epsilon, lambda, intercept = TRUE, solver) {
X <- as.matrix(X)
Y <- as.vector(Y)
## Primal problem:
## Declare all required maticex
cvecPrimal <- c(0,
rep(lambda, ncol(X)),
rep(lambda, ncol(X)),
rep(1, nrow(X)),
rep(1, nrow(X)))
AmatPrimal <- cbind(
c(rep(1, nrow(X)), rep(-1, nrow(X))),
rbind(-X, X),
rbind(X, -X),
rbind(-diag(nrow(X)), matrix(0, nrow = nrow(X), ncol = nrow(X))),
rbind(matrix(0, nrow = nrow(X), ncol = nrow(X)), -diag(nrow(X))))
bvecPrimal <- c(rep(epsilon, nrow(X)) + Y,
rep(epsilon, nrow(X)) - Y)
ubPrimal <- c(Inf, rep(Inf, ncol(X)), rep(Inf, ncol(X)),
rep(Inf, nrow(X)), rep(Inf, nrow(X)))
lbPrimal <- c(-Inf, rep(0, ncol(X)), rep(0, ncol(X)),
rep(0, nrow(X)), rep(0, nrow(X)))
sensePrimal <- rep("<=", 2 * nrow(X))
if (!intercept) {
AmatPrimal <- AmatPrimal[, -1]
cvecPrimal <- cvecPrimal[-1]
ubPrimal <- ubPrimal[-1]
lbPrimal <- lbPrimal[-1]
}
## Pass matrices into Gurobi
modelPrimal = list()
modelPrimal$modelsense <- "min"
modelPrimal$obj <- cvecPrimal
modelPrimal$A <- AmatPrimal
modelPrimal$rhs <- bvecPrimal
modelPrimal$sense <- sensePrimal
modelPrimal$ub <- ubPrimal
modelPrimal$lb <- lbPrimal
if (solver == 'gurobi') {
resultPrimal <- runGurobi(modelPrimal)
} else {
modelPrimal <- lpSetupSolver(modelPrimal, solver)
if (solver == 'lpsolveapi') {
resultPrimal <- runLpSolveAPI(modelPrimal, 'min')
}
if (solver == 'cplexapi') {
resultPrimal <- runCplexAPI(modelPrimal, cplexAPI::CPX_MIN)
}
}
## Prepare primal solutions
solutionPrimal <- resultPrimal$optx
tmpShift <- 0
if (intercept) {
tmpShift <- 1
betaIntercept <- solutionPrimal[1]
}
betaMinus <- solutionPrimal[(1 + tmpShift):(ncol(X) + tmpShift)]
betaPlus <- solutionPrimal[(1 + tmpShift + ncol(X)):(tmpShift + 2 * ncol(X))]
auxiliaryPrimal <- solutionPrimal[(2 * ncol(X) + 1 + tmpShift):length(solutionPrimal)]
primalNegative <- auxiliaryPrimal[1:nrow(X)] ## xi minus
primalPositive <- auxiliaryPrimal[(nrow(X) + 1):length(auxiliaryPrimal)] ## xi plus
if (intercept) {
beta <- c(betaIntercept, betaPlus - betaMinus)
names(beta) <- c('(Intercept)', colnames(X))
} else {
beta <- betaPlus - betaMinus
names(beta) <- colnames(X)
}
## Prepare dual solutions
if (solver != 'lpsolveapi') {
solutionDual <- resultPrimal$opty
dualNegative <- solutionDual[1:nrow(X)] ## alpha minus
dualPositive <- solutionDual[(nrow(X) + 1):(2 * nrow(X))] ## alpha plus
} else {
## lpsolveAPI has rather odd dual solutions, and the
## documentation is not good enough to determine why. To avoid
## inconsistency, manually declare and solve the dual problem.
cvecDual <- bvecPrimal
AmatDual <- t(AmatPrimal)
bvecDual <- cvecPrimal
ubDual <- rep(0, 2 * nrow(X))
lbDual <- rep(-Inf, 2 * nrow(X))
senseDual <- c("=",
rep("<=", ncol(X)),
rep("<=", ncol(X)),
rep("<=", nrow(X)),
rep("<=", nrow(X)))
if (!intercept) {
senseDual <- senseDual[-1]
}
## Pass matrices into Gurobi
modelDual = list()
modelDual$modelsense <- "max"
modelDual$obj <- cvecDual
modelDual$A <- AmatDual
modelDual$rhs <- bvecDual
modelDual$sense <- senseDual
modelDual$ub <- ubDual
modelDual$lb <- lbDual
resultDual <- runLpSolveAPI(modelDual, 'min')
## Prepare solutions
dualPositive <- resultDual$optx[1:nrow(X)] ## alpha plus
dualNegative <- resultDual$optx[(nrow(X) + 1):(2 * nrow(X))] ## alpha minus
}
## Return output
return(list(coefficients = beta,
primalNegative = primalNegative,
primalPositive = primalPositive,
dualNegative = dualNegative,
dualPositive = dualPositive))
}
#' l1-SVR regression rankscore test
#'
#' This function performs the l1-SVR regression rankscore test for a single coefficient, and returns the p-value.
#'
#' @param Xr Matrix of covariates, excluding the covariate of interest
#' for which inference is being conducted.
#' @param Zr Vector of the covariate of interest for which inference
#' is being conducted.
#' @param Y Vector of the outcome.
#' @param U Vector of the residuals.
#' @param epsilon Real scalar, bandwidth for SVR.
#' @param lambda Real scalar, parameter for SVR that scales the
#' penalty.
#' @param solver Character, name of the linear programming package in
#' R used to obtain the bounds on the treatment effect. The
#' function supports \code{'gurobi'}, \code{'cplexapi'},
#' \code{'lpsolveapi'}. The name of the solver should be provided
#' with quotation marks.
#' @param intercept Boolean, set to \code{TRUE} if an intercept should
#' be included in the regression.
#' @param nullGamma Real scalar, the coefficient on \code{Zr} under
#' the null.
#' @param heteroskedastic Boolean, indicate whether data has
#' heteroskedastic errors. If set to \code{TRUE}, then density
#' estimation is performed when conducting inference.
#' @param hc Scalar determing the quantiles of a standard normal
#' distribution used to define the bandwidth for nonparametric
#' density estimation. See
#' \href{https://econpapers.repec.org/paper/attwimass/8818.htm}{Powell
#' (1991)} and
#' \href{https://yuehaob.github.io/assets/svml1.pdf}{Bai
#' et. al. (2021)} for details.
#' @param kappa Scalar that directly scales the bandwdith for
#' nonparametric density estimation. See
#' \href{https://econpapers.repec.org/paper/attwimass/8818.htm}{Powell
#' (1991)} and
#' \href{https://yuehaob.github.io/assets/svml1.pdf}{Bai
#' et. al. (2021)} for details.
#' @return A p-value.
svmInference <- function(Xr, Zr, Y, U, epsilon, lambda = 0,
solver = 'gurobi',
intercept = TRUE, nullGamma = 0,
heteroskedastic = FALSE, hc = 0.5, kappa = 1) {
Yr <- Y - Zr * nullGamma
tau <- 0.5
n <- length(Yr)
uHat <- U + epsilon
if (intercept) Xrc <- cbind(1, Xr)
if (!intercept) Xrc <- Xr
if (!(heteroskedastic & epsilon == 0)) { ## i.e., if not hetero.
## median regression
concResults <- svmRegress(Y = Yr, X = Xr, epsilon = epsilon,
lambda = lambda,
solver = solver,
intercept = intercept)
alphaPlus <- concResults$dualNegative - concResults$dualPositive
}
if (epsilon != 0) {
if (!heteroskedastic) {
rho1 <- mean(Yr <= (Xrc %*% concResults$coefficients +
nullGamma * Zr - epsilon))
rho2 <- mean(Yr >= (Xrc %*% concResults$coefficients +
nullGamma * Zr + epsilon))
rho <- rho1 + rho2
} else {
testStatNumer <- t(Zr) %*% alphaPlus / sqrt(n)
hn <- hc * n ^ (-1 / 3)
cn <- kappa * (qnorm(tau + hn) - qnorm(tau - hn))
fX <- sweep(x = Xrc, MARGIN = 1,
STATS = as.vector(abs(uHat) < cn),
FUN = "*")
fZX <- t(Zr) %*% fX / n
fXX <- t(Xrc) %*% fX / n
A <- fZX %*% solve(fXX)
Ztilde <- Zr - Xrc %*% t(A)
testStatDenom <- sqrt(2 * t(Ztilde * as.vector(uHat < 0)) %*%
Ztilde / n)
testStat <- testStatNumer / testStatDenom
pvalue <- as.numeric(pnorm(-abs(testStat)) * 2)
return(pvalue)
}
}
if (epsilon == 0) {
if (!heteroskedastic) {
rho <- 1
} else {
if (intercept) rqFit <- quantreg::rq(Yr ~ 1 + Xr)
if (!intercept) rqFit <- quantreg::rq(Yr ~ 0 + Xr)
bhat <- rqFit$dual - (1 - tau)
testStatNumer <- t(bhat) %*% Zr / sqrt(n)
hn <- hc * n ^ (-1 / 3)
cn <- kappa * (qnorm(tau + hn) - qnorm(tau - hn))
fX <- sweep(x = Xrc, MARGIN = 1, STATS = as.vector(abs(uHat) < cn),
FUN = "*")
fZX <- t(Zr) %*% fX / n
fXX <- t(Xrc) %*% fX / n
A <- solve(fXX) %*% t(fZX)
testStatDenom <- sqrt(tau * (1 - tau) * t(Zr - Xrc %*% A) %*%
(Zr - Xrc %*% A) / n)
testStat <- testStatNumer / testStatDenom
pvalue <- as.numeric(pnorm(-abs(testStat)) * 2)
return(pvalue)
}
}
M <- diag(rep(1, nrow(Xrc))) - Xrc %*% solve(t(Xrc) %*% Xrc) %*% t(Xrc)
T <- (1 / sqrt(nrow(Xrc))) * t(Zr) %*% alphaPlus /
sqrt((1 / nrow(Xrc)) * t(Zr) %*% M %*% Zr * rho)
pvalue <- as.numeric(pnorm(-abs(T)) * 2)
return(pvalue)
}
#' Format result for display
#'
#' This function simply takes a number and formats it for being
#' displayed. Numbers less than 1 in absolute value are rounded to 6
#' significant figure. Numbers larger than
#'
#' @param x The scalar to be formated
#' @return A scalar.
fmtResult <- function(x) {
if (abs(x) < 1) {
fx <- signif(x, digits = 7)
} else if (abs(x) >= 1 & abs(x) < 1e+7) {
fx <- signif(round(x, digits = 4), digits = 7)
} else {
fx <- formatC(x, format = "e", digits = 7)
}
if (is.numeric(fx)) fx <- as.character(fx)
return(fx)
}
#' Print results
#'
#' This function uses the print method on the l1svr return list.
#'
#' @param x an object returned from '\code{l1svr}'.
#' @param ... additional arguments.
#' @return basic set of results.
#' @export
print.l1svr <- function(x, ...) {
stopifnot(inherits(x, "l1svr"))
if (!is.null(x$coefficients)) {
cat('\nCall:\n')
print(x$call)
cat('\nCoefficient estimates:\n')
print(x$coefficients)
cat('\n')
}
}
#' Summarize results
#'
#' This function uses the summary method on the l1svr return list.
#'
#' @param object an object returned from '\code{l1svr}'.
#' @param ... additional arguments.
#' @return summarized results.
#' @export
summary.l1svr <- function(object, ...) {
stopifnot(inherits(object, "l1svr"))
if (!is.null(object$coefficients) && is.null(object$pvalues)) {
cat('\nCall:\n')
print(object$call)
cat('\nCoefficient estimates:\n')
print(object$coefficients)
## cat('\nObs: ', object$N, '\n')
}
if (!is.null(object$coefficients) && !is.null(object$pvalues)) {
cat('\nCall:\n')
print(object$call)
cat('\nCoefficient estimates:\n')
resultsMat <- rbind(object$coefficients, object$pvalues)
resultsMat <- data.frame(t(resultsMat))
signifVec <- rep('', times = length(object$pvalues))
signifVec[which(object$pvalues < 1e-1)] <- '. '
signifVec[which(object$pvalues < 5e-2)] <- '* '
signifVec[which(object$pvalues < 1e-2)] <- '** '
signifVec[which(object$pvalues < 1e-3)] <- '***'
resultsMat$.tmp.sig.vec <- signifVec
colnames(resultsMat) <- c('Estimate', 'Pr(>|t|)', '')
print(resultsMat)
cat('---\n')
cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\n")
## cat('Obs: ', object$N, '\n')
}
if (!is.null(object$ci)) {
ciTable <- data.frame(cbind(object$ci$lower$bound,
object$ci$upper$bound))
statusLower <- rep(' ', nrow(object$ci$lower))
statusLower[which(object$ci$lower$status == 'Iter. limit')] <- ' *'
statusLower[which(object$ci$lower$status == 'Prec. limit')] <- '**'
statusUpper <- rep(' ', nrow(object$ci$upper))
statusUpper[which(object$ci$upper$status == 'Iter. limit')] <- '* '
statusUpper[which(object$ci$upper$status == 'Prec. limit')] <- '**'
ciTable <- cbind(statusLower, ciTable, statusUpper)
colnames(ciTable) <- c('', 'Lower', 'Upper', '')
rownames(ciTable) <- rownames(object$ci$lower)
cat(paste0(round(object$ci$level * 100, digits = 2),
'% confidence intervals:\n'))
print(ciTable)
cat('---\n')
cat("Conf. interval codes:\n")
cat("'*' Iteration limit reached\n")
cat("'**' Precision limit reached\n")
}
}
#' Function for determining confidence intervals
#'
#' This function performs one iteration of estimating the confidence
#' interval. That is, it performs the l1-SVR regression rankscore test
#' under a single null hypothesis.
#'
#' @param target Target, equal to the size of the test.
#' @param gamma0 Scalar, a conjectured value of the coefficient for
#' the covariate of interest, i.e. the covariate for which the
#' confidence interval is being constructed.
#' @param index Integer, indexes which covariate is of interest.
#' @param intercept Boolean, indicates whether an intercept term is
#' included in the regression.
#' @param Y Vector of dependent variable.
#' @param X Matrix of covariates.
#' @param U Vector of residuals from regressing \code{Y} on \code{X}
#' using l1svr.
#' @inheritParams svmInference
ciIter <- function(gamma0, index, intercept, Y, X, U, epsilon, lambda,
heteroskedastic, hc, kappa) {
args <- list(nullGamma = gamma0, Y = Y, U = U,
epsilon = epsilon, lambda = lambda,
heteroskedastic = heteroskedastic,
hc = hc, kappa = kappa)
n <- length(Y)
if (intercept) {
if (index == 1) {
args$intercept <- FALSE
args$Xr <- X
args$Zr <- rep(1, n)
} else {
Xr <- matrix(X[, -(index - 1)], ncol = ncol(X) - 1)
colnames(Xr) <- colnames(X)[-(index - 1)]
args$intercept <- TRUE
args$Xr <- Xr
args$Zr = X[, (index - 1)]
}
} else {
Xr <- matrix(X[, -index], ncol = ncol(X) - 1)
colnames(Xr) <- colnames(X)[-index]
args$intercept <- FALSE
args$Xr <- Xr
args$Zr <- X[, index]
}
do.call(svmInference, args)
}
#' Function to perform grid search
#'
#' Perform a grid search to find the value at which argument
#' \code{FUN} is equal to argument \code{target}.
#'
#' @param FUN Scalar function of scalar argument.
#' @param init Scalar, initial solution.
#' @param target Scalar, target p-value.
#' @param increment Scalar, determines the first increment of the
#' grid, and is refined over the iterations of the grid search.
#' @param tol Scalar, tolerance for determining when \code{FUN}
#' evaluated at the solution is sufficiently close to
#' \code{target}. Set to \code{2.5e-3} by default.
#' @param tol.same Scalar, tolerance for determining when solution can
#' no longer be improved. Set to \code{1e-06} by default.
#' @param left Boolean, set to \code{TRUE} if solution is left of the
#' initial solution declared in \code{init}.
#' @param iter.max Integer, maximum number of iterations to perform.
#' @return Scalar, the value of the argument at which \code{FUN} is
#' equal to \code{target}.
gridSearch <- function(FUN, init = 0, target, increment = 2,
tol = 2.5e-3, tol.same = 1e-06,
left = FALSE, iter.max = 25,
...) {
exponent <- 0
if (left) {
farDirection <- -1
}
if (!left) {
farDirection <- +1
}
x <- init + farDirection * abs(increment)
value <- FUN(x, ...)
diff <- target - value
if (diff < 0) { ## i.e. haven't gone far enough
tooFar <- FALSE
dirMod <- 1
}
if (diff > 0) { ## i.e. gone too far
tooFar <- TRUE
dirMod <- -1
exponent <- exponent - 1
}
iter.count <- 1
sameX <- 0
sameX.lim <- 3
minDiff <- Inf
while (abs(diff) > tol & iter.count <= iter.max & sameX < sameX.lim) {
newX <- x + dirMod * farDirection * abs(increment) * 2.01 ^ exponent
if (abs(newX - x) < tol.same) sameX <- sameX + 1
x <- newX
if (left) {
if (x > init) x <- init
} else {
if (x < init) x <- init
}
value <- FUN(x, ...)
diff <- target - value
if (diff <= 0) { ## i.e. haven't gone far enough from initial point
if (tooFar) {
dirMod <- 1
exponent <- exponent - 1
}
tooFar <- FALSE
}
if (diff >= 0) { ## i.e. gone too far from initial point
if (!tooFar) {
dirMod <- -1
exponent <- exponent - 1
}
tooFar <- TRUE
}
if (abs(diff) < minDiff) {
minDiff <- abs(diff)
bestX <- x
bestPvalue <- value
}
iter.count <- iter.count + 1
}
iter.count <- iter.count - 1
status <- 1
if (iter.count == iter.max && abs(diff) > tol) {
## warning(gsub('\\s+', ' ',
## 'Calculation of confidence intervals terminated,
## iteration maximum reached.'),
## call. = FALSE)
status <- 2
}
if (sameX == sameX.lim && abs(diff) > tol) {
## warning(gsub('\\s+', ' ',
## 'Calculation of confidence intervals terminated,
## precision limit reached.'),
## call. = FALSE)
status <- 3
}
return(c(bound = unname(bestX),
pvalue = bestPvalue,
iterations = iter.count,
status = status))
}
#' Running Gurobi LP solver
#'
#' This function solves the LP problem using the Gurobi package. The
#' object generated by \code{\link{lpSetup}} is compatible with the
#' \code{gurobi} function. See \code{\link{runCplexAPI}} for
#' additional error code labels.
#' @param lpobj list of matrices and vectors defining the linear
#' programming problem.
#' @param solver.options list, each item of the list should
#' correspond to an option specific to the LP solver selected.
#' @return a list of the output from Gurobi. This includes the
#' objective value, the solution vector, and the optimization
#' status (status of \code{1} indicates successful optimization) .
runGurobi <- function(lpobj, solver.options = list(outputflag = 0)) {
result <- gurobi::gurobi(lpobj, solver.options)
status <- 0
if (result$status == "OPTIMAL") status <- 1
if (result$status == "INFEASIBLE") status <- 2
if (result$status == "INF_OR_UNBD") status <- 3
if (result$status == "UNBOUNDED") status <- 4
if (result$status == "NUMERIC") status <- 5
if (result$status == "SUBOPTIMAL") status <- 6
optx <- result$x
return(list(objval = result$objval,
optx = result$x,
opty = result$pi,
status = status))
}
#' Running cplexAPI LP solver
#'
#' This function solves the LP problem using the cplexAPI package. The
#' object generated by \code{\link{lpSetup}} is not compatible with
#' the \code{cplexAPI} functions. This function adapts the object to
#' solve the LP problem. See \code{\link{runGurobi}} for additional
#' error code labels.
#' @param lpobj list of matrices and vectors defining the linear
#' programming problem.
#' @param lpdir input either CPX_MAX or CPX_MIN, which sets the LP
#' problem as a maximization or minimization problem.
#' @param solver.options list, each item of the list should
#' correspond to an option specific to the LP solver selected.
#' @return a list of the output from CPLEX. This includes the
#' objective value, the solution vector, and the optimization
#' status (status of \code{1} indicates successful optimization).
runCplexAPI <- function(lpobj, lpdir, solver.options = NULL) {
## Declare environment and set options
env <- cplexAPI::openEnvCPLEX()
prob <- cplexAPI::initProbCPLEX(env)
cplexAPI::chgProbNameCPLEX(env, prob, "sample")
if (!is.null(solver.options)) {
for(i in seq(length(solver.options))) {
eval(parse(text = solver.options[[i]]))
}
}
## Declare LP prblem
sense <- lpobj$sense
cnt <- apply(lpobj$A, MARGIN = 2, function(x) length(which(x != 0)))
beg <- rep(0, ncol(lpobj$A))
beg[-1] <- cumsum(cnt[-length(cnt)])
ind <- unlist(apply(lpobj$A, MARGIN = 2, function(x) which(x != 0) - 1))
val <- c(lpobj$A)
val <- val[val != 0]
cplexAPI::copyLpwNamesCPLEX(env = env,
lp = prob,
nCols = ncol(lpobj$A),
nRows = nrow(lpobj$A),
lpdir = lpdir,
objf = lpobj$obj,
rhs = lpobj$rhs,
sense = sense,
matbeg = beg,
matcnt = cnt,
matind = ind,
matval = val,
lb = lpobj$lb,
ub = lpobj$ub)
cplexAPI::lpoptCPLEX(env, prob)
solution <- cplexAPI::solutionCPLEX(env, prob)
cplexAPI::delProbCPLEX(env, prob)
cplexAPI::closeEnvCPLEX(env)
status <- 0
if (typeof(solution) == "S4") {
if (attr(solution, "class") == "cplexError") {
status <- 5
solution <- list()
solution$objval <- NA
solution$x <- NA
}
} else {
if (solution$lpstat == 1) status <- 1
if (solution$lpstat == 2) status <- 4
if (solution$lpstat == 3) status <- 2
if (solution$lpstat == 4) status <- 3
if (solution$lpstat == 5) status <- 7
if (solution$lpstat == 6) status <- 6
}
cplexDual <- solution$pi
save(cplexDual, file = 'cplexDual.Rdata')
return(list(objval = solution$objval,
optx = solution$x,
opty = solution$pi,
status = status))
}
#' Running lpSolveAPI
#'
#' This function solves the LP problem using the \code{lpSolveAPI}
#' package. The object generated by \code{\link{lpSetup}} is not
#' compatible with the \code{lpSolveAPI} functions. This function
#' adapts the object to solve the LP problem. See
#' \code{\link{runGurobi}} and \code{\link{runCplexAPI}} for
#' additional error code labels.
#' @param lpobj list of matrices and vectors defining the linear
#' programming problem.
#' @param modelsense input either 'max' or 'min' which sets the LP
#' problem as a maximization or minimization problem.
#' @param solver.options list, each item of the list should
#' correspond to an option specific to the LP solver selected.
#' @return a list of the output from \code{lpSolveAPI}. This includes
#' the objective value, the solution vector, and the optimization
#' status (status of \code{1} indicates successful optimization).
runLpSolveAPI <- function(lpobj, modelsense, solver.options = NULL) {
lpmodel <- lpSolveAPI::make.lp(nrow(lpobj$A), ncol(lpobj$A))
for (j in 1:ncol(lpobj$A)) {
lpSolveAPI::set.column(lprec = lpmodel,
column = j,
x = lpobj$A[, j])
}
lpSolveAPI::set.constr.value(lprec = lpmodel,
rhs = lpobj$rhs)
sense <- lpobj$sense
sense[sense == "<"] <- "<="
sense[sense == ">"] <- ">="
sense[sense == "=="] <- "="
lpSolveAPI::set.constr.type(lprec = lpmodel,
types = sense)
lpSolveAPI::set.objfn(lprec = lpmodel,
obj = lpobj$obj)
lpSolveAPI::lp.control(lprec = lpmodel,
sense = modelsense)
if (!is.null(solver.options)) {
eval(solver.options)
}
lpSolveAPI::set.bounds(lprec = lpmodel,
lower = lpobj$lb,
upper = lpobj$ub)
solved <- lpSolveAPI::solve.lpExtPtr(lpmodel)
status <- 0
if (solved == 0) status <- 1
if (solved == 1) status <- 6
if (solved == 2) status <- 2
if (solved == 3) status <- 4
if (solved == 5) status <- 5
## Remove extraneous dual output from lpSolveAPI
lpDual <- lpSolveAPI::get.dual.solution(lpmodel)
lpDual <- lpDual[2:(length(lpSolveAPI::get.variables(lpmodel)) + 1)]
save(lpDual, file = 'lpDual.Rdata')
return(list(objval = lpSolveAPI::get.objective(lpmodel),
optx = lpSolveAPI::get.variables(lpmodel),
opty = lpDual,
status = status))
}
#' Configure LP environment to be compatible with solvers
#'
#' This alters the LP object so the model will be compatible with
#' specific solvers.
#' @param env List, the LP object.
#' @param solver Character, the LP solver.
#' @return Nothing, as this modifies an environment variable to save
#' memory.
lpSetupSolver <- function(model, solver) {
if (solver == "cplexapi") {
model$sense[model$sense == "<"] <- "L"
model$sense[model$sense == "<="] <- "L"
model$sense[model$sense == ">"] <- "G"
model$sense[model$sense == ">="] <- "G"
model$sense[model$sense == "="] <- "E"
model$sense[model$sense == "=="] <- "E"
model$ub[model$ub == Inf] <- cplexAPI::CPX_INFBOUND
model$lb[model$lb == -Inf] <- -cplexAPI::CPX_INFBOUND
}
if (solver == "lpsolveapi") {
model$sense[model$sense == "<"] <- "<="
model$sense[model$sense == ">"] <- ">="
model$sense[model$sense == "=="] <- "="
}
return(model)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.