#' Conducts inference using the Cho-Russell procedure
#'
#' @description This function conducts inference using the procedure
#' proposed by Cho and Russell (2019).
#'
#' @inheritParams estbounds
#' @inheritParams dkqs
#' @param ci A boolean variable that indicates whether a \eqn{p}-value or a
#' \eqn{(1-\alpha)}-confidence interval is returned. If \code{ci} is
#' \code{TRUE}, then a confidence interval is returned. Otherwise, a
#' \eqn{p}-value is returned.
#' @param alpha The significance level. This can be a vector.
#' @param tol The tolerance level in the bisection procedure.
#' @param kappa The tuning parameter used in the second step of the two-step
#' procedure for obtaining the bounds subject to the shape constraints.
#' It can be any nonnegative number or a vector of nonnegative numbers.
#' @param remove.const A boolean variable. This determine whether the
#' constraints are to be removed.
#'
#' @return Returns the following list of output:
#' \item{ub}{The upper bound from original data.}
#' \item{lb}{The lower bound from original data.}
#' \item{ub.bs}{The list of upper bounds from bootstrap data.}
#' \item{lb.bs}{The list of lower bounds from bootstrap data.}
#' \item{test.logical}{An indicator variable for whether the computation has
#' been conducted. If \code{test.logical} is 1, it refers to the case
#' where \code{beta.tgt} is inside the logical bound. If
#' \code{test.logical} is 0, it refers to the case where
#' \code{beta.tgt} is outside the logical bound.}
#' \item{logical.lb}{The logical lower bound.}
#' \item{logical.ub}{The logical upper bound.}
#' \item{df.error}{A table showing the id of the bootstrap replication(s)
#' with error(s) and the corresponding error message(s).}
#' \item{R.succ}{The number of successful bootstrap replications.}
#' \item{ci}{A boolean variable that indicates whether a \eqn{p}-value or a
#' \eqn{(1-\alpha)}-confidence interval is returned.}
#' \item{pval}{\eqn{p}-value (if \code{ci} is set as \code{FALSE}).}
#' \item{c.ub}{The upper bound of the \eqn{(1-\alpha)}-confidence interval.}
#' \item{c.lb}{The lower bound of the \eqn{(1-\alpha)}-confidence interval.}
#' \item{alpha}{The significance level.}
#' \item{iter}{The total number of iterations (if \code{ci} is \code{FALSE}.)}
#' \item{unique}{A boolean variable showing whether the solution is unique.}
#'
#' @details
#' \itemize{
#' \item{See the details section of the \code{\link[lpinfer]{estbounds}}
#' function for a list of strings acceptable for the option \code{norm}.}
#' \item{The following components are required in the \code{lpmodel} for the
#' Cho-Russell procedure:
#' \itemize{
#' \item{\code{A.tgt}}
#' \item{\code{A.obs}}
#' \item{\code{A.shp}}
#' \item{\code{beta.obs}}
#' \item{\code{beta.shp}}
#' }
#' \item{The input \code{beta.tgt} is not required when \code{ci = TRUE}.}
#' }
#' }
#'
#' @section Example:
#' \preformatted{
#' source("./example/dgp_missingdata.R") # Change directory if necessary
#' J <- 5
#' N <- 1000
#' data <- missingdata_draw(J = J, n = N, seed = 1, prob.obs = .5)
#' lpm <- missingdata_lpm(J = J, info = "full", data = data)
#' chorussell(data = data,
#' lpmodel = lpm,
#' beta.tgt = .2,
#' R = 100,
#' norm = 2,
#' ci = TRUE,
#' solver = "gurobi")
#' }
#'
#' @section More examples:
#' More examples can be found in the \code{chorussell_example.R} file
#' under the \code{example} subdirectory of the installation directory for
#' the \code{lpinfer} package.
#'
#' @export
#'
chorussell <- function(data = NULL, lpmodel, beta.tgt = NULL, n = NULL, R = 100,
Rmulti = 1.25, kappa = 0, norm = 2, estimate = TRUE,
solver = NULL, ci = NULL, alpha = 0.05, tol = 1e-4,
progress = TRUE, remove.const = TRUE,
previous.output = NA) {
# ---------------- #
# Step 1: Update call, check and update the arguments; initialize df.error
# ---------------- #
# Obtain call information
call <- match.call()
# Check the arguments
chorussell.return <- chorussell.check(data, lpmodel, beta.tgt, R, Rmulti,
kappa, norm, n, estimate, solver,
ci, alpha, tol, progress)
# Update the arguments
ci <- chorussell.return$ci
data <- chorussell.return$data
solver <- chorussell.return$solver
solver.name <- chorussell.return$solver.name
test.logical <- chorussell.return$test.logical
logical.lb <- chorussell.return$logical.lb
logical.ub <- chorussell.return$logical.ub
n <- chorussell.return$n
# Compute the maximum number of iterations
maxR <- ceiling(R * Rmulti)
# Sort kappa
kappa <- sort(kappa)
# Sort alpha
alpha <- sort(alpha)
### Case 1: test.logical == 1. Proceed with the calculation because
### beta.tgt is inside the logical bounds. test.logical also does not matter
### if the user would like to construct a confidence interval.
if ((test.logical == 1) | (isTRUE(ci))) {
# ---------------- #
# Step 2: Obtain estimated bounds
# ---------------- #
# Initialization
ub <- list()
lb <- list()
delta <- list()
# Loop through each elements in the list
for (i in seq_along(kappa)) {
if (is.list(lpmodel$beta.obs)) {
lpm <- lpmodel
lpm$beta.obs <- lpmodel$beta.obs[[1]]
} else {
lpm <- lpmodel
}
estb.out <- estbounds(data, lpm, kappa[i], norm, estimate,
solver.name)
ub[[i]] <- estb.out$ub
lb[[i]] <- estb.out$lb
delta[[i]] <- ub[[i]] - lb[[i]]
}
# ---------------- #
# Step 3: Obtain estimated bounds from bootstrap data
# ---------------- #
cr.bs.return <- chorussell.bs(data, lpmodel, beta.tgt, R, maxR, kappa,
norm, n, estimate, solver.name,
progress)
ub.bs <- cr.bs.return$ub
lb.bs <- cr.bs.return$lb
df.error <- cr.bs.return$df.error
R.succ <- cr.bs.return$R.succ
# ---------------- #
# Step 4: Solve the optimization problem in Cho-Russell
# ---------------- #
# Initialization
lb.can1 <- list()
lb.can2 <- list()
ub.can1 <- list()
ub.can2 <- list()
cr.lp.return <- list()
## Candidates for the upper and lower bounds. If the bound is inifinte, then
## the correspond (1 - alpha)-bound is also inifinte so the procedure of
## computing the candidates will be skipped.
for (i in seq_along(kappa)) {
# Lower bounds
if (lb[[i]] != -Inf) {
lb.can1[[i]] <- sqrt(n) * (lb.bs[[i]] - lb[[i]])
lb.can2[[i]] <- sqrt(n) * (lb.bs[[i]] - lb[[i]] - delta[[i]])
} else {
lb.can1[[i]] <- NULL
lb.can2[[i]] <- lb.can2[[i]]
}
# Upper bounds
if (ub[[i]] != Inf) {
ub.can1[[i]] <- sqrt(n) * (ub.bs[[i]] - ub[[i]])
ub.can2[[i]] <- sqrt(n) * (ub.bs[[i]] - ub[[i]] + delta[[i]])
} else {
ub.can1[[i]] <- NULL
ub.can2[[i]] <- ub.can1[[i]]
}
# Either find the p-value or the confidence interval
if ((lb[[i]] == Inf) & (ub[[i]] == Inf)) {
cr.lp.return[[i]] <- list()
cr.lp.return[[i]]$pval <- 1
cr.lp.return[[i]]$c.ub <- Inf
cr.lp.return[[i]]$c.lb <- -Inf
cr.lp.return[[i]]$unique <- TRUE
} else {
# Simplify the constraints if needed
if (isTRUE(remove.const)) {
simp.return <- chorussell.simp(lb.can1[[i]], lb.can2[[i]],
ub.can1[[i]], ub.can2[[i]],
progress)
df.lb <- simp.return$df.lb
df.ub <- simp.return$df.ub
} else {
df.lb <- NULL
df.ub <- NULL
}
cr.lp.return[[i]] <- chorussell.eval(beta.tgt, lb.can1[[i]],
lb.can2[[i]], ub.can1[[i]],
ub.can2[[i]], n, R, ci, alpha,
tol, ub[[i]], lb[[i]],
logical.ub, logical.lb,
remove.const, kappa[i],
progress, df.lb, df.ub)
}
}
# ---------------- #
# Step 5; Consolidate the list of outputs
# ---------------- #
output <- list(ub = ub,
lb = lb,
ub.bs = ub.bs,
lb.bs = lb.bs,
test.logical = test.logical,
logical.lb = logical.lb,
logical.ub = logical.ub,
df.error = df.error,
ci = ci,
call = call,
norm = norm,
R.succ = R.succ,
kappa = kappa,
solver = solver.name)
# Append the p-value or confidence interval based on ci
if (isFALSE(ci)) {
# p-values
pval.df <- data.frame(matrix(vector(), nrow = length(kappa), ncol = 2))
colnames(pval.df) <- c("kappa", "pvalue")
for (i in seq_along(kappa)) {
pval.df[i, 1] <- kappa[i]
pval.df[i, 2] <- cr.lp.return[[i]]$pval
}
output$pval <- pval.df
} else {
# Confidence intervals
ci.df <- data.frame(matrix(vector(),
nrow = length(kappa) * length(alpha),
ncol = 4))
colnames(ci.df) <- c("alpha", "kappa", "lb", "ub")
k <- 1
for (i in seq_along(kappa)) {
for (j in seq_along(alpha)) {
ci.df[k, 1] <- alpha[j]
ci.df[k, 2] <- kappa[i]
ci.df[k, 3] <- as.numeric(cr.lp.return[[i]][[j]]$bd[1])
ci.df[k, 4] <- as.numeric(cr.lp.return[[i]][[j]]$bd[2])
k <- k + 1
}
}
output$ci.df <- ci.df
output$alpha <- alpha
}
} else {
output <- list(pval = 0,
solver = solver.name,
call = call,
test.logical = test.logical)
}
# Turn returned lists for bounds as vectors if parameter is not multivalued
if (length(kappa) == 1) {
for (x in c("ub", "lb", "ub.bs", "lb.bs")) {
output[[x]] <- unlist(output[[x]])
}
}
# ---------------- #
# Step 6: Issue warning message for estimate = FALSE
# ---------------- #
if (isFALSE(estimate)) {
if (length(nrow(NULL) > 0)) {
warning(sprintf(paste0("Among the %s bootstrap replications, ",
"%s failed when estimate is set as FALSE"),
R.succ + nrow(df.error), nrow(df.error)))
}
}
# Assign the class of output
attr(output, "class") = "chorussell"
return(output)
}
#' Simplifies the candidates to be considered in
#' \code{\link[lpinfer]{chorussell}}
#'
#' @import furrr progressr
#'
#' @description This function simplifies the list of candidates to be
#' considered in the optimization problem in the
#' \code{\link[lpinfer]{chorussell}} function. In particular, because
#' \deqn{\mathbf{1}\left[\sqrt{n}\left(\hat{\theta}_{\rm lb}^b -
#' \hat{\theta}_{\rm lb}\right) \leq c_{\rm lb}\right]
#' \geq
#' \mathbf{1}\left[\sqrt{n}\left(\hat{\theta}^b_{\rm lb} -
#' \hat{\theta}_{\rm lb}\right) \leq c_{\rm lb} \quad \mathrm{ and } \quad
#' -c_{\rm ub} \leq \sqrt{n} \left(\hat{\theta}^b_{\rm ub} -
#' \hat{\theta}_{\rm ub} + \Delta\right)
#' \right],}
#' the values of \eqn{c_{\rm lb}} that satisfy
#' \deqn{\frac{1}{B}\sum^B_{b=1} \mathbf{1}
#' \left[\sqrt{n}\left(\hat{\theta}_{\rm lb}^b -
#' \hat{\theta}_{\rm lb}\right) \leq c_{\rm lb}\right] < 1 - \alpha}
#' will be removed from the set of the values under consideration. Similarly,
#' the list of \eqn{c_{\rm ub}} that satisfy
#' \deqn{\frac{1}{B}\sum^B_{b=1} \mathbf{1}
#' \left[- \sqrt{n}\left(\hat{\theta}_{\rm ub}^b -
#' \hat{\theta}_{\rm ub}\right) \leq c_{\rm lb}\right] < 1 - \alpha}
#' will be removed from the set of the values under consideration.
#'
#' @param lb.can1 The vector of values that corresponds to
#' \eqn{\sqrt{n}\left(\hat{\theta}^b_{\rm lb} - \hat{\theta}_{\rm lb}\right)}.
#' @param lb.can2 The vector of values that corresponds to
#' \eqn{\sqrt{n}\left(\hat{\theta}^b_{\rm lb} - \hat{\theta}_{\rm lb} -
#' \Delta \right)}.
#' @param ub.can1 The vector of values that corresponds to
#' \eqn{\sqrt{n}\left(\hat{\theta}^b_{\rm ub} - \hat{\theta}_{\rm ub}\right)}.
#' @param ub.can2 The vector of values that corresponds to
#' \eqn{\sqrt{n}\left(\hat{\theta}^b_{\rm ub} - \hat{\theta}_{\rm ub} +
#' \Delta \right)}.
#' @inheritParams chorussell
#'
#' @return Returns the list of updated candidates of lower bounds and upper
#' bounds.
#' \item{lb}{The updated list of candidates of lower bounds.}
#' \item{ub}{The updated list of candidates of upper bounds.}
#'
#' @export
#'
chorussell.simp <- function(lb.can1, lb.can2, ub.can1, ub.can2, progress) {
# ---------------- #
# Step 1: Combine the candidates
# ---------------- #
lb.can <- c(lb.can1, lb.can2)
ub.can <- -c(ub.can1, ub.can2)
# ---------------- #
# Step 2: Obtain the updated candidates
# ---------------- #
# Set the default for progress bar
progressr::handlers("progress")
# Lower bound
progressr::with_progress({
if (isTRUE(progress)) {
pbar <- progressr::progressor(along = seq_along(lb.can))
} else {
pbar <- NULL
}
lb.return <- furrr::future_map(lb.can,
.f = chorussell.simp.fn,
can1 = lb.can1,
pbar = pbar,
bd = "lower",
progress = progress)
df.lb <- data.frame(bd = unlist(sapply(lb.return, "[", "bd"),
use.names = FALSE),
sum = unlist(sapply(lb.return, "[", "sum"),
use.names = FALSE))
})
# Upper bound
progressr::with_progress({
if (isTRUE(progress)) {
pbar <- progressr::progressor(along = seq_along(ub.can))
} else {
pbar <- NULL
}
ub.return <- furrr::future_map(ub.can,
.f = chorussell.simp.fn,
can1 = ub.can1,
pbar = pbar,
bd = "upper",
progress = progress)
df.ub <- data.frame(bd = unlist(sapply(ub.return, "[", "bd"),
use.names = FALSE),
sum = unlist(sapply(ub.return, "[", "sum"),
use.names = FALSE))
})
return(list(df.lb = df.lb,
df.ub = df.ub))
}
#' Checks one candidate in \code{\link[lpinfer]{chorussell}}
#'
#' @description This function checks one candidate for the
#' \eqn{(1-\alpha)}-confidence interval to see whether it should be dropped
#' from the list of considerations. For details, please see the description
#' of the \code{\link[lpinfer]{chorussell.simp}} function.
#'
#' @param x One candidate of the upper or lower bound.
#' @param can1 This refers to either \code{lb.can1} or \code{ub.can1}.
#' @param bd A string indicating which set of candidates that the function is
#' trying to simplify.
#' @inheritParams chorussell
#' @inheritParams dkqs.bs.fn
#'
#' @return Returns the decision to drop or not to drop the candidate. If the
#' decision is to drop the candidate bound, \code{NULL} is returned.
#' Otherwise, the bound is returned back via the \code{future_map}
#' function.
#'
#' @export
#'
chorussell.simp.fn <- function(x, can1, pbar, bd, progress) {
# Message for progress bar
if (isTRUE(progress)) {
pbar(sprintf("(Checking the candidates for the %s bound)", bd))
}
if (bd == "lower") {
return(list(bd = x,
sum = mean(can1 <= x)))
} else if (bd == "upper") {
return(list(bd = x,
sum = mean(-x <= can1)))
}
}
#' Bootstrap procedure for the \code{\link[lpinfer]{chorussell}} procedure
#'
#' @description This function carries out the bootstrap procedure of the
#' \code{\link[lpinfer]{chorussell}} procedure. This function supports
#' parallel programming via the \code{future_map} function.
#'
#' @import furrr progressr
#'
#' @inheritParams chorussell
#' @inheritParams dkqs.bs
#'
#' @return Returns a list of output that are obtained from the Cho-Russell
#' procedure:
#' \item{ub.bs}{The list of upper bounds from bootstrap data.}
#' \item{lb.bs}{The list of lower bounds from bootstrap data.}
#' \item{df.error}{A table showing the id of the bootstrap replication(s)
#' with error(s) and the corresponding error message(s).}
#' \item{error.list}{A list of error messages.}
#' \item{R.eval}{The number of bootstrap replications that have been
#' conducted.}
#' \item{R.succ}{The number of successful bootstrap replications.}
#'
#' @export
#'
chorussell.bs <- function(data, lpmodel, beta.tgt, R, maxR, kappa, norm,
n, estimate, solver, progress) {
# ---------------- #
# Step 1: Initialization
# ---------------- #
R.succ <- 0
R.eval <- 0
eval.count <- 0
ub.list <- list()
lb.list <- list()
error.list <- list()
kappa.error <- list()
# Check if there is any list objects in 'lpmodel'
any.list <- lpmodel.anylist(lpmodel)
# If there is some list objects, set maxR as the max length of the list
if (isTRUE(any.list$list)) {
maxR <- length(any.list$consol)
}
# ---------------- #
# Step 2: Bootstrap replications
# ---------------- #
while ((R.succ < R) & (R.eval != maxR)) {
# Evaluate the list of indices to be passed to 'future_map'
bs.ind <- bs.index(R, R.eval, R.succ, maxR)
i0 <- bs.ind$i0
i1 <- bs.ind$i1
# Obtain results from the bootstrap replications
progressr::with_progress({
if (isTRUE(progress)) {
pbar <- progressr::progressor(along = i0:i1)
} else {
pbar <- NULL
}
chorussell.return <- furrr::future_map(i0:i1,
.f = chorussell.bs.fn,
data = data,
lpmodel = lpmodel,
beta.tgt = beta.tgt,
kappa = kappa,
norm = norm,
n = n,
estimate = estimate,
solver = solver,
pbar = pbar,
eval.count = eval.count,
n.bs = i1 - i0 + 1,
progress = progress,
any.list = any.list,
.options =
furrr::furrr_options(seed = TRUE))
eval.count <- eval.count + 1
})
# Update the list and parameters
post.return <- post.bs(chorussell.return, i0, i1, R.eval, T.list = NULL,
beta.list = NULL, error.list, list.param = kappa,
error.param = kappa.error, ub.list = ub.list,
lb.list = lb.list)
ub.list <- post.return$ub.list
lb.list <- post.return$lb.list
kappa.error <- post.return$error.param
error.list <- post.return$error.list
R.succ <- length(lb.list)
R.eval <- post.return$R.eval
}
# ---------------- #
# Step 3: Consolidate the upper and lower bounds
# ---------------- #
ub.bs <- list()
lb.bs <- list()
# Unlist ub.bs and lb.bs
ubb <- unlist(ub.list, use.names = FALSE)
lbb <- unlist(lb.list, use.names = FALSE)
for (i in seq_along(unlist(kappa))) {
# Make sure that the remainder is 0 if i == length(kappa)
if (i == length(kappa)) {
k <- 0
} else {
k <- i
}
ub.bs[[i]] <- unlist(ubb[seq_along(ubb) %% length(kappa) == k],
use.names = FALSE)
lb.bs[[i]] <- unlist(lbb[seq_along(lbb) %% length(kappa) == k],
use.names = FALSE)
}
# ---------------- #
# Step 4: Consolidate the error messages
# ---------------- #
if (R.eval != R.succ) {
# Create data.frame for error messages
df.error <- data.frame(id = NA,
kappa = unlist(kappa.error),
message = unlist(error.list))
# Match the id of the error messages
df.error <- error.id.match(error.list, df.error)
} else {
df.error <- NULL
}
return(list(ub = ub.bs,
lb = lb.bs,
df.error = df.error,
error.list = error.list,
R.succ = R.succ,
R.eval = R.eval))
}
#' Carries out one bootstrap replication for the Cho-Russell procedure
#'
#' @description This function carries out one bootstrap replication of the
#' Cho-Russell procedure. This function is used in the
#' \code{\link[lpinfer]{chorussell.bs}} function via the \code{future_map}
#' function.
#'
#' @inheritParams chorussell
#' @inheritParams chorussell.bs
#' @inheritParams dkqs.bs
#' @inheritParams dkqs.bs.fn
#' @param x This is either the list of indices that represent the bootstrap
#' replications, or the list of bootstrap components of the \code{lpmodel}
#' object passed from the user.
#'
#' @return Returns a list of output that are obtained from the Cho-Russell
#' procedure:
#' \item{ub}{The bootstrap estimate of the upper bound.}
#' \item{ub}{The bootstrap estimate of the lower bound.}
#' \item{kappa.error}{The \code{kappa} parameter that leads to an error (if
#' applicable).}
#' \item{msg}{An error message (if applicable).}
#'
#' @export
#'
chorussell.bs.fn <- function(x, data, lpmodel, beta.tgt, kappa, norm, n,
estimate, solver, pbar, eval.count, n.bs,
any.list, progress) {
# ---------------- #
# Step 1: Print the progress bar
# ---------------- #
if (isTRUE(progress)) {
if (eval.count == 0) {
pbar(sprintf("(Computing %s bootstrap estimates)", n.bs))
} else {
pbar(sprintf("(Computing %s extra bootstrap estimates)", n.bs))
}
}
# ---------------- #
# Step 2: Initialization
# ---------------- #
# Initialization
ub.list <- list()
lb.list <- list()
kappa.error <- NULL
msg <- NULL
# Replace lpmodel by x if x is a list
lpm <- lpmodel
if (!is.null(any.list)) {
for (nm in any.list$name) {
lpm[[nm]] <- lpmodel[[nm]][[x + 1]]
}
}
# ---------------- #
# Step 3: Conduct one bootstrap replication
# ---------------- #
# Draw data
if (!is.null(data)) {
data.bs <- as.data.frame(data[sample(1:nrow(data), replace = TRUE),])
rownames(data.bs) <- 1:nrow(data.bs)
} else {
data.bs <- NULL
}
for (i in seq_along(kappa)) {
# Bootstrap estimator
result <- tryCatch({
estb.return <- estbounds(data.bs, lpm, kappa[i], norm, estimate, solver)
estb.return$status <- "NOERROR"
estb.return
}, warning = function(w) {
return(list(status = "warning",
msg = w))
}, error = function(e) {
return(list(status = "error",
msg = e))
})
if (!(result$status %in% c("warning", "error"))) {
ub.list[[i]] <- result$ub
lb.list[[i]] <- result$lb
kappa.error <- NULL
msg <- NULL
} else {
ub.list <- NULL
lb.list <- NULL
kappa.error <- kappa[i]
msg <- result$msg$message
break()
}
}
return(list(ub = ub.list,
lb = lb.list,
kappa.error = kappa.error,
msg = msg))
}
#' Computes the required object in the \code{\link[lpinfer]{chorussell}}
#' procedure
#'
#' @description This function computes the required object in the
#' \code{\link[lpinfer]{chorussell}} procedure. If \code{ci} is \code{TRUE},
#' this function returns the \eqn{(1-\alpha)}-confidence interval. Otherwise,
#' it returns the \eqn{p}-value.
#'
#' @inheritParams chorussell
#' @inheritParams chorussell.simp
#' @param logical.ub The logical upper bound.
#' @param logical.lb The logical lower bound.
#' @param ub The sample upper bound.
#' @param lb The sample lower bound.
#' @param df.lb The list of lower bounds that are obtained from the
#' simplified program.
#' @param df.ub The list of upper bounds that are obtained from the
#' simplified program.
#'
#' @return Depending on \code{ci}, this function either returns the
#' \eqn{p}-value or the \eqn{(1-\alpha)}-confidence interval.
#'
#' @export
#'
chorussell.eval <- function(beta.tgt, lb.can1, lb.can2, ub.can1, ub.can2, n, R,
ci, alpha, tol, ub, lb, logical.ub, logical.lb,
remove.const, kappa, progress, df.lb = NULL,
df.ub = NULL) {
if (isFALSE(ci)) {
# ---------------- #
# Case 1: ci == FALSE, i.e. computes the p-value via bisection method
# ---------------- #
# Predefine the two end-points
a <- 0
b <- 1
# Initialization
k <- 2
while (abs(b - a) > tol) {
c <- (a + b)/2
c.lp <- chorussell.lp(lb.can1, lb.can2, ub.can1, ub.can2, n, R, c, ub,
lb, logical.ub, logical.lb, remove.const, ci,
kappa, k, progress, df.lb, df.ub)
c.inout <- chorussell.pt(c.lp, beta.tgt)
if (isFALSE(c.inout)) {
b <- c
} else {
a <- c
}
k <- k + 1
}
return(list(pval = c))
} else {
# ---------------- #
# Case 2: ci == TRUE, i.e. computes the (1 - alpha) confidence interval
# ---------------- #
# Computes the confidence interval for each alpha
cr.bd.return <- list()
for (j in seq_along(alpha)) {
cr.bd.temp <- chorussell.lp(lb.can1, lb.can2, ub.can1, ub.can2, n, R,
alpha[j], ub, lb, logical.ub, logical.lb,
remove.const, ci, kappa, 0, progress, df.lb,
df.ub)
cr.bd.return[[j]] <- cr.bd.temp
}
return(cr.bd.return)
}
}
#' Checks if \code{beta.tgt} is inside the \eqn{(1-\alpha)}-confidence interval
#'
#' @description This function checks whether \code{beta.tgt} is inside the
#' \eqn{(1-\alpha)}-confidence interval from the
#' \code{\link[lpinfer]{chorussell}} procedure.
#'
#' @param cr.lp.return List of objects returned from
#' \code{\link[lpinfer]{chorussell}}.
#' @inheritParams chorussell
#'
#' @return Returns the decision.
#' \item{decision}{A boolean variable that equals to \code{TRUE} if
#' \code{beta.tgt} is inside the \eqn{(1-\alpha)}-confidence interval, and
#' equals to \code{FALSE} otherwise.}
#'
#' @export
#'
chorussell.pt <- function(cr.lp.return, beta.tgt) {
# Obtain the lower and upper bounds
lb <- cr.lp.return$bd[1]
ub <- cr.lp.return$bd[2]
# Obtain the decision
if ((beta.tgt <= ub) & (beta.tgt >= lb)) {
decision <- TRUE
} else {
decision <- FALSE
}
return(decision)
}
#' Computes the \eqn{(1-\alpha)}-confidence interval in the
#' \code{\link[lpinfer]{chorussell}} procedure
#'
#' @description This function computes the \eqn{(1-\alpha)}-confidence
#' interval in the \code{\link[lpinfer]{chorussell}} procedure by solving the
#' optimization problem.
#'
#' @import furrr progressr
#'
#' @inheritParams chorussell
#' @inheritParams chorussell.eval
#' @inheritParams chorussell.simp
#' @param k Iteration number.
#'
#' @return Returns the following list of objects:
#' \item{bd}{A vector that represents the \eqn{(1-\alpha)}-confidence
#' interval.}
#' \item{unique}{An indicator variable of whether the solution is unique.}
#'
#' @export
#'
chorussell.lp <- function(lb.can1, lb.can2, ub.can1, ub.can2, n, R, alpha,
ub, lb, logical.ub, logical.lb, remove.const, ci,
kappa, k = 0, progress, df.lb, df.ub) {
# ---------------- #
# Step 1: Select the candidates of the optimization problem
# ---------------- #
if (isTRUE(remove.const)) {
lb.can <- df.lb[df.lb$sum >= 1 - alpha, "bd"]
ub.can <- df.ub[df.ub$sum >= 1 - alpha, "bd"]
} else {
lb.can <- c(lb.can1, lb.can2)
ub.can <- -c(ub.can1, ub.can2)
}
# Include the logical bounds if none of the candidates satisfy the answer
delta <- ub - lb
if (is.null(lb.can)) {
lb.can <- c(sqrt(n) * (logical.lb - lb),
sqrt(n) * (logical.lb - lb - delta))
}
if (is.null(ub.can)) {
ub.can <- -c(sqrt(n) * (logical.ub - ub),
sqrt(n) * (logical.ub - ub + delta))
}
# ---------------- #
# Step 2: Compute the list of bounds that satisfy the constraints
# of the optimization problem
# ---------------- #
if ((lb == -Inf) | (ub == Inf)) {
# This corresponds to the case where one of the bounds is unbounded based
# on sample data
progressr::with_progress({
if (isTRUE(progress)) {
pbar <- progressr::progressor(along = seq_along(lb.can))
} else {
pbar <- NULL
}
cr.lp.return <- furrr::future_map(lb.can,
.f = chorussell.lp.fn.unbd,
lb.can1 = lb.can1,
lb.can2 = lb.can2,
ub.can1 = ub.can1,
ub.can2 = ub.can2,
ub.can = ub.can,
lb = lb,
ub = ub,
alpha = alpha,
pbar = pbar,
ci = ci,
kappa = kappa,
k = k,
progress = progress)
})
} else {
# This corresponds to the case where both of the bounds are bounded based
# on sample data
progressr::with_progress({
if (isTRUE(progress)) {
pbar <- progressr::progressor(along = seq_along(lb.can))
} else {
pbar <- NULL
}
cr.lp.return <- furrr::future_map(lb.can,
.f = chorussell.lp.fn,
lb.can1 = lb.can1,
lb.can2 = lb.can2,
ub.can1 = ub.can1,
ub.can2 = ub.can2,
ub.can = ub.can,
alpha = alpha,
pbar = pbar,
ci = ci,
kappa = kappa,
k = k,
progress = progress)
})
}
# Merge the list of tables from 'cr.lp.return'
cr.lp.bds <- Reduce(rbind, cr.lp.return)
# ---------------- #
# Step 3: Compute the tightest bounds
# ---------------- #
# Find the smallest length
min.len <- min(cr.lp.bds$len)
# Find the corresponding bounds
min.bd <- unique(cr.lp.bds[cr.lp.bds$len == min.len, 1:2])
# Check if its unique
if (nrow(min.bd) == 1) {
unique.bd <- 1
} else {
unique.bd <- 0
}
# Compute the bounds
bd <- c(lb - min.bd[1,1]/sqrt(n), ub + min.bd[1,2]/sqrt(n))
return(list(bd = bd,
unique = unique.bd))
}
#' Computes whether the candidate bounds satisfy the constraints in the
#' \code{\link[lpinfer]{chorussell}} procedure when the sample estimates are
#' non-infinite
#'
#' @description This function carries out one set of computation in the
#' \code{\link[lpinfer]{chorussell}} procedure to check whether the set of
#' values satisfy the constraints in the \code{\link[lpinfer]{chorussell}}
#' procedure. In particular, given one candidate of the lower bound, it
#' checks all possible pairs with the upper bounds and determine whether they
#' satisfy the constraints.
#'
#' @param x A candidate lower bound.
#' @param ub.can All candidates of the upper bounds.
#' @inheritParams chorussell
#' @inheritParams chorussell.simp
#' @inheritParams chorussell.simp.fn
#' @inheritParams dkqs.bs.fn
#' @inheritParams chorussell.lp
#'
#' @return Returns a data frame.
#' \item{df}{A data frame that contains the pairs of feasible lower bounds
#' and upper bounds, as well as their sum.}
#'
#' @export
#'
chorussell.lp.fn <- function(x, lb.can1, lb.can2, ub.can1, ub.can2, ub.can,
alpha, pbar, ci, kappa, k, progress) {
# ---------------- #
# Step 1: Print the progress bar
# ---------------- #
if (isTRUE(progress)) {
if (isFALSE(ci)) {
pbar(sprintf("(Computing p-value for kappa = %s, iteration %s)",
kappa, k))
} else {
pbar(sprintf("(Constructing confidence interval for kappa = %s)", kappa))
}
}
# ---------------- #
# Step 2: Initialize data.frame
# ---------------- #
df <- data.frame(matrix(vector(), nrow = 0, ncol = 3))
colnames(df) <- c("lb", "ub", "len")
# ---------------- #
# Step 3: Check if the candidate bounds satisfy the inequalities
# ---------------- #
for (i in seq_along(ub.can)) {
ind1 <- (mean((lb.can1 <= x) * (-ub.can[i] <= ub.can2)) >= (1 - alpha))
ind2 <- (mean((lb.can2 <= x) * (-ub.can[i] <= ub.can1)) >= (1 - alpha))
if (isTRUE(ind1) & isTRUE(ind2)) {
j <- nrow(df) + 1
df[j, 1] <- x
df[j, 2] <- ub.can[i]
df[j, 3] <- ub.can[i] + x
}
}
return(df)
}
#' Computes whether the candidate bounds satisfy the constraints in the
#' \code{\link[lpinfer]{chorussell}} procedure when one of the sample
#' estimates of the bounds is infinite
#'
#' @description This function finds the candidate bounds that satisfy
#' the constraints in the \code{\link[lpinfer]{chorussell}} procedure when
#' one of the sample estimates of the bounds is infinite. If the upper bound
#' is infinite, then the third column of the data frame will store the
#' negative of the lower bound so the largest candidate for the lower bound
#' will be chosen in order to minimize the interval length. Similarly, if the
#' lower bound is infinite, then the third column of the data frame will
#' store the upper bound.
#'
#' @inheritParams chorussell
#' @inheritParams chorussell.eval
#' @inheritParams chorussell.lp.fn
#' @inheritParams chorussell.simp
#' @inheritParams chorussell.simp.fn
#' @param lb.can All candidates of the lower bounds.
#'
#' @return Returns a data frame.
#' \item{df}{A data frame that contains the pairs of feasible lower bounds
#' and upper bounds, as well as their sum.}
#'
#' @export
#'
chorussell.lp.fn.unbd <- function(x, lb.can1, lb.can2, ub.can1, ub.can2,
lb.can, ub.can, lb, ub, alpha, pbar, ci, k,
progress) {
# ---------------- #
# Step 1: Print the progress bar
# ---------------- #
if (isTRUE(progress)) {
if (isFALSE(ci)) {
pbar(sprintf("(Computing p-value for kappa = %s, iteration %s)",
kappa, k))
} else {
pbar(sprintf("(Constructing confidence interval for kappa = %s)", kappa))
}
}
# ---------------- #
# Step 2: Initialize data.frame
# ---------------- #
df <- data.frame(matrix(vector(), nrow = 0, ncol = 3))
colnames(df) <- c("lb", "ub", "len")
# ---------------- #
# Step 3: Assign the bounds depending on whether lb or ub is bounded
# ---------------- #
if (lb == -Inf) {
can1 <- -ub.can1
can2 <- -ub.can2
x <- -x
} else {
can1 <- lb.can1
can2 <- lb.can2
}
# ---------------- #
# Step 4: Check if the candidate bounds satisfy the inequalities
# ---------------- #
for (i in seq_along(ub.can)) {
ind1 <- (mean((can1 <= x)) >= (1 - alpha))
ind2 <- (mean((can2 <= x)) >= (1 - alpha))
if (isTRUE(ind1) & isTRUE(ind2)) {
j <- nrow(df) + 1
if (lb == -Inf) {
df[j, 1] <- -Inf
df[j, 2] <- x
} else {
df[j, 1] <- x
df[j, 2] <- Inf
}
df[j, 3] <- x
}
}
return(df)
}
#' Checks and updates the input in the \code{\link[lpinfer]{chorussell}}
#' procedure
#'
#' @description This function checks and updates the input from the user in the
#' \code{\link[lpinfer]{chorussell}} function. If there is any invalid input,
#' the function will be terminated and error messages will be printed.
#'
#' @inheritParams chorussell
#' @inheritParams dkqs
#'
#' @return Returns the updated parameters back to the function
#' \code{chorussell}. The following information are updated:
#' \itemize{
#' \item{\code{ci}}
#' \item{\code{data}}
#' \item{\code{solver}}
#' \item{\code{solver.name}}
#' \item{\code{test.logical}}
#' \item{\code{n}}
#' \item{\code{logical.lb}}
#' \item{\code{logical.ub}}
#' }
#'
#' @export
#'
chorussell.check <- function(data, lpmodel, beta.tgt, R, Rmulti, kappa,
norm, n, estimate, solver, ci, alpha, tol,
progress) {
# ---------------- #
# Step 1: Check data
# ---------------- #
# Check data. If data is NULL, check if n is a positive integer
if (!is.null(data)) {
data <- check.dataframe(data)
n <- nrow(data)
} else {
check.samplesize(n, "n")
}
# ---------------- #
# Step 2: Check lpmodel
# ---------------- #
lpmodel <- check.lpmodel(data = data,
lpmodel = lpmodel,
name.var = "lpmodel",
A.tgt.cat = c("matrix", "function_mat", "list"),
A.obs.cat = c("matrix", "function_mat", "list"),
A.shp.cat = c("matrix", "function_mat", "list"),
beta.obs.cat = c("function_mat",
"list",
"function_obs_var"),
beta.shp.cat = c("matrix", "function_mat", "list"),
R = R)
# ---------------- #
# Step 3: Check solver
# ---------------- #
solver.return <- check.solver(solver, "solver", norm)
solver <- solver.return$solver
solver.name <- solver.return$solver.name
# ---------------- #
# Step 4: Assign ci
# ---------------- #
if (is.null(beta.tgt) & is.null(ci)) {
# If beta.tgt and ci are not passed, build a confidence interval
ci <- TRUE
} else if ((!is.null(beta.tgt)) & is.null(ci)) {
# If beta.tgt is given but ci is not passed, compute the p-value
ci <- FALSE
}
# ---------------- #
# Step 5: Check numerics
# ---------------- #
if (isFALSE(ci)) {
check.numeric(beta.tgt, "beta.tgt")
}
check.positiveinteger(R, "R")
check.nonnegative(tol, "tol")
check.norm(norm, "norm")
check.numrange(Rmulti, "Rmulti", "closed", 1, "open", Inf)
# Alpha can be a vector
for (i in seq_along(alpha)) {
check.numrange(alpha[i], "alpha", "closed", 0, "closed", 1)
}
# Kappa can be a vector
for (i in seq_along(kappa)) {
check.nonnegative(kappa[i], "kappa")
}
# ---------------- #
# Step 6: Check whether beta.tgt is within the logical bounds
# ---------------- #
if (isFALSE(ci)) {
test.return <- check.betatgt(data, lpmodel, beta.tgt, solver)
test.logical <- test.return$inout
logical.lb <- test.return$lb
logical.ub <- test.return$ub
} else {
test.logical <- 1
logical.lb <- check.betatgt.lp(data, lpmodel, "min", solver)
logical.ub <- check.betatgt.lp(data, lpmodel, "max", solver)
}
# ---------------- #
# Step 7: Check Boolean
# ---------------- #
check.boolean(estimate, "estimate")
check.boolean(ci, "ci")
check.boolean(progress, "progress")
# ---------------- #
# Step 8: Return updated items
# ---------------- #
return(list(ci = ci,
data = data,
solver = solver,
solver.name = solver.name,
test.logical = test.logical,
n = n,
logical.lb = logical.lb,
logical.ub = logical.ub))
}
#' Print results from \code{\link[lpinfer]{chorussell}}
#'
#' @description This function either prints the \eqn{p}-values or a
#' \eqn{(1-\alpha)} confidence interval from
#' \code{\link[lpinfer]{chorussell}}.
#'
#' @param x The output objects returned from \code{\link[lpinfer]{chorussell}}.
#' @param ... Additional arguments.
#'
#' @return Nothing is returned.
#'
#' @export
#'
print.chorussell <- function(x, ...) {
# Prints confidence interval if ci is TRUE. Otherwise, prints p-value
digits <- 5
if ((x$test.logical == 1) | isTRUE(x$ci)) {
# Case 1: 'beta.tgt' is within the logical bound
# Prints confidence interval if ci is TRUE. Otherwise, prints p-value
digits <- 5
if (isFALSE(x$ci)) {
# Print the p-values
df.pval <- x$pval
# Print the p-values
if (nrow(df.pval) == 1) {
cat(sprintf("p-value: %s\n", round(df.pval[1, 2], digits = digits)))
} else {
cat("p-values: \n")
colnames(df.pval) <- c("kappa", "p-value")
df.pval[, 2] <- round(df.pval[, 2], digits = digits)
print(df.pval, row.names = FALSE)
}
} else {
# Print the confidence intervals
ci.df <- x$ci.df
n.ci.df <- nrow(ci.df)
if (n.ci.df == 1) {
cat(sprintf("%s%% confidence interval: [%s, %s]\n",
round(100 * (1 - x$alpha), digits = digits),
round(x$ci.df[1, 3], digits = digits),
round(x$ci.df[1, 4], digits = digits)))
} else {
cat("Confidence intervals: \n")
ci.df.int <- data.frame(matrix(vector(), nrow = n.ci.df, ncol = 3))
colnames(ci.df.int) <- c("Significance level",
"kappa",
"Confidence intervals")
ci.df.int[, 1] <- ci.df[, 1]
ci.df.int[, 2] <- ci.df[, 2]
for (i in 1:n.ci.df) {
ci.df.int[i, 3] <- sprintf("[%s, %s]",
round(x$ci.df[i, 3], digits = digits),
round(x$ci.df[i, 4], digits = digits))
}
print(ci.df.int, row.names = FALSE)
}
}
} else {
# Case 2: 'beta.tgt' is outside the logical bound
infeasible.pval.msg()
}
}
#' Summary of results from \code{\link[lpinfer]{chorussell}}
#'
#' @description This function prints a summary of the results obtained from
#' \code{\link[lpinfer]{chorussell}}.
#'
#' @inheritParams print.chorussell
#'
#' @return Nothing is returned.
#'
#' @export
#'
summary.chorussell <- function(x, ...) {
if ((x$test.logical == 1) | isTRUE(x$ci)) {
# Case 1: 'beta.tgt' is within the logical bound
# Print the p-value or the corresponding confidence interval
print(x)
# Print kappa if x$pval or x$ci.df is one dimensional
if ((isTRUE(x$ci) & isTRUE(nrow(x$ci.df) == 1)) |
(isFALSE(x$ci) & isTRUE(nrow(x$pval) == 1))) {
cat(sprintf("kappa: %s\n", x$kappa))
}
# Print test statistic, solver used, norm used, and the number of
# successful bootstrap replications
cat(sprintf("Norm: %s\n", x$norm))
cat(sprintf("Solver: %s\n", x$solver))
cat(sprintf("Number of successful bootstrap replications: %s\n", x$R.succ))
# Number of failed bootstrap replications
if (!is.null(x$df.error)) {
nerr <- nrow(x$df.error)
errstring <- "Number of failed bootstrap"
if (nerr == 1) {
cat(sprintf(paste(errstring, "replication: %s\n"), nerr))
} else {
cat(sprintf(paste(errstring, "replications: %s\n"), nerr))
}
}
} else if (x$test.logical == 0) {
# Case 2: 'beta.tgt' is outside the logical bound
infeasible.pval.msg()
cat(sprintf("\nSolver used: %s\n", x$solver))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.