## Nothing
## #' Optimal shrinkage for Empirical Bayes confidence intervals
## #'
## #' Compute linear shrinkage factor \eqn{w_{opt}} to minimize the length
## #' of the resulting Empirical Bayes confidence interval (EBCI).
## #' @param S Signal-to-noise ratio \eqn{\mu_{2}/\sigma^2}{mu_2/sigma^2}, where
## #' \eqn{\mu_{2}}{mu_2} is the second moment of
## #' \eqn{\theta-X'\delta}{theta-X*delta}, and \sigma^2 is the
## #' variance of the preliminary estimator.
## #' @param kappa Kurtosis of \eqn{\theta-X'\delta}{theta-X*delta}.
## #' @param alpha Determines confidence level, \eqn{1-\alpha}{1-alpha}.
## #' @param cv_tbl Optionally, supply a data frame of critical values.
## #' \code{cva(m2, kappa, alpha)}, for different values of \code{m2}, such
## #' as a subset of the data frame \code{\link{cva_tbl}}, that matches the
## #' supplied values of \code{alpha} and \code{kappa}. The data frame needs
## #' to contain two variables, \code{m2}, corresponding to the value of 2nd
## #' moment of the normalized bias, and \code{cv}, with the corresponding
## #' value of \code{cva(m2, kappa, alpha)}. If non-\code{NULL}, for the
## #' purposes of optimizing the shrinkage factor, compute the
## #' \code{cva} by interpolating between the critical values in this data
## #' frame, instead of computing them from scratch. This can speed up the
## #' calculations.
## #' @return Returns a list with 3 components: \describe{
## #'
## #' \item{\code{w}}{Optimal shrinkage factor \eqn{w_{opt}}{w_opt}}
## #'
## #' \item{\code{length}}{Normalized half-length of the corresponding conf
## #' interval, so that the interval obtains by taking the estimator based on
## #' shrinkage given by \code{w}, and adding and subtracting \code{length}
## #' times the standard error \eqn{\sigma}{sigma} of preliminary estimator.}
## #'
## #' \item{\code{m2}}{The second moment of the normalized bias,
## #' \eqn{(1/w-1)^{2}S}{(1/w-1)^2*S}}}
## #' @references{
## #'
## #' \cite{Armstrong, Timothy B., Kolesár, Michal, and Plagborg-Møller, Mikkel
## #' (2020): Robust Empirical Bayes Confidence Intervals,
## #' \url{https://arxiv.org/abs/2004.03448}}
## #'
## #' }
## #' @examples
## #' w_opt(4, 3)
## #' ## Use precomputed critical value table
## #' w_opt(4, 3, cv_tbl=cva_tbl[cva_tbl$kappa==3 & cva_tbl$alpha==0.05, ])
w_opt <- function(S, kappa, alpha=0.05, cv_tbl=NULL) {
## Choose the shrinkage factor w in (0, 1] minimizing the robust EBCI
## half-length cva(m2, kappa, alpha)*w, where m2 = (1/w-1)^2*S is the
## second moment of the normalized bias.
## NOTE(review): `tol` used below is not defined in this file --
## presumably a package-level constant; confirm before relying on it.
if (!is.null(cv_tbl)) {
## Linearly interpolate
## critical values from the supplied table: `idx` is the first table row
## with m2 above the query (which.max of a logical picks the first TRUE),
## `idx0` the row just below it (clamped to 1 at the lower end).
cv <- function(m2) {
idx <- which.max(cv_tbl$m2>m2)
idx0 <- max(idx-1, 1)
cv_tbl$cv[idx0]+(m2-cv_tbl$m2[idx0])/
(cv_tbl$m2[idx]-cv_tbl$m2[idx0]) *
(cv_tbl$cv[idx]-cv_tbl$cv[idx0])
}
## Restrict the m2 search range to the range covered by the table
maxbias <- max(cv_tbl$m2)
minbias <- min(cv_tbl$m2)
} else {
## No table supplied: compute critical values from scratch (check=FALSE
## skips the accuracy check for speed inside the optimizer)
cv <- function(m2) cva(m2, kappa=kappa, alpha=alpha, check=FALSE)$cv
## Maximum bias (i.e. m2) is 100^2 to prevent numerical issues
maxbias <- 100^2
minbias <- 0
}
## Normalized half-length of the CI as a function of the shrinkage factor
ci_length <- function(w) cv((1/w-1)^2*S)*w
if (S > tol) {
## The m2 bounds [minbias, maxbias] map to bounds on w via
## w = 1/(sqrt(m2/S)+1); optimize the half-length over that bracket
r <- stats::optimize(ci_length, c(1/(sqrt(maxbias/S)+1),
1/(sqrt(minbias/S)+1)), tol=tol)
m2 <- (1/r$minimum-1)^2*S
} else {
## Degenerate signal-to-noise ratio: shrink all the way to zero
r <- list(minimum=0, objective=NA)
m2 <- Inf
}
## Recompute critical value, checking solution accuracy. If we're using
## cv_tbl, then optimum will be at one of the values of m2 in the table, so
## solution should always be accurate
len <- cva(m2, kappa=kappa, alpha=alpha, check=TRUE)$cv*r$minimum
if (m2 > maxbias-1e-4)
warning("Corner solution: optimum reached at maximal normalized bias")
list(w=r$minimum, length=len, m2=m2)
}
## #' Empirical Bayes estimator and confidence intervals
## #'
## #' Compute empirical Bayes shrinkage factor \eqn{w_{eb}}{w_eb} and the
## #' normalized half-length of the associated robust Empirical Bayes
## #' confidence interval (EBCI).
## #' @inheritParams w_opt
## #' @return Returns a list with 3 components:
## #' \describe{
## #'
## #' \item{\code{w}}{Empirical Bayes shrinkage factor \eqn{w_{eb}}{w_eb}}
## #'
## #' \item{\code{length}}{Normalized half-length of the corresponding conf
## #' interval, so that the interval obtains by taking the estimator based on
## #' shrinkage given by \code{w}, and adding and subtracting \code{length}
## #' times the standard error \eqn{\sigma}{sigma} of preliminary estimator.}
## #'
## #' \item{\code{m2}}{Second moment of the normalized bias, \code{1/S}.}
## #'
## #' }
## #' @references{
## #'
## #' \cite{Armstrong, Timothy B., Kolesár, Michal, and Plagborg-Møller, Mikkel
## #' (2020): Robust Empirical Bayes Confidence Intervals,
## #' \url{https://arxiv.org/abs/2004.03448}}
## #'
## #' }
## #' @examples
## #' w_eb(2, 3)
## #' ## No constraint on kurtosis doesn't affect shrinkage, but yields
## #' ## larger half-length
## #' w_eb(2, Inf)
w_eb <- function(S, kappa=Inf, alpha=0.05) {
## Empirical Bayes linear shrinkage factor S/(1+S), together with the
## normalized half-length of the associated robust EBCI. The second
## moment of the normalized bias under EB shrinkage equals 1/S.
shrinkage <- S/(1+S)
bias2 <- 1/S
half_length <- cva(bias2, kappa=kappa, alpha=alpha, check=TRUE)$cv*shrinkage
list(w=shrinkage, length=half_length, m2=bias2)
}
## Estimate mu_2 (second moment) and kappa (kurtosis) of theta - X'delta
## from residuals `eps` with standard errors `se` and weights `wgt`,
## applying the finite-sample correction given by `fs_correction`
## ("none", "PMT", or "FPLIB"). Pre-specified `mu2`/`kappa` values, if
## non-NULL, bypass estimation of that moment. Returns
## c(mu2, kappa, uncorrected mu2, uncorrected kappa).
moments <- function(eps, se, wgt, fs_correction="PMT", mu2=NULL, kappa=NULL) {
    ## Sampling variance of the weighted mean of Z
    var_wmean <- function(Z, w) {
        num <- sum(w^2*(Z^2-stats::weighted.mean(Z, w)^2))
        num/(sum(w)^2-sum(w^2))
    }
    ## Mean of a N(m, V) truncated to be positive (flat-prior LIB)
    trunc_mean <- function(m, V) {
        s <- sqrt(V)
        m + s*stats::dnorm(m/s)/stats::pnorm(m/s)
    }
    ## Unbiased per-observation estimates of the 2nd and 4th moments
    m2_hat <- eps^2 - se^2
    m4_hat <- eps^4 - 6*se^2*eps^2 + 3*se^4
    ## \tilde{mu}_2 and \tilde{mu}_4: uncorrected weighted means
    tmu2 <- stats::weighted.mean(m2_hat, wgt)
    tmu4 <- stats::weighted.mean(m4_hat, wgt)
    ## Posterior-mean-truncation thresholds
    trim2 <- 2 *mean(wgt^2*se^4)/(sum(wgt)*mean(wgt*se^2))
    trim4 <- 32*mean(wgt^2*se^8)/(sum(wgt)*mean(wgt*se^4))
    if (is.null(mu2))
        mu2 <- switch(fs_correction,
                      "none"=max(tmu2, 0),
                      "PMT"=max(tmu2, trim2),
                      "FPLIB"=trunc_mean(tmu2, var_wmean(m2_hat, wgt)),
                      stop("Unknown fs_correction method"))
    if (is.null(kappa))
        kappa <- switch(fs_correction,
                        "none"=max(tmu4/mu2^2, 1),
                        "PMT"=max(tmu4/mu2^2, 1 + trim4/mu2^2),
                        "FPLIB"= 1+trunc_mean(tmu4-tmu2^2,
                                              var_wmean(m4_hat-2*mu2*m2_hat,
                                                        wgt))/mu2^2)
    c(mu2, kappa, tmu2, tmu4/tmu2^2)
}
#' Compute empirical Bayes confidence intervals by shrinking toward regression
#'
#' Computes empirical Bayes estimators based on shrinking towards a regression,
#' and associated robust empirical Bayes confidence intervals (EBCIs), as well
#' as length-optimal robust EBCIs.
#' @param formula object of class \code{"formula"} (or one that can be coerced
#' to that class) of the form \code{Y ~ predictors}, where \code{Y} is a
#' preliminary unbiased estimator, and \code{predictors} are predictors
#' \eqn{X} that guide the direction of shrinkage. For shrinking toward the
#' grand mean, use \code{Y ~ 1}, and for shrinking toward \code{0} use
#' \code{Y ~ 0}
#' @param data optional data frame, list or environment (or object coercible by
#' \code{as.data.frame} to a data frame) containing the preliminary
#' estimator \code{Y} and the predictors. If not found in \code{data}, these
#' variables are taken from \code{environment(formula)}, typically the
#' environment from which the function is called.
#' @param se Standard errors \eqn{\sigma}{sigma} associated with the preliminary
#' estimates \code{Y}
#' @param weights An optional vector of weights to be used in the fitting
#' process in computing \eqn{\delta}{delta}, \eqn{\mu_2}{mu_2} and
#' \eqn{\kappa}{kappa}. Should be \code{NULL} or a numeric vector.
#' @param alpha Determines confidence level, \eqn{1-\alpha}{1-alpha}.
#' @param kappa If non-\code{NULL}, use pre-specified value for the kurtosis
#' \eqn{\kappa}{kappa} of \eqn{\theta-X'\delta}{theta-X*delta} (such as
#' \code{Inf}), instead of computing it.
#' @param fs_correction Finite-sample correction method used to compute
#' \eqn{\mu_2}{mu_2} and \eqn{\kappa}{kappa}. These corrections ensure that
#' we do not shrink the preliminary estimates \code{Y} all the way to zero.
#' If \code{"PMT"}, use posterior mean truncation, if \code{"FPLIB"} use
#' limited information Bayesian approach with a flat prior, and if
#' \code{"none"}, truncate the estimates at \code{0} for \eqn{\mu_2}{mu_2}
#' and \code{1} for \eqn{\kappa}{kappa}.
#' @param wopt If \code{TRUE}, also compute length-optimal robust EBCIs. These
#' are robust EBCIs centered at estimates with the shrinkage factor
#' \eqn{w_i}{w_i} chosen to minimize the length of the resulting EBCI.
#' @return Returns a list with the following components: \describe{
#'
#' \item{\code{mu2}}{Estimated second moment of
#' \eqn{\theta-X'\delta}{theta-X*delta}, \eqn{\mu_2}{mu_2}. Vector
#' of length 2, the first element corresponds to the estimate after the
#' finite-sample correction as specified by \code{fs_correction}, the second
#' element is the uncorrected estimate.}
#'
#' \item{\code{kappa}}{Estimated kurtosis \eqn{\kappa}{kappa} of
#' \eqn{\theta-X'\delta}{theta-X*delta}. Vector of length 2 with the same
#' structure as \code{mu2}.}
#'
#' \item{\code{delta}}{Estimated regression coefficients \eqn{\delta}{delta}}
#'
#' \item{\code{X}}{Matrix of regressors}
#'
#' \item{\code{alpha}}{Determines confidence level \eqn{1-\alpha} used.}
#'
#' \item{\code{df}}{Data frame with components described below.}
#' }
#'
#' \code{df} has the following components:
#'
#' \describe{
#'
#' \item{\code{w_eb}}{EB shrinkage factors,
#' \eqn{\mu_{2}/(\mu_{2}+\sigma^2_i)}{mu_2/(mu_2+sigma^2_i)}}
#'
#' \item{\code{w_opt}}{Length-optimal shrinkage factors}
#'
#' \item{\code{ncov_pa}}{Maximal non-coverage of parametric EBCIs}
#'
#' \item{\code{len_eb}}{Half-length of robust EBCIs based on EB shrinkage, so
#' that the intervals take the form \code{cbind(th_eb-len_eb, th_eb+len_eb)}}
#'
#' \item{\code{len_op}}{Half-length of robust EBCIs based on length-optimal
#' shrinkage, so that the intervals take the form \code{cbind(th_op-len_op,
#' th_op+len_op)}}
#'
#' \item{\code{len_pa}}{Half-length of parametric EBCIs, which take the form
#' \code{cbind(th_eb-len_pa, th_eb+len_pa)}}
#'
#' \item{\code{len_us}}{Half-length of unshrunk CIs, which take the form
#' \code{cbind(th_us-len_us, th_us+len_us)}}
#'
#' \item{\code{th_us}}{Unshrunk estimate \eqn{Y}}
#'
#' \item{\code{th_eb}}{EB estimate.}
#'
#' \item{\code{th_op}}{Estimate based on length-optimal shrinkage.}
#'
#' \item{\code{se}}{Standard error \eqn{\sigma}{sigma}, as supplied by the
#' argument \code{se}}
#'
#' \item{\code{weights}}{Weights used}
#'
#' \item{\code{residuals}}{The residuals \eqn{Y_i-X_i\delta}{Y_i-X_i*delta}}
#'
#' }
#' @references{
#'
#' \cite{Armstrong, Timothy B., Kolesár, Michal, and Plagborg-Møller, Mikkel
#' (2020): Robust Empirical Bayes Confidence Intervals,
#' \url{https://arxiv.org/abs/2004.03448}}
#'
#' }
#' @examples
#' ## Same specification as in empirical example in Armstrong, Kolesár
#' ## and Plagborg-Møller (2020), but only use data on NY commuting zones
#' r <- ebci(theta25 ~ stayer25, data=cz[cz$state=="NY", ],
#' se=se25, weights=1/se25^2)
#' @export
ebci <- function(formula, data, se, weights=NULL, alpha=0.1, kappa=NULL,
wopt=FALSE, fs_correction="PMT") {
## Construct model frame: rebuild the call keeping only the arguments
## stats::model.frame understands, then evaluate in the caller's frame
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "se", "weights"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$formula <- stats::as.formula(formula)
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
Y <- stats::model.response(mf, "numeric")
wgt <- as.vector(stats::model.weights(mf))
## Do not weight if no weights supplied
if (is.null(wgt))
wgt <- rep(1L, length(Y))
se <- mf$"(se)"
X <- stats::model.matrix(attr(mf, "terms"), mf)
## Normal critical value for unshrunk CIs
za <- stats::qnorm(1-alpha/2)
## Compute delta (regression coefficients) via weighted least squares,
## then the moments mu2 and kappa from the residuals
r1 <- stats::lm.wfit(X, Y, wgt)
mu1 <- unname(r1$fitted.values)
delta <- r1$coefficients
## Weighted sample variance and mean of truncated normal
mus <- moments(Y-mu1, se, wgt, fs_correction, kappa=kappa)
if (mus[1] == 0) {
## Degenerate case: estimated signal variance is zero, so estimates
## shrink fully to the regression fit and CI lengths are undefined.
## Include weights/residuals so df matches the documented structure.
warning("mu2 estimate is 0")
df <- data.frame(w_eb=se*0, w_opt=se*0, ncov_pa=se*NA, len_eb=se*NA,
len_op=se*NA, len_pa=se*NA, len_us=za*se, th_us=Y,
th_eb=Y-mu1, th_op=Y-mu1, se=se, weights=wgt,
residuals=Y-mu1)
return(list(mu2=c("estimate"=mus[1], "uncorrected_estimate"=mus[3]),
kappa=c("estimate"=mus[2], "uncorrected_estimate"=mus[4]),
delta=delta, df=df, X=X, alpha=alpha))
}
## EB shrinkage: one w_eb fit per observation (returns w, length, m2)
eb <- vapply(se, function(se) unlist(w_eb(mus[1]/se^2, mus[2], alpha)),
numeric(3))
eb <- as.data.frame(t(eb))
th_eb <- mu1+eb$w*(Y-mu1)
## Optimal shrinkage. First pre-compute a cva interpolation table for
## this kurtosis (worthwhile only with many observations); the table
## spans the m2 range implied by the smallest and largest se.
## (Scalar condition, so use `&&` rather than elementwise `&`.)
if (length(se) > 30 && wopt) {
mmin <- w_opt(mus[1]/min(se)^2, mus[2], alpha=alpha)$m2
mmax <- w_opt(mus[1]/max(se)^2, mus[2], alpha=alpha)$m2
df <- data.frame(m2=c(mmin*0.95, seq(mmin, mmax, length.out=501),
mmax*1.05))
cvj <- function(j)
cva(df$m2[j], kappa=mus[2], alpha, check=TRUE)$cv
df$cv <- vapply(seq_len(nrow(df)), cvj, numeric(1))
} else {
df <- NULL
}
if (wopt) {
## Length-optimal shrinkage per observation, interpolating critical
## values from the pre-computed table when available
op <- vapply(se, function(se)
unlist(w_opt(mus[1]/se^2, mus[2], cv_tbl=df,
alpha=alpha)), numeric(3))
op <- as.data.frame(t(op))
th_op <- mu1+op$w*(Y-mu1)
} else {
op <- eb*NA
th_op <- th_eb*NA
}
## Maximal non-coverage of the parametric EBCI at each observation
par_cov <- function(j)
rho(se[j]^2/mus[1], mus[2],
za*sqrt(1+se[j]^2/mus[1]), check=TRUE)$alpha
ncov_pa <- vapply(seq_along(Y), par_cov, numeric(1))
df <- data.frame(w_eb=eb$w, w_opt=op$w, ncov_pa=ncov_pa,
len_eb=eb$length*se, len_op=op$length*se,
len_pa=sqrt(eb$w)*za*se, len_us=za*se, th_us=Y,
th_eb=th_eb, th_op=th_op, se=se,
weights=wgt, residuals=Y-mu1)
list(mu2=c("estimate"=mus[1], "uncorrected_estimate"=mus[3]),
kappa=c("estimate"=mus[2], "uncorrected_estimate"=mus[4]),
delta=delta, df=df, X=X, alpha=alpha)
}
## Any scripts or data that you put into this service are public.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.