#' Survival function of truncated normal distribution.
#'
#' @keywords internal
#' @details
#' Let \eqn{X} be a normal random variable with mean \code{mu} and standard deviation \code{sig*nu_norm}.
#' This function returns \eqn{P(X \ge v^{T}y | X \in \code{truncation)}}, where \code{truncation} is a subset of the real line.
#' Log-sum-exp operations are used to avoid underflows in the upper tail probability of the truncated normal distribution.
#'
#' Input:
#' @param truncation A data frame of truncation with 3 columns: min_mean, max_mean, and contained.
#' Each row corresponds to a truncation interval with lower and upper limits specified by min_mean and max_mean,
#' respectively. In addition, contained is binary-valued and indicates whether this interval is included
#' in the final truncation set.
#' @param vTy Numeric; the value to evaluate the survival function on
#' @param nu_norm Numeric; part 1 of standard deviation
#' @param sig Numeric; part 2 of standard deviation
#' @param mu Numeric; mean of untruncated distribution, default to 0
#' @examples
#' truncation <- data.frame(matrix(c(-2,1,0,2),byrow = T,ncol=2))
#' colnames(truncation) <- c("min_mean", "max_mean")
#' truncation$contained <- 1
#' TNSurv(truncation,vTy=0, nu_norm=1, sig=1)
#' @export
TNSurv <- function(truncation, vTy, nu_norm, sig, mu = 0){
n_intervals <- dim(truncation)[[1]]
n1 = -Inf;
d1 = -Inf;
for (i in c(1:n_intervals)) {
cur_interval <- truncation[i, ]
if (cur_interval$contained == 1) {
a = pnorm((cur_interval$max_mean - mu) / sqrt(nu_norm * sig), log.p = TRUE);
b = pnorm((cur_interval$min_mean - mu) / sqrt(nu_norm * sig), log.p = TRUE);
arg2 = log_subtract(a, b);
d1 = log_sum_exp(d1, arg2);
# one-sided p-value
# two-sided p-values
if (cur_interval$max_mean >= (vTy)) {
arg2 = log_subtract(pnorm((cur_interval$max_mean - mu) / sqrt(nu_norm * sig), log.p = TRUE ),
pnorm((max(cur_interval$min_mean, vTy) - mu) / sqrt(nu_norm * sig), log.p = TRUE));
n1 = log_sum_exp(n1, arg2);
}
}
}
if(is.nan(exp(n1 - d1))){
p = 0
}else{
p = exp(n1 - d1)
}
return (p);
}
# ----- computing the confidence intervals -----
# return selective ci for v^T mu (i.e. a single parameter)
#' compute selective confidence intervals
#'
#' This function computes an equal-tailed (1-alpha) selective confidence intervals.
#'
#' @keywords internal
#'
#' @param truncation, the truncation set (object: intervals)
#' Computes a confidence interval for the mean of a truncated normal distribution.
#' @param vTv v: the contrast vector that defines the parameter of interest
#' @param vTy y: the observed response vector
#' @param sigma The known noise standard deviation.
#' If unknown, we recommend a conservative estimate. If it
#' is left blank, we use the sample variance as a conservative estimate.
#' @param alpha, the significance level. Default to 0.05
#' @param steps_lim, the maximum steps bisection method will take to initialize the LCB and UCB, default
#' to 50.
#' @return This function returns a vector of lower and upper confidence limits.
#'
#' @export
compute_CI <- function(vTy, vTv, sigma, truncation, alpha=0.05, steps_lim=50) {
### Conservative guess
scale <- sigma * sqrt(vTv)
q <- sum(vTy) / scale
# transform for calculating p value with`calc_p_value_safer`
truncation <- data.frame(truncation)
colnames(truncation) <- c("min_mean","max_mean")
truncation$contained <- 1
fun <- function(x) {
# survival function
return(TNSurv(truncation, vTy, vTv, sigma, mu = x))
}
# L: fun.L(L) = 0
fun.L <- function(x) {
return(fun(x) - (alpha/2))
}
# U: fun.U(U) = 0
fun.U <- function(x) {
return(fun(x) - (1-alpha/2))
}
# find the starting point (x1, x2) such that
# fun.L(x1), fun.U(x1) <= 0 AND fun.L(x2), fun.U(x2) >= 0.
# i.e. fun(x1) <= alpha/2 AND fun(x2) >= 1-alpha/2.
# find x1 s.t. fun(x1) <= alpha/2
# what we know:
# fun is monotonically increasing;
# fun(x) = NaN if x too small;
# fun(x) > alpha/2 if x too big.
# so we can do a modified bisection search to find x1.
step <- 0
x1.up <- q * scale + scale
x1 <- q * scale - 3 * scale
f1 <- fun(x1)
while(step <= steps_lim) {
if (is.na(f1)) { # x1 is too small
x1 <- (x1 + x1.up) / 2
f1 <- fun(x1)
}
else if (f1 > alpha/2) { # x1 is too big
x1.up <- x1
x1 <- x1 - 3 * 1.4^step
f1 <- fun(x1)
step <- step + 1
}
else { # fun(x1) <= alpha/2, excited!
break
}
}
# find x2 s.t. fun(x2) <= 1 - alpha/2
# what we know:
# fun is monotonically increasing;
# fun(x) = NaN if x too big;
# fun(x) < 1 - alpha/2 if x too small.
# again can do a modified bisection search to find x2.
step <- 0
x2 = q * scale + 3 * scale
x2.lo = q * scale - scale
f2 = fun(x2)
while(step <= steps_lim) {
if (is.na(f2)) { # x2 is too big
x2 <- (x2 + x2.lo) / 2
f2 <- fun(x2)
}
else if (f2 < 1 - alpha/2) { # x2 is too small
x2.lo <- x2
x2 <- x2 + 3 * 1.4^step
f2 <- fun(x2)
step <- step + 1
}
else { # fun(x2) >= 1 - alpha/2, excited!
break
}
}
# if the above search does not work, set up a grid search
# for starting points
if (is.na(f1)||(f1 > alpha/2)||is.na(f2)||(f2 < 1-alpha/2)) {
grid <- seq(from = q * scale - 1000*scale, to = q*scale + 1000*scale)
value <- sapply(grid, fun)
# want max x1: fun(x1) <= alpha/2
ind1 <- rev(which(value <= alpha/2))[1]
x1 <- grid[ind1]
# want min x2: fun(x2) >= 1-alpha/2
ind2 <- which(value >= 1 - alpha/2)[1]
x2 <- grid[ind2]
}
# if the above fails, then either x1, x2 = NA, so uniroot() will throw error,
# in which case we set (-Inf, Inf) as the CI
# we know the functions are increasing
L <- tryCatch({
stats::uniroot(fun.L, c(x1, x2), extendInt = "upX", tol = 1e-4)$root
}, error = function(e) {
-Inf
})
U <- tryCatch({
stats::uniroot(fun.U, c(x1, x2), extendInt = "upX", tol = 1e-4)$root
}, error = function(e) {
Inf
})
return(c(L, U))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.