# Main source file for inference on regions
#source('samplerWrappers.R')
regionConfInt = function(dat, groups, threshold, Sigma = NULL,
CIparams, conditioning, external_mu_val=NULL,
tilt = FALSE,
regularize_sig = 0,
sampler = "restrictedMVN",...){
# We assume the parameter exceeds the threshold,
# and find a confidence interval for the sum
datSt = getDiffMoments(as.matrix(dat), groups , regularize_sig)
if (! is.null(Sigma)) {
datSt$cov_diff = Sigma
CIparams$sig_known = TRUE
} else{
CIparams$sig_known = FALSE
}
CI_res = formTwosidedCI(diffVec = datSt$delta_hat,thresh = threshold, conditioning = conditioning,
diffSigma = datSt$cov_diff,external_mu_val = external_mu_val,
tilt = tilt,params =CIparams, sampler = sampler,...)
return(CI_res)
}
formTwosidedCI = function(diffVec,thresh,diffSigma, params,
sampler = "hamiltonian",conditioning = rep(1,length(diffVec)),
inner_shape = NULL,
external_mu_val=0,
start_search = "obs",
tilt = FALSE,
median_sweep = FALSE,
doPlot = FALSE){
#
# thresh is the thresholding value (always positive).
# A conditioning vector of {-1,0,1}^d is given in the input.
# For a positive region:
# condition[i] = 1 means diffVec[i]>thresh
# condition[i] = -1 means diffVec[i]<thresh
# For a negative region:
# condition[i] = 1 means diffVec[i] < -thresh
# condition[i] = -1 means diffVec[i] > -thresh
# eta is taken to be 1 if condition[i]=1 and 0 otherwise
# An unchecked assumption is that the region 'conditioning==1' is a single connected region.
thin_c = 6 # thinning per variable
###
# 1. Preparation
###
alpha = params$alpha
d = length(diffVec)
eta = 1*(conditioning==1)
region = which(conditioning==1)
exterior = which(conditioning==-1)
diffSigmaInv = solve(diffSigma)
stopifnot(sum(eta)>0) # at least one positive point in the region
# Choose sampler function
samp_func = setSampler(sampler)
# The inference function runs for positive regions, so we first set direction
# Check direction
direction = 1
if (diffVec[region[1]] < 0) {
diffVec = -diffVec
direction = -1
}
stopifnot(all ( diffVec[region]>= thresh) & all(diffVec[exterior] < thresh))
# Set lower and upper thresholds for conditioning
lower = rep(-Inf, d)
lower[region] = thresh
upper = rep(Inf, d)
upper[exterior] = thresh
# Find the observed statistic (area-of-region)
stat_obs = sum(eta * diffVec)
obs_quant = numeric(length(params$m_values))
# make sure the profile has a sum of 1.
if (is.null(inner_shape)) {
inner_shape = getMuSigmaEta(1, diffSigma[region,region], eta[region])
}
inner_shape = inner_shape/mean(inner_shape)
if (is.null(external_mu_val)){
external_mu_val = diffVec[exterior]
}
# Choose starting value:
if (is.numeric(start_search)){
m0 = start_search
} else if (start_search == 'thresh'){
m0 = (thresh + stat_obs/sum(eta))/2
} else if (start_search == 'obs') {
m0 = stat_obs/sum(eta)
}
start.value = diffVec
if (tilt) {
# We want to search for a good starting value of m0, so stat_obs is the sum of the region
m0stop = FALSE
m0counter = 0
full_mus = numeric(length(eta))
while(m0stop){
mean_vec = setMeanVec(m0, inner_shape, external_mu_val, conditioning)
search_samp = samp_func(50, mu = mean_vec, sigma = diffSigma, lower = lower, upper = upper,
burn.in = d*100*thin_c,thinning = d*thin_c,algorithm = "gibbs",
start.value = start.value)
# Check that the observed statistic is not in the tails of the distribution for this m0
search_res = searchStep( search_samp, eta, stat_obs, quants = c(0.2,0.8), m0, params$searchstep)
# Check if failed to get a non NA sample
if(is.na(search_res$success)){ return(list()) }
# process search results; stop if the following conditions occur:
# (m0 < min(params$m_values)) | (m0 > max(params$m_values) | (m0counter > 20) )
m0counter = m0counter + 1
if ((m0 < min(params$m_values)) | (m0 > max(params$m_values) | (m0counter > 20) )){
m0stop = TRUE
force_break = TRUE
} else{
m0stop = search_res$success
m0 = search_res$new_m0
}
}
mean_vec = setMeanVec(m0, inner_shape, external_mu_val, conditioning)
samp = samp_func(params$nsamp, mu = mean_vec, sigma = diffSigma, lower = lower, upper = upper,
burn.in = d*100*thin_c, thinning = d*thin_c , algorithm = "gibbs",
start.value = start.value)
# Check if failed to get a non NA sample
if(any(is.na(samp))){ return(list()) }
ts = samp %*% eta
o_samp = samp[order(ts),]
o_ts = o_samp %*% eta
mu_shape = numeric(length(eta))
mu_shape[region] = inner_shape
weights = o_samp %*% t(mu_shape %*% diffSigmaInv)
# Note that when weights are too small for tilting, sometimes we might get Nan's
for (i in 1:length(params$m_values)){
curr_m = params$m_values[i]
min_wt = min((curr_m-m0)*weights)
rescaled_weights = exp((curr_m-m0)*weights-min_wt)
norm_weights = rescaled_weights/sum(rescaled_weights)
obs_quant[i] = sum(norm_weights[o_ts<stat_obs])
}
} else {
full_mus = numeric(length(eta))
for (i in 1:length(params$m_values)){
mean_vec = setMeanVec(params$m_values[i], inner_shape, external_mu_val, conditioning)
samp = samp_func(params$nsamp, mu = mean_vec, sigma = diffSigma, lower = lower, upper = upper,
burn.in = d*100*thin_c,thinning = d*thin_c,algorithm = "gibbs",
start.value = diffVec)
if (any(is.na(samp))){
samp = samp_func(params$nsamp, mu = mean_vec, sigma = diffSigma, lower = lower, upper = upper,
burn.in = d*100*thin_c,thinning = d*thin_c,algorithm = "gibbs",
start.value = (diffVec + thresh)/2)
}
ts = samp %*% eta
# find quantile of stat_obs in o_ts:
obs_quant[i] = (sum(stat_obs >= ts)+1)/(length(ts)+2)
}
}
# sanity check
if(!any(obs_quant<0.98 & obs_quant>0.02,na.rm = TRUE)){
return(list())
# Failed run
}
index_0 = which(params$m_values==0)
p_value = 1 - obs_quant[index_0]
lower_bound = findMValue(params$m_values,obs_quant,1-alpha/2)
upper_bound = findMValue(params$m_values,obs_quant,alpha/2)
reject_reg = (params$m_values < lower_bound) | (params$m_values > upper_bound)
mean_est= findMValue(params$m_values,obs_quant,0.5)
full_baseline = setMeanVec(1, inner_shape, external_mu_val, conditioning)
return( list(vals = params$m_values, rej = reject_reg, quants = obs_quant,
baseline = full_baseline, params = params,
direction = direction,p_value = p_value, stat_obs = stat_obs,
lower_bound = lower_bound, upper_bound = upper_bound, cond_mean_est = mean_est))
}
getDiffMoments = function(datmat, groups, regularize_sig=0) {
# Extracts the for a difference-of-means model information from the samples,
# including :
# - Vector of estimators Z
# - Estimated covariance per group
# - Estimated covariance of difference
reg_cov_weight = regularize_sig
datSt = list()
if(nrow(datmat)==1){
datmat = t(datmat)
}
stopifnot(nrow(datmat)==length(groups))
datSt$n_a = sum(groups==1)
datSt$n_b = sum(groups==0)
if (ncol(datmat)>1) {
dat_a = datmat[groups==1,]
dat_b = datmat[groups==0,]
datSt$cov_a = cov(dat_a)
datSt$cov_b = cov(dat_b)
# to regularize sigma, we inflate by (1+reg_cov_weight)
reg_cov_a = datSt$cov_a
diag(reg_cov_a) = diag(reg_cov_a)*(1+reg_cov_weight)
datSt$cov_a = reg_cov_a
reg_cov_b = datSt$cov_b
diag(reg_cov_b) = diag(reg_cov_b)*(1+reg_cov_weight)
datSt$cov_b = reg_cov_b
}
else{
dat_a = datmat[groups==1]
dat_b = datmat[groups==0]
datSt$cov_a = var(dat_a)*(1+reg_cov_weight)
datSt$cov_b = var(dat_b)*(1+reg_cov_weight)
datSt$delta_hat = mean(dat_a) - mean(dat_b)
}
datSt$cov_diff = datSt$cov_a/datSt$n_a + datSt$cov_b/datSt$n_b
datSt$delta_hat = colMeans(dat_a) - colMeans(dat_b)
return(datSt)
}
setMeanVec = function(m, inner_shape, ext_shape, conditioning){
# Takes the profile vector inner_shape and scales by m
# Then adds the ext_shape to the external region
mean_vec = numeric(length(conditioning))
region = which(conditioning==1)
mean_vec[region] = m *inner_shape
exterior = which(conditioning==-1)
mean_vec[exterior] = ext_shape
return(mean_vec)
}
searchStep = function ( samp, eta, stat_obs, quants = c(0.2,0.8), m0, step_size) {
# Takes a sample and decides whether m0 is in the qunatile range defined
# by quants, or needs to be changed (by step_size)
stopifnot(length(quants)==2)
# check for NAs
search_res = list()
if(any(is.na(samp))){
# Failed to get a non NA sample
search_res$success = NA
search_res$new_m0 = m0
return(search_res)
}
search_ts = samp %*% eta
search_qs = quantile(ecdf(search_ts),c(quants))
if ((stat_obs < search_qs[2] ) & (stat_obs > search_qs[1] )) {
search_res$success = TRUE
search_res$new_m0 = m0
}
else {
search_res$success = FALSE
if (stat_obs > search_qs[2]) {
search_res$new_m0 = m0 +step_size
} else {
search_res$new_m0 = m0 - step_size
}
}
return(search_res)
}
getMuSigmaEta = function(m, Sigma,eta) {
# Get the mean vector from the Sigma and a parameter m0.
# The goal is to get a positive mean vector mu = m*shape
# so that m is the magnitude:
# m * sum(support(eta)) = eta'mu -> m = eta'mu/support(eta)
# and its shape is proportion to eta'Sigma
# shape propto eta'Sigma
if (is.matrix(Sigma)){
shape = as.vector(eta %*% Sigma)
normalizing = sum(eta * shape)/sum(eta)
shape = shape/normalizing
} else { # in case Sigma is a real-value
shape = 1
}
return(m * shape)
}
findMValue = function(m_values,obs_quant,p){
# A bit counter intuitive, but as m increases the reference distribution shifts right.
# Therefore, the lower bound for m is should be the first value where obs_quant is smaller than p,
# and the upper bound for m is the first value where obs_quant is larger than p.
# If we use no tilt, we might not have monotone values in the quantile estimates.
# Instead of smoothing, we give a score to how many values agree with this selection.
n = length(obs_quant)
score = numeric(n+1)
score[1] = sum(obs_quant< p, na.rm=TRUE)
for (i in 2:length(score)){
score[i] = sum(obs_quant[1:(i-1)] >= p,na.rm=TRUE) + sum(obs_quant[(i):n]< p,
na.rm=TRUE)
}
score[n+1] = sum(obs_quant > p,na.rm=TRUE)
best_match = which.max(score)
quantile = c(-Inf,m_values)[best_match]
return(quantile)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.