knitr::opts_chunk$set(echo = TRUE)
devtools::install_github("hoangtt1989/BICEP")
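Because BICEP is installed from GitHub, devtools must already be available. A minimal sketch, assuming a standard CRAN setup:

## install devtools from CRAN if it is not already present
if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")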
library(BICEP)

## simulate linear-model data with n = 100 observations and p = 5 coefficients
set.seed(123)
n <- 100
p <- 5
beta_true <- c(5, 5, 3, 2, 2)
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
noise <- rnorm(n)
y <- X %*% beta_true + noise
What is the log empirical likelihood ratio for testing that the first two coefficients equal their true values, using the bi-linear algorithm (BICEP)?
test1 <- TRICEP_glm_beta_fixed(beta_true[c(1, 2)], c(1, 2), X, y)
test1$logelr
What about the nested algorithm?
test2 <- TRICEP_glm_beta_fixed(beta_true[c(1, 2)], c(1, 2), X, y, algorithm = 'nested')
test2$logelr
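These statistics can be calibrated with the usual Wilks-type result for empirical likelihood: under the null, minus two times the log empirical likelihood ratio is asymptotically chi-squared with degrees of freedom equal to the number of constrained coefficients (two here). A minimal sketch, assuming logelr stores the log EL ratio so that -2 * logelr is the test statistic:

## approximate p-values from the chi-squared calibration (2 constrained coefficients)
pchisq(-2 * test1$logelr, df = 2, lower.tail = FALSE)
pchisq(-2 * test2$logelr, df = 2, lower.tail = FALSE)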
Compute a confidence interval for the first coefficient.
confint1 <- TRICEP_glm_beta_profiler(1, X, y, conf_level = .05)
confint1
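The profiler can be pointed at any coefficient in the same way; for example, assuming the call signature shown above, an interval for the third coefficient would be:

confint3 <- TRICEP_glm_beta_profiler(3, X, y, conf_level = .05)
confint3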
library(tidyverse)
library(ggpubr)
library(janitor)

## simulation settings
n <- 200
p <- 1
alpha_add <- -.3                    ## shift added to the tested mean
test_idx <- 1                       ## coordinate being tested
nout <- 5                           ## number of induced outliers
dirty_idx <- 1:nout
clean_idx <- setdiff(1:n, dirty_idx)
dirty_min <- 4                      ## range of the outlier shift
dirty_max <- 6
set.seed(123)
true_means <- 1.3
Generate univariate data with mean equal to true_means (1.3).
##### generate data with a certain mean
data_mat <- do.call(cbind, lapply(true_means, function(x) {
  rnorm(n, mean = x)
}))
data_df <- as.data.frame(data_mat) %>%
  clean_names() %>%
  mutate(idx = 1:n)

## create the test
mean_mat <- matrix(colMeans(data_mat), nrow = n, ncol = p)
Z_mle <- data_mat - mean_mat
mean_test_mat <- mean_mat
mean_test_mat[, test_idx] <- mean_test_mat[, test_idx] + alpha_add
Z_clean_test <- data_mat - mean_test_mat
#####
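A quick sanity check, using the objects created above: the sample mean of the simulated data should be close to true_means, while the candidate value being tested is shifted by alpha_add.

## sample mean vs. the shifted candidate mean
colMeans(data_mat)
mean_test_mat[1, test_idx]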
Take the first five observations and make them outliers.
##### pick points and move them far away from the point cloud
dirty_mat <- data_mat
dirty_idx <- 1:nout
dirty_mat[dirty_idx, ] <- dirty_mat[dirty_idx, ] +
  matrix(runif(nout * p, min = dirty_min, max = dirty_max), nrow = nout, ncol = p)
mean_dirty_mat <- matrix(colMeans(dirty_mat), nrow = n, ncol = p)
dirty_df <- as.data.frame(dirty_mat) %>%
  clean_names() %>%
  mutate(Index = 1:n)
dirty_scatter <- ggplot(dirty_df, aes(x = v1, y = Index)) +
  geom_point() +
  geom_vline(xintercept = mean_dirty_mat[1, 1] + alpha_add, linetype = 2, color = 'red') +
  geom_vline(xintercept = mean_dirty_mat[1, 1], linetype = 2) +
  geom_rect(aes(xmin = min(dirty_mat[dirty_idx, ]) - .1,
                xmax = max(dirty_mat[dirty_idx, ]) + .1,
                ymin = -2, ymax = 10),
            color = 'gray', fill = NA, alpha = .1) +
  labs(x = '') +
  theme_bw()

## create the test
Z_dirty_mle <- dirty_mat - mean_mat
mean_dirty_test_mat <- mean_dirty_mat
mean_dirty_test_mat[, test_idx] <- mean_dirty_test_mat[, test_idx] + alpha_add
Z_dirty_test <- dirty_mat - mean_dirty_test_mat
#####
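Each contaminated observation is the original value plus a uniform shift between dirty_min and dirty_max; a quick check on the first nout rows confirms this (a sketch, reusing the objects defined above):

## shifts applied to the contaminated rows (should lie in [dirty_min, dirty_max])
head(dirty_mat - data_mat, nout)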
Perform empirical likelihood and robust empirical likelihood.
owen_dirty_test <- emplik_concord(Z_dirty_test)
REL_dirty_test <- REL_fixed(Z_dirty_test, q = nout, wts_beta_rep = 2, RB_tau = 1.5, dual_step = 20)
dirty_test_df <- data_frame(EL = as.vector(owen_dirty_test$wts),
                            REL = REL_dirty_test$wts,
                            Index = 1:n) %>%
  gather(Type, Weights, -Index)
In the top scatter plot (panel A), the black dashed line marks the MLE of the mean and the red dashed line marks the candidate value being tested. The five artificially induced outliers are enclosed in the gray rectangle on the right.
dirty_wts_plot <- ggplot(dirty_test_df, aes(x = Index, y = Weights, color = Type)) +
  geom_point() +
  geom_rect(aes(xmin = -1, xmax = max(dirty_idx) + .8,
                ymin = min(owen_dirty_test$wts[REL_dirty_test$outlier_idx]) - 1e-4,
                ymax = max(REL_dirty_test$wts[REL_dirty_test$outlier_idx]) + 1e-4),
            color = 'gray', fill = NA, alpha = .1) +
  geom_hline(yintercept = 1/n, linetype = 2, alpha = .5) +
  theme_bw()

ggarrange(dirty_scatter, dirty_wts_plot, nrow = 2, labels = c('A', 'B'))
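A numeric companion to panel B: compare the average weight each method assigns to the observations flagged by the robust fit against the uniform weight 1/n (a sketch, reusing the wts and outlier_idx fields from the fits above).

## average weight on the flagged observations, versus the uniform weight 1/n
mean(owen_dirty_test$wts[REL_dirty_test$outlier_idx])
mean(REL_dirty_test$wts[REL_dirty_test$outlier_idx])
1 / n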