Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width=6
)
## -----------------------------------------------------------------------------
# Load the hdme package
library(hdme)
## -----------------------------------------------------------------------------
create_example_data <- function(n, p, s = 5, sdX = 1, sdU = 0.5,
sdEpsilon = 0.1, family = "gaussian") {
# Independent true covariates with mean zero and standard deviation sdX
X <- matrix(rnorm(n * p, sd = sdX), nrow = n, ncol = p)
# if Gaussian, response has standard deviation sdEpsilon, and zero intercept
# if binomial, response is binomial with mean (1 + exp(-X %*% beta))^(-1)
beta <- c(-2, -1, 0.5, 1, 2, rep(0, p - s))
if(family == "gaussian") {
# True coefficient vector has s non-zero elements and p-s zero elements
y <- X %*% beta + rnorm(n, sd = sdEpsilon)
} else if (family == "binomial") {
# Need an amplification in the binomial case
beta <- beta * 3
y <- rbinom(n, size = 1, prob = (1 + exp(-X %*% beta))**(-1))
}
# The measurements W have mean X and standard deviation sdU.
# We assume uncorrelated measurement errors
W <- X + matrix(rnorm(n * p, sd = sdU), nrow = n, ncol = p)
return(list(X = X, W = W, y = y, beta = beta, sigmaUU = diag(p) * sdU))
}
## ---- message=FALSE-----------------------------------------------------------
n <- 100
p <- 500
set.seed(1000)
ll <- create_example_data(n, p)
## ---- message=FALSE-----------------------------------------------------------
library(glmnet)
library(dplyr)
# Lasso with cross-validation on data without measurement error
fit1 <- cv.glmnet(ll$X, ll$y)
# Lasso with cross-validation on data with measurement error
fit2 <- cv.glmnet(ll$W, ll$y)
# Create a data frame with results ([-1] because we drop the intercept)
lassoEstimates <- tibble(
index = rep(1:p, times = 3),
beta = c(ll$beta, as.numeric(coef(fit1)[-1]), coef(fit2)[-1]),
label = c(rep("True values", p), rep("No measurement error", p), rep("Measurement error", p))
)
## -----------------------------------------------------------------------------
library(ggplot2)
theme_set(theme_bw())
theme_update(
panel.grid = element_blank()
)
ggplot(lassoEstimates, aes(x = index, y = beta, color = label)) +
geom_point() +
xlab("p") +
scale_color_brewer(type = "qualitative", palette = "Paired") +
theme(legend.title=element_blank()) +
ggtitle("Measurement error leading to false positives")
## ---- message=FALSE, warning=FALSE--------------------------------------------
library(tidyr)
estimatesOfNonzero <- lassoEstimates %>%
spread(key = label, value = beta) %>%
filter(`True values` != 0) %>%
gather(key = label, value = beta, -index)
ggplot(estimatesOfNonzero, aes(x = index, y = beta, color = label)) +
geom_point() +
xlab("p") +
scale_color_brewer(type = "qualitative", palette = "Paired") +
theme(legend.title=element_blank()) +
ggtitle("Measurement error leading to attenuation")
## -----------------------------------------------------------------------------
# Number of samples
n <- 1000
# Number of covariates
p <- 50
# Create example data
ll <- create_example_data(n, p, family = "binomial")
## -----------------------------------------------------------------------------
args(gds)
## -----------------------------------------------------------------------------
# Fit the Generalized Dantzig Selector
gds_estimate <- gds(ll$X, ll$y, family = "binomial")
## -----------------------------------------------------------------------------
class(gds_estimate)
## -----------------------------------------------------------------------------
str(gds_estimate)
## -----------------------------------------------------------------------------
set.seed(1000)
# Generate example data
ll <- create_example_data(n, p)
# Fit the corrected lasso
corrected_fit <- corrected_lasso(W = ll$W, y = ll$y, sigmaUU = ll$sigmaUU)
## -----------------------------------------------------------------------------
# Class of the object
class(corrected_fit)
# The coef() method prints the number of nonzero estimates as a function of the radius
coef(corrected_fit)
## -----------------------------------------------------------------------------
args(corrected_lasso)
## -----------------------------------------------------------------------------
plot(corrected_fit)
## -----------------------------------------------------------------------------
plot(corrected_fit, type = "path")
## -----------------------------------------------------------------------------
set.seed(323)
n <- 100
p <- 50
ll <- create_example_data(n, p, sdU = 0.2, family = "binomial")
## -----------------------------------------------------------------------------
corrected_fit <- corrected_lasso(ll$W, ll$y, ll$sigmaUU, family = "binomial")
## -----------------------------------------------------------------------------
plot(corrected_fit)
## -----------------------------------------------------------------------------
plot(corrected_fit, type = "path")
## -----------------------------------------------------------------------------
set.seed(1000)
# Generate example data
ll <- create_example_data(n, p)
# Run lasso with cross-validation
cv_corrected_fit <- cv_corrected_lasso(W = ll$W, y = ll$y, sigmaUU = ll$sigmaUU)
## -----------------------------------------------------------------------------
class(cv_corrected_fit)
## -----------------------------------------------------------------------------
str(cv_corrected_fit)
## -----------------------------------------------------------------------------
plot(cv_corrected_fit)
## -----------------------------------------------------------------------------
corrected_fit <- corrected_lasso(ll$W, ll$y, ll$sigmaUU, radii = cv_corrected_fit$radius_1se)
## -----------------------------------------------------------------------------
str(corrected_fit)
## -----------------------------------------------------------------------------
set.seed(1)
# Number of samples
n <- 1000
# Number of covariates
p <- 50
# Generate data
ll <- create_example_data(n, p, sdU = 0.2)
## -----------------------------------------------------------------------------
mus_fit <- mus(ll$W, ll$y)
class(mus_fit)
## -----------------------------------------------------------------------------
coef(mus_fit)
## -----------------------------------------------------------------------------
plot(mus_fit)
## -----------------------------------------------------------------------------
mus_fit <- mus(ll$W, ll$y, delta = 0.1)
## -----------------------------------------------------------------------------
plot(mus_fit)
## -----------------------------------------------------------------------------
set.seed(323)
n <- 100
p <- 50
ll <- create_example_data(n, p, sdU = 0.2, family = "binomial")
gmus_fit <- gmus(ll$W, ll$y, family = "binomial")
## -----------------------------------------------------------------------------
class(gmus_fit)
str(gmus_fit)
## -----------------------------------------------------------------------------
plot(gmus_fit)
## -----------------------------------------------------------------------------
gmus_fit <- gmus(ll$W, ll$y, delta = 0.1, family = "binomial")
## -----------------------------------------------------------------------------
plot(gmus_fit)
## -----------------------------------------------------------------------------
set.seed(323)
n <- 100
p <- 50
ll <- create_example_data(n, p, sdU = 0.2, family = "binomial")
gmu_lasso_fit <- gmu_lasso(ll$W, ll$y, family = "binomial")
## -----------------------------------------------------------------------------
class(gmu_lasso_fit)
str(gmu_lasso_fit)
## -----------------------------------------------------------------------------
plot(gmu_lasso_fit)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.