source("data-raw/screening-rules.R")
library(SLOPE)
library(rdatasets)
data <- list(
dorothea = dorothea,
arcene = arcene,
golub = golub,
gisette = gisette
)
out <- data.frame()
for (i in 1:length(data)) {
nm <- names(data)[i]
x <- data[[i]]$x
y <- data[[i]]$y
x_scale <- apply(x, 2, norm, "2")
x <- scale(x[, x_scale != 0], scale = x_scale[x_scale != 0])
n <- nrow(x)
p <- ncol(x)
x_colnorms <- apply(x, 2, norm, "2")
for (family in c("gaussian", "binomial")) {
y <- data[[i]]$y
if (family == "gaussian")
y <- y - mean(y)
else
y <- sign(y - 0.5)
cat(i, nm, family, "\n")
fit <- SLOPE(x,
y,
family = family,
scale = FALSE,
center = FALSE,
lambda = "bh",
q = 0.1*min(1, n/p),
max_passes = 1e5,
diagnostics = TRUE,
screen = TRUE)
n_penalties <- length(fit$sigma)
n_kkt_checks <- lengths(fit$violations)[1:n_penalties]
active_sets <- matrix(FALSE, p, n_penalties)
for (j in 2:n_penalties) {
beta_prev <- coef(fit)[-1, j-1]
intercept_prev <- coef(fit)[1, j-1]
lambda <- fit$lambda*fit$sigma[j]
lambda_prev <- fit$lambda*fit$sigma[j-1]
linear_predictor <- x %*% beta_prev + intercept_prev
if (family == "gaussian") {
pseudo_gradient_prev <- linear_predictor - y
} else {
# binomial
pseudo_gradient_prev <- -y / (1 + exp(y * linear_predictor));
}
gradient_prev <- t(x) %*% pseudo_gradient_prev
active_sets[, j] <- activeSet(x,
y,
lambda*nrow(x),
lambda_prev*nrow(x),
beta_prev,
intercept_prev,
gradient_prev,
pseudo_gradient_prev,
x_colnorms,
method = "strong")
active_sets[, j] <- active_sets[, j] | (beta_prev != 0)
}
beta <- coef(fit)[-1, ]
n_violations <- colSums((!active_sets) & (beta != 0))
n_active <- colSums(active_sets)
n_true_active <- colSums(beta != 0)
n_unique <- apply(beta, 2, function(x) {
length(unique(abs(x[x != 0])))
})
tmp <- data.frame(dataset = nm,
family = family,
n = nrow(x),
p = ncol(x),
penalty = seq_along(fit$sigma),
n_violations = n_violations,
n_active = n_active,
n_true_active = n_true_active,
n_unique = n_unique,
n_kkt_checks = n_kkt_checks)
out <- rbind(out, tmp)
}
}
sim_efficiency_violations_real_data <- out
overwrite <- file.exists("data/sim_efficiency_violations_real_data.rda")
usethis::use_data(sim_efficiency_violations_real_data, overwrite = overwrite)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.