# data-raw/sim_performance_real_data.R

# Ensure the directory named by the R_LIBS_USER environment variable exists,
# creating it (and any missing parents) when it does not.
path <- Sys.getenv("R_LIBS_USER")

# # use tar on path instead
# Sys.setenv(R_BUILD_TAR = "tar")

lib_dir_exists <- dir.exists(path)

if (!lib_dir_exists) {
  dir.create(path, recursive = TRUE, showWarnings = FALSE)
}

# Restore the packrat-managed packages first, then switch packrat mode on.
packrat::restore()
packrat::on()

library(SLOPE)
library(rdatasets)
library(e1071)
library(SparseM)
library(Matrix)

# Download the e2006 test set to a temporary file, parse it with e1071 and
# store it as a list with a sparse design matrix `x` and a response `y`.
temp_file <- tempfile(fileext = ".txt")

# e2006 test set
download.file(
  "https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/QJEUKR/PGPWAK",
  destfile = temp_file,
  method = "libcurl"
)

# NOTE(review): fac = TRUE asks read.matrix.csr() to interpret the response
# as factor levels, yet e2006 is later fit with family = "gaussian" --
# confirm that this is intended.
tmp <- e1071::read.matrix.csr(temp_file, fac = TRUE)

# The raw download is no longer needed once it has been parsed; remove it
# instead of leaving it around until the R session ends.
unlink(temp_file)

# Convert the parsed matrix to a column-compressed sparse matrix (dgCMatrix)
e2006 <- list(x = as(tmp$x, "dgCMatrix"), y = tmp$y)

# Named list of the datasets to benchmark. The element names are used
# further down to select the model family for each dataset.
data <- list()
data[["e2006"]] <- e2006
data[["dorothea"]] <- rdatasets::dorothea
data[["physician"]] <- rdatasets::physician
data[["zipcode"]] <- rdatasets::zipcode

# Time a SLOPE fit on every dataset, once with and once without screening.
# Each run contributes one row to `out` with the columns dataset, family,
# screening, n, p and time (elapsed wall-clock seconds).

# Total number of fits; hoisted because it does not change inside the loops
n_runs <- 2 * length(data)

# Collect the one-row data frames in a preallocated list and bind them once
# at the end, rather than growing a data frame with rbind() per iteration.
results <- vector("list", n_runs)

iter <- 0

# seq_along() gives an empty sequence for an empty list, whereas
# 1:length(data) would iterate over c(1, 0)
for (i in seq_along(data)) {
  dataset <- names(data)[i]
  x <- data[[i]]$x
  y <- data[[i]]$y

  # Model family used for each dataset; fail loudly on an unmapped name
  family <- switch(dataset,
                   e2006 = "gaussian",
                   dorothea = "binomial",
                   physician = "poisson",
                   zipcode = "multinomial",
                   stop("no family defined for dataset: ", dataset,
                        call. = FALSE))

  n <- nrow(x)
  p <- ncol(x)

  for (screening in c(TRUE, FALSE)) {
    iter <- iter + 1

    cat("iter:", iter, "/", n_runs,
        "\tdata:", dataset, family,
        "\tscreening:", screening, "\n")

    # Only the timing is of interest, so the fitted model is not kept
    timing <- system.time(
      SLOPE(x,
            y,
            family = family,
            lambda = "bh",
            q = 0.1 * min(1, n / p),
            screen = screening)
    )

    results[[iter]] <- data.frame(dataset = dataset,
                                  family = family,
                                  screening = screening,
                                  n = n,
                                  p = p,
                                  time = timing[["elapsed"]])
  }
}

# With no datasets there is nothing to bind; fall back to an empty data frame
out <- if (n_runs > 0) do.call(rbind, results) else data.frame()

# Reset the row names, then save the results under the name
# `sim_performance_real_data`. Overwriting is only requested when the
# .rda file is already present.
row.names(out) <- NULL

sim_performance_real_data <- out

rda_exists <- file.exists("data/sim_performance_real_data.rda")
usethis::use_data(sim_performance_real_data, overwrite = rda_exists)
# jolars/strong.SLOPE.simulations documentation built on Aug. 10, 2020, 8:45 a.m.