inst/doc/oem_vignette.R

## ---- echo=FALSE, message = FALSE, cache=FALSE--------------------------------
# install.packages("oem", repos = "http://cran.us.r-project.org")

## ---- message = FALSE, cache=FALSE, eval = FALSE------------------------------
#  install.packages("devtools", repos = "http://cran.us.r-project.org")

## ---- message = FALSE, cache=FALSE, eval=FALSE--------------------------------
#  library(devtools)
#  install_github("jaredhuling/oem")

## ---- message = FALSE, cache=FALSE--------------------------------------------
library(oem)

## ---- message = FALSE, cache=FALSE--------------------------------------------
nobs  <- 1e4
nvars <- 100
x <- matrix(rnorm(nobs * nvars), ncol = nvars)
y <- drop(x %*% c(0.5, 0.5, -0.5, -0.5, 1, rep(0, nvars - 5))) + rnorm(nobs, sd = 4)

## ---- message = FALSE, cache=FALSE--------------------------------------------
fit1 <- oem(x = x, y = y, penalty = "lasso")

## ---- fig.show='hold', fig.width = 7.15, fig.height = 5-----------------------
plot(fit1)

## ---- message = FALSE, cache=FALSE--------------------------------------------
fit2 <- oem(x = x, y = y, penalty = c("lasso", "mcp", "grp.lasso", "grp.mcp"),
            groups = rep(1:20, each = 5))

## ---- fig.show='hold', fig.width = 7.15, fig.height = 10----------------------
layout(matrix(1:4, ncol = 2))
plot(fit2, which.model = 1, xvar = "lambda")
plot(fit2, which.model = 2, xvar = "lambda")
plot(fit2, which.model = 3, xvar = "lambda")
plot(fit2, which.model = "grp.mcp", xvar = "lambda")

## ---- message = FALSE, cache=FALSE--------------------------------------------
nobs  <- 1e5
nvars <- 100
x2 <- matrix(rnorm(nobs * nvars), ncol = nvars)
y2 <- drop(x2 %*% c(0.5, 0.5, -0.5, -0.5, 1, rep(0, nvars - 5))) + rnorm(nobs, sd = 4)

system.time(fit2a <- oem(x = x2, y = y2, penalty = c("grp.lasso"),
                         groups = rep(1:20, each = 5), nlambda = 100L))
system.time(fit2b <- oem(x = x2, y = y2, 
                         penalty = c("grp.lasso", "lasso", "mcp", 
                                     "scad", "elastic.net", "grp.mcp",
                                     "grp.scad", "sparse.grp.lasso"),
                         groups = rep(1:20, each = 5), nlambda = 100L))
system.time(fit2c <- oem(x = x2, y = y2, 
                         penalty = c("grp.lasso", "lasso", "mcp", 
                                     "scad", "elastic.net", "grp.mcp",
                                     "grp.scad", "sparse.grp.lasso"),
                         groups = rep(1:20, each = 5), nlambda = 500L))

## ---- message = FALSE, cache=FALSE--------------------------------------------
nobs  <- 5e4
nvars <- 100
x2 <- matrix(rnorm(nobs * nvars), ncol = nvars)

y2 <- rbinom(nobs, 1, prob = 1 / (1 + exp(-drop(x2 %*% c(0.15, 0.15, -0.15, -0.15, 0.25, rep(0, nvars - 5))))))


system.time(fit2a <- oem(x = x2, y = y2, penalty = c("grp.lasso"),
                         family = "binomial",
                         groups = rep(1:20, each = 5), nlambda = 100L))
system.time(fit2b <- oem(x = x2, y = y2, penalty = c("grp.lasso", "lasso", "mcp", "scad", "elastic.net"),
                         family = "binomial",
                         groups = rep(1:20, each = 5), nlambda = 100L))


## ---- message = FALSE, cache=FALSE--------------------------------------------
system.time(cvfit1 <- cv.oem(x = x, y = y, 
                             penalty = c("lasso", "mcp", 
                                         "grp.lasso", "grp.mcp"), 
                             gamma = 2,
                             groups = rep(1:20, each = 5), 
                             nfolds = 10))

## ---- fig.show='hold', fig.width = 7.15, fig.height = 7.5---------------------
layout(matrix(1:4, ncol = 2))
plot(cvfit1, which.model = 1)
plot(cvfit1, which.model = 2)
plot(cvfit1, which.model = 3)
plot(cvfit1, which.model = 4)

## ---- message = FALSE, cache=FALSE--------------------------------------------

nobsc  <- 1e5
nvarsc <- 100
xc <- matrix(rnorm(nobsc * nvarsc), ncol = nvarsc)
yc <- drop(xc %*% c(0.5, 0.5, -0.5, -0.5, 1, rep(0, nvarsc - 5))) + rnorm(nobsc, sd = 4)

system.time(cvalfit1 <- cv.oem(x = xc, y = yc, penalty = "lasso", 
                               groups = rep(1:20, each = 5), 
                               nfolds = 10))

system.time(xvalfit1 <- xval.oem(x = xc, y = yc, penalty = "lasso",
                                 groups = rep(1:20, each = 5), 
                                 nfolds = 10))

system.time(xvalfit2 <- xval.oem(x = xc, y = yc, penalty = "lasso",
                                 groups = rep(1:20, each = 5), 
                                 nfolds = 10, ncores = 2))

system.time(ofit1 <- oem(x = xc, y = yc, penalty = "lasso",
                         groups = rep(1:20, each = 5)))

## ---- message = FALSE, cache=FALSE--------------------------------------------
nobs  <- 2e3
nvars <- 20
x <- matrix(runif(nobs * nvars, max = 2), ncol = nvars)

y <- rbinom(nobs, 1, prob = 1 / (1 + exp(-drop(x %*% c(0.25, -1, -1, -0.5, -0.5, -0.25, rep(0, nvars - 6))))))

## ---- message = FALSE, cache=FALSE--------------------------------------------
cvfit2 <- cv.oem(x = x, y = y, penalty = c("lasso", "mcp", 
                                           "grp.lasso", "grp.mcp"), 
                 family = "binomial",
                 type.measure = "class",
                 gamma = 2,
                 groups = rep(1:10, each = 2), 
                 nfolds = 10, standardize = FALSE)

## ---- echo = FALSE, fig.show='hold', fig.width = 7.15, fig.height = 7.5-------
yrng <- range(c(unlist(cvfit2$cvup), unlist(cvfit2$cvlo)))
layout(matrix(1:4, ncol = 2))
plot(cvfit2, which.model = 1, ylim = yrng)
plot(cvfit2, which.model = 2, ylim = yrng)
plot(cvfit2, which.model = 3, ylim = yrng)
plot(cvfit2, which.model = "grp.mcp", ylim = yrng)

## -----------------------------------------------------------------------------
mean(y)

## ---- message = FALSE, cache=FALSE--------------------------------------------
cvfit2 <- cv.oem(x = x, y = y, penalty = c("lasso", "mcp", 
                                           "grp.lasso", "grp.mcp"), 
                 family = "binomial",
                 type.measure = "auc",
                 gamma = 2,
                 groups = rep(1:10, each = 2), 
                 nfolds = 10, standardize = FALSE)

## ---- echo = FALSE, fig.show='hold', fig.width = 7.15, fig.height = 7.5-------
yrng <- range(c(unlist(cvfit2$cvup), unlist(cvfit2$cvlo)))
layout(matrix(1:4, ncol = 2))
plot(cvfit2, which.model = 1, ylim = yrng)
plot(cvfit2, which.model = 2, ylim = yrng)
plot(cvfit2, which.model = 3, ylim = yrng)
plot(cvfit2, which.model = "grp.mcp", ylim = yrng)

## ---- message = FALSE, cache=FALSE--------------------------------------------
xtx <- crossprod(xc) / nrow(xc)
xty <- crossprod(xc, yc) / nrow(xc)


system.time(fit <- oem(x = xc, y = yc, 
                       penalty = c("lasso", "grp.lasso"), 
                       standardize = FALSE, intercept = FALSE,
                       groups = rep(1:20, each = 5)))

system.time(fit.xtx <- oem.xtx(xtx = xtx, xty = xty, 
                               penalty = c("lasso", "grp.lasso"), 
                               groups = rep(1:20, each = 5))  )  
                   
max(abs(fit$beta[[1]][-1,] - fit.xtx$beta[[1]]))
max(abs(fit$beta[[2]][-1,] - fit.xtx$beta[[2]])) 

col.std <- apply(xc, 2, sd)
fit.xtx.s <- oem.xtx(xtx = xtx, xty = xty, 
                     scale.factor = col.std,
                     penalty = c("lasso", "grp.lasso"), 
                     groups = rep(1:20, each = 5))  


## ---- message = FALSE, cache=FALSE--------------------------------------------
set.seed(123)
nrows <- 50000
ncols <- 100
bkFile <- "bigmat.bk"
descFile <- "bigmatk.desc"
bigmat <- filebacked.big.matrix(nrow=nrows, ncol=ncols, type="double",
                                backingfile=bkFile, backingpath=".",
                                descriptorfile=descFile,
                                dimnames=c(NULL,NULL))

# Each column value with be the column number multiplied by
# samples from a standard normal distribution.
set.seed(123)
for (i in 1:ncols) bigmat[,i] = rnorm(nrows)*i

yb <- rnorm(nrows) + bigmat[,1] - bigmat[,2]

## out-of-memory computation
fit <- big.oem(x = bigmat, y = yb, 
               penalty = c("lasso", "grp.lasso"), 
               groups = rep(1:20, each = 5))

## fitting with in-memory computation
fit2 <- oem(x = bigmat[,], y = yb, 
            penalty = c("lasso", "grp.lasso"), 
            groups = rep(1:20, each = 5))   
           
max(abs(fit$beta[[1]] - fit2$beta[[1]]))            


## ---- message = FALSE, cache=FALSE--------------------------------------------

nobsc  <- 1e5
nvarsc <- 500
xc <- matrix(rnorm(nobsc * nvarsc), ncol = nvarsc)
yc <- drop(xc %*% c(0.5, 0.5, -0.5, -0.5, 1, rep(0, nvarsc - 5))) + rnorm(nobsc, sd = 4)


system.time(fit <- oem(x = xc, y = yc, 
                       penalty = c("lasso", "grp.lasso"), 
                       standardize = FALSE, intercept = FALSE,
                       groups = rep(1:20, each = 25)))

system.time(fitp <- oem(x = xc, y = yc, 
                        penalty = c("lasso", "grp.lasso"), 
                        standardize = FALSE, intercept = FALSE,
                        groups = rep(1:20, each = 25), ncores = 2))


## ---- message = FALSE, cache=FALSE--------------------------------------------

nobs  <- 1e4
nvars <- 102
x <- matrix(rnorm(nobs * nvars), ncol = nvars)
y <- drop(x %*% c(0.5, 0.5, -0.5, -0.5, 1, 0.5, rep(0, nvars - 6))) + rnorm(nobs, sd = 4)

lams <- exp(seq(log(2.5), log(5e-5), length.out = 100L))

ols.estimates <- coef(lm.fit(y = y, x = cbind(1, x)))[-1]

fit.adaptive <- oem(x = x, y = y, penalty = c("lasso"),
                    penalty.factor = 1 / abs(ols.estimates),
                    lambda = lams)

group.indicators <- rep(1:34, each = 3)

## norms of OLS estimates for each group
group.norms      <- sapply(1:34, function(idx) sqrt(sum((ols.estimates[group.indicators == idx]) ^ 2)))
fit.adaptive.grp <- oem(x = x, y = y, penalty = c("grp.lasso"),
                        group.weights = 1 / group.norms,
                        groups = group.indicators, 
                        lambda = lams)


## ---- echo = FALSE, fig.show='hold', fig.width = 7.15, fig.height = 4.25------
layout(matrix(1:2, ncol = 2))
plot(fit.adaptive)
plot(fit.adaptive.grp)

Try the oem package in your browser

Any scripts or data that you put into this service are public.

oem documentation built on Oct. 13, 2022, 9:06 a.m.