# Global figure defaults for every chunk in the slides. The resolution is
# defined once so that knitr and showtext are always given the same value.
fig_dpi <- 300
knitr::opts_chunk$set(
  fig.width = 7, fig.height = 4.5, dpi = fig_dpi,
  fig.cap = "", fig.align = "center"
)
# showtext_opts() is the current name of the deprecated showtext.opts().
showtext::showtext_opts(dpi = fig_dpi)
library(sl3)
library(methods)

Accessing these slides

--

View them online:

???


class: inverse, center, middle

Core sl3 Design Principles


sl3 Architecture

All of the classes defined in sl3 are based on the R6 framework, which brings a newer object-oriented paradigm to the R language.

Core classes

--

--

--

???


Object Oriented Programming (OOP)

--

--

--

--


class: inverse, center, middle

The Anatomy of sl3


Get the package

# install sl3 from its GitHub repository
devtools::install_github("jeremyrcoyle/sl3")
# fix the seed of the random number generator
set.seed(49753)
# attach the supporting packages; the attach order decides which package's
# functions mask same-named functions from packages attached earlier
library(data.table)
library(dplyr)
library(origami)
library(SuperLearner)

--

--

To start using sl3, let's load the package:

library(sl3)

A "toy" data set

We use data from the Collaborative Perinatal Project (CPP) to illustrate the features of sl3 as well as its proper usage. For convenience, the data is included with the sl3 R package.

# bring the example data set into the workspace
data(cpp_imputed)

# the outcome of interest and the covariates used alongside it
outcome <- "haz"
covars <- c(
  "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"
)

???


Setting up sl3_Task I

# build the task from the data, the covariate names, and the outcome name
task <- make_sl3_Task(
  data = cpp_imputed,
  covariates = covars,
  outcome = outcome,
  outcome_type = "continuous"
)

--


sl3_Task Options


Setting up sl3_Task II

Let's take a look at the task that we set up:

# auto-print the task created above
task

Learners I: Introduction

--

--

--

# construct a GLM learner object with make_learner(); no extra arguments are
# passed, and the learner is not trained here
lrnr_glm <- make_learner(Lrnr_glm)

Learners II: Core Methods

--

--

# fit the learner to the task data; train() returns the fit, which is stored
# under a separate name from the learner itself
lrnr_glm_fit <- lrnr_glm$train(task)

# verify that the learner is fit by auto-printing its is_trained field
lrnr_glm_fit$is_trained

Learners III: Prediction

# predict without passing a task
# NOTE(review): presumably this falls back to the training task -- confirm
preds <- lrnr_glm_fit$predict()
# show the first few predictions
head(preds)

--

# predict again, this time passing the task explicitly
preds <- lrnr_glm_fit$predict(task)
# show the first few predictions
head(preds)

Learners IV: Properties

# list learners by the properties "binomial" and "offset"
sl3_list_learners(c("binomial", "offset"))

--

# same query as on the previous slide step: learners listed by the
# properties "binomial" and "offset"
sl3_list_learners(c("binomial", "offset"))

Learners V: Tuning Parameters

--

sl3 Learners support some common parameters (where applicable):


Compatibility with SuperLearner Package

# create a learner of class Lrnr_pkg_SuperLearner, identified by the name
# "SL.glmnet"
# NOTE(review): presumably this wraps the SuperLearner package's SL.glmnet
# wrapper -- confirm
lrnr_sl_glmnet <- make_learner(Lrnr_pkg_SuperLearner, "SL.glmnet")

???


Dependent Data / Time-series

# Time-series example on the bsds data, in which "cnt" is used as both the
# covariate and the outcome.
data(bsds)
# This task gets its own name: assigning it to `task` would overwrite the CPP
# task that the other examples in this file train on.
ts_task <- sl3_Task$new(bsds, covariates = c("cnt"), outcome = "cnt")
# self-exciting threshold autoregressive (SETAR) model
tsDyn_learner <- Lrnr_tsDyn$new(
  learner = "setar", m = 1, model = "TAR", n.ahead = 5
)
fit_1 <- tsDyn_learner$train(ts_task)
fit_1$predict(ts_task)

--

Examples can be found in the "examples" directory on GitHub.

--


class: inverse, center, middle

Composing Learners in sl3


Pipelines I

--

# create a screener learner identified by the name "screen.corP"
screen_cor <- Lrnr_pkg_SuperLearner_screener$new("screen.corP")
# train the screener on the task and print the resulting fit
screen_fit <- screen_cor$train(task)
print(screen_fit)

Pipelines II

# call chain() on the screener fit and keep the result as the screened task
screened_task <- screen_fit$chain()
print(screened_task)

Pipelines III

# train the GLM learner on the screened task rather than the original task
screened_glm_fit <- lrnr_glm$train(screened_task)
# predict without passing a task and show the first few predictions
screened_preds <- screened_glm_fit$predict()
head(screened_preds)

Pipelines IV

--

# combine the screener and the GLM learner, in that order, into a Pipeline
sg_pipeline <- make_learner(Pipeline, screen_cor, lrnr_glm)
# train the pipeline on the original task
sg_pipeline_fit <- sg_pipeline$train(task)
# predict without passing a task and show the first few predictions
sg_pipeline_preds <- sg_pipeline_fit$predict()
head(sg_pipeline_preds)

--


Pipelines V

# create the delayed training object for the pipeline, then plot it
dt <- delayed_learner_train(sg_pipeline, task)
plot(dt, color = FALSE, height = "300px")

Stacks I

--

# combine the GLM learner and the screener + GLM pipeline into a Stack
stack <- make_learner(Stack, lrnr_glm, sg_pipeline)
# train the stack on the task
stack_fit <- stack$train(task)
# predict without passing a task and show the first few predictions
stack_preds <- stack_fit$predict()
head(stack_preds)

???



Stacks II

# create the delayed training object for the stack, then plot it
dt <- delayed_learner_train(stack, task)
plot(dt, color = FALSE, height = "500px")

But What About Cross-validation?

Almost forgot! Cross-validation (CV) is necessary in order to honestly evaluate our models and avoid over-fitting. We provide facilities for easily doing this, based on the origami package.

--

--

# wrap the stack in a Lrnr_cv learner
cv_stack <- Lrnr_cv$new(stack)
# train the wrapped stack on the task
cv_fit <- cv_stack$train(task)
# predict without passing a task
cv_preds <- cv_fit$predict()

Cross-validation (continued...)

# compute risks from the cross-validated fit, passing loss_squared_error as
# the loss
risks <- cv_fit$cv_risk(loss_squared_error)
print(risks)

--


Cross-validation (continued...)

# create the delayed training object for the Lrnr_cv-wrapped stack, then
# plot it
dt <- delayed_learner_train(cv_stack, task)
plot(dt, color = FALSE, height = "500px")

class: inverse, center, middle

Putting it all together: Super Learning


Super Learner I: Meta-Learners

--

# construct the metalearner from the Lrnr_nnls class
metalearner <- make_learner(Lrnr_nnls)
# call chain() on the cross-validated fit to obtain the task that the
# metalearner is trained on
cv_task <- cv_fit$chain()
ml_fit <- metalearner$train(cv_task)

--


Super Learner II: Pipelines

--

# build a Pipeline from the two existing fits (the stack fit and the
# metalearner fit); predict() is then called directly, with no train() call
# on the pipeline
sl_pipeline <- make_learner(Pipeline, stack_fit, ml_fit)
sl_preds <- sl_pipeline$predict()
head(sl_preds)

Super Learner III: Lrnr_sl

--

# construct a Lrnr_sl from the stack and the metalearner
sl <- Lrnr_sl$new(learners = stack, metalearner = metalearner)

# train it on the task
sl_fit <- sl$train(task)

# predict without passing a task and show the first few predictions
lrnr_sl_preds <- sl_fit$predict()
head(lrnr_sl_preds)

--

???


class: inverse, center, middle

Computing with delayed


Delayed I

--

--

# two additional learners to sit alongside the GLM learner
lrnr_rf <- make_learner(Lrnr_randomForest)
lrnr_glmnet <- make_learner(Lrnr_glmnet)

# construct a Lrnr_sl from the list of three learners and the metalearner
sl <- Lrnr_sl$new(
  learners = list(lrnr_glm, lrnr_rf, lrnr_glmnet),
  metalearner = metalearner
)

--

# create the delayed training object for the Lrnr_sl learner, then plot it
# with the plot method's default arguments
delayed_sl_fit <- delayed_learner_train(sl, task)
plot(delayed_sl_fit)

Delayed II

# create the delayed training object for the Lrnr_sl learner, then plot it
# with colour switched on and a fixed height
delayed_sl_fit <- delayed_learner_train(sl, task)
plot(delayed_sl_fit, color = TRUE, height = "500px")

Delayed III

--

???


class: center, middle

Thanks!

We have a great team: Jeremy Coyle, Nima Hejazi, Ivana Malenica, Oleg Sofrygin.

Slides created via the R package xaringan.

Powered by remark.js, knitr, and R Markdown.



jeremyrcoyle/sl3 documentation built on April 30, 2024, 10:16 p.m.