## ---- eval=FALSE, include=TRUE-------------------------------------------
# devtools::install_github("alexioannides/pipeliner")
## ---- eval=FALSE, include=TRUE-------------------------------------------
# transform_features(function(df) {
# data.frame(x1 = log(df$var1))
# })
## ---- eval=FALSE, include=TRUE-------------------------------------------
# transform_response(function(df) {
# data.frame(y = log(df$response))
# })
## ---- eval=FALSE, include=TRUE-------------------------------------------
# estimate_model(function(df) {
# lm(y ~ 1 + x1, df)
# })
## ---- eval=FALSE, include=TRUE-------------------------------------------
# inv_transform_response(function(df) {
# data.frame(pred_response = exp(df$pred_y))
# })
## ------------------------------------------------------------------------
library(pipeliner)
data <- faithful
lm_pipeline <- pipeline(
data,
transform_features(function(df) {
data.frame(x1 = (df$waiting - mean(df$waiting)) / sd(df$waiting))
}),
transform_response(function(df) {
data.frame(y = (df$eruptions - mean(df$eruptions)) / sd(df$eruptions))
}),
estimate_model(function(df) {
lm(y ~ 1 + x1, df)
}),
inv_transform_response(function(df) {
data.frame(pred_eruptions = df$pred_model * sd(df$eruptions) + mean(df$eruptions))
})
)
in_sample_predictions <- predict(lm_pipeline, data, verbose = TRUE)
head(in_sample_predictions)
## ------------------------------------------------------------------------
summary(lm_pipeline$inner_model)
## ------------------------------------------------------------------------
pred_function <- lm_pipeline$predict
predictions <- pred_function(data, verbose = FALSE)
head(predictions)
## ---- warning=FALSE, message=FALSE---------------------------------------
library(tidyverse)
lm_pipeline <- data %>%
pipeline(
transform_features(function(df) {
transmute(df, x1 = (waiting - mean(waiting)) / sd(waiting))
}),
transform_response(function(df) {
transmute(df, y = (eruptions - mean(eruptions)) / sd(eruptions))
}),
estimate_model(function(df) {
lm(y ~ 1 + x1, df)
}),
inv_transform_response(function(df) {
transmute(df, pred_eruptions = pred_model * sd(eruptions) + mean(eruptions))
})
)
head(predict(lm_pipeline, data))
## ------------------------------------------------------------------------
library(modelr)
# define a function that estimates a machine learning pipeline on a single fold of the data
pipeline_func <- function(df) {
pipeline(
df,
transform_features(function(df) {
transmute(df, x1 = (waiting - mean(waiting)) / sd(waiting))
}),
transform_response(function(df) {
transmute(df, y = (eruptions - mean(eruptions)) / sd(eruptions))
}),
estimate_model(function(df) {
lm(y ~ 1 + x1, df)
}),
inv_transform_response(function(df) {
transmute(df, pred_eruptions = pred_model * sd(eruptions) + mean(eruptions))
})
)
}
# 5-fold cross-validation using machine learning pipelines
cv_rmse <- crossv_kfold(data, 5) %>%
mutate(model = map(train, ~ pipeline_func(as.data.frame(.x))),
predictions = map2(model, test, ~ predict(.x, as.data.frame(.y))),
residuals = map2(predictions, test, ~ .x - as.data.frame(.y)$eruptions),
rmse = map_dbl(residuals, ~ sqrt(mean(.x ^ 2)))) %>%
summarise(mean_rmse = mean(rmse), sd_rmse = sd(rmse))
cv_rmse
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.