Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(rlang_backtrace_on_error = "none")
## ----setup--------------------------------------------------------------------
library(hardhat)
library(tibble)
library(modeldata)
data(penguins)
penguins <- na.omit(penguins)
## ----out.width = '100%', echo = FALSE-----------------------------------------
knitr::include_graphics("../man/figures/Fitting.png")
## ----out.width = '100%', echo = FALSE-----------------------------------------
knitr::include_graphics("../man/figures/Prediction.png")
## -----------------------------------------------------------------------------
new_simple_lm <- function(coefs, coef_names, blueprint) {
if (!is.numeric(coefs)) {
stop("`coefs` should be a numeric vector.", call. = FALSE)
}
if (!is.character(coef_names)) {
stop("`coef_names` should be a character vector.", call. = FALSE)
}
if (length(coefs) != length(coef_names)) {
stop("`coefs` and `coef_names` must have the same length.")
}
new_model(
coefs = coefs,
coef_names = coef_names,
blueprint = blueprint,
class = "simple_lm"
)
}
## -----------------------------------------------------------------------------
manual_model <- new_simple_lm(1, "my_coef", default_xy_blueprint())
manual_model
names(manual_model)
manual_model$blueprint
## -----------------------------------------------------------------------------
simple_lm_impl <- function(predictors, outcomes) {
lm_fit <- lm.fit(predictors, outcomes)
coefs <- lm_fit$coefficients
coef_names <- names(coefs)
coefs <- unname(coefs)
list(
coefs = coefs,
coef_names = coef_names
)
}
## -----------------------------------------------------------------------------
predictors <- as.matrix(subset(penguins, select = bill_length_mm))
outcomes <- penguins$body_mass_g
simple_lm_impl(predictors, outcomes)
## -----------------------------------------------------------------------------
simple_lm_bridge <- function(processed) {
validate_outcomes_are_univariate(processed$outcomes)
predictors <- as.matrix(processed$predictors)
outcomes <- processed$outcomes[[1]]
fit <- simple_lm_impl(predictors, outcomes)
new_simple_lm(
coefs = fit$coefs,
coef_names = fit$coef_names,
blueprint = processed$blueprint
)
}
## -----------------------------------------------------------------------------
# Simulate formula interface
processed_1 <- mold(bill_length_mm ~ body_mass_g + species, penguins)
# Simulate xy interface
processed_2 <- mold(x = penguins["body_mass_g"], y = penguins$bill_length_mm)
simple_lm_bridge(processed_1)
simple_lm_bridge(processed_2)
## ----error=TRUE---------------------------------------------------------------
multi_outcome <- mold(bill_length_mm + bill_depth_mm ~ body_mass_g + species, penguins)
simple_lm_bridge(multi_outcome)
## -----------------------------------------------------------------------------
# Generic
simple_lm <- function(x, ...) {
UseMethod("simple_lm")
}
# Default
simple_lm.default <- function(x, ...) {
stop(
"`simple_lm()` is not defined for a '", class(x)[1], "'.",
call. = FALSE
)
}
# XY method - data frame
simple_lm.data.frame <- function(x, y, ...) {
processed <- mold(x, y)
simple_lm_bridge(processed)
}
# XY method - matrix
simple_lm.matrix <- function(x, y, ...) {
processed <- mold(x, y)
simple_lm_bridge(processed)
}
# Formula method
simple_lm.formula <- function(formula, data, ...) {
processed <- mold(formula, data)
simple_lm_bridge(processed)
}
# Recipe method
simple_lm.recipe <- function(x, data, ...) {
processed <- mold(x, data)
simple_lm_bridge(processed)
}
## -----------------------------------------------------------------------------
predictors <- penguins[c("bill_length_mm", "bill_depth_mm")]
outcomes_vec <- penguins$body_mass_g
outcomes_df <- penguins["body_mass_g"]
# Vector outcome
simple_lm(predictors, outcomes_vec)
# 1 column data frame outcome
simple_lm(predictors, outcomes_df)
# Formula interface
simple_lm(body_mass_g ~ bill_length_mm + bill_depth_mm, penguins)
## ----warning=FALSE, message=FALSE---------------------------------------------
library(recipes)
# - Log a predictor
# - Generate dummy variables for factors
simple_lm(body_mass_g ~ log(bill_length_mm) + species, penguins)
# Same, but with a recipe
rec <- recipe(body_mass_g ~ bill_length_mm + species, penguins) %>%
step_log(bill_length_mm) %>%
step_dummy(species, one_hot = TRUE)
simple_lm(rec, penguins)
## -----------------------------------------------------------------------------
simple_lm <- function(x, ...) {
UseMethod("simple_lm")
}
simple_lm.data.frame <- function(x, y, intercept = TRUE, ...) {
blueprint <- default_xy_blueprint(intercept = intercept)
processed <- mold(x, y, blueprint = blueprint)
simple_lm_bridge(processed)
}
simple_lm.matrix <- function(x, y, intercept = TRUE,...) {
blueprint <- default_xy_blueprint(intercept = intercept)
processed <- mold(x, y, blueprint = blueprint)
simple_lm_bridge(processed)
}
simple_lm.formula <- function(formula, data, intercept = TRUE, ...) {
blueprint <- default_formula_blueprint(intercept = intercept)
processed <- mold(formula, data, blueprint = blueprint)
simple_lm_bridge(processed)
}
simple_lm.recipe <- function(x, data, intercept = TRUE, ...) {
blueprint <- default_recipe_blueprint(intercept = intercept)
processed <- mold(x, data, blueprint = blueprint)
simple_lm_bridge(processed)
}
## -----------------------------------------------------------------------------
# By default an intercept is included
simple_lm(predictors, outcomes_df)
# But the user can turn this off
simple_lm(body_mass_g ~ log(bill_length_mm) + species, penguins, intercept = FALSE)
## -----------------------------------------------------------------------------
predict_simple_lm_numeric <- function(object, predictors) {
coefs <- object$coefs
pred <- as.vector(predictors %*% coefs)
out <- spruce_numeric(pred)
out
}
## -----------------------------------------------------------------------------
model <- simple_lm(bill_length_mm ~ body_mass_g + species, penguins)
predictors <- forge(penguins, model$blueprint)$predictors
predictors <- as.matrix(predictors)
predict_simple_lm_numeric(model, predictors)
## -----------------------------------------------------------------------------
predict_simple_lm_bridge <- function(type, object, predictors) {
type <- rlang::arg_match(type, "numeric")
predictors <- as.matrix(predictors)
switch(
type,
numeric = predict_simple_lm_numeric(object, predictors)
)
}
## ----error=TRUE---------------------------------------------------------------
model <- simple_lm(bill_length_mm ~ body_mass_g + species, penguins)
# Pass in the data frame
predictors <- forge(penguins, model$blueprint)$predictors
predict_simple_lm_bridge("numeric", model, predictors)
# Partial matches are an error
predict_simple_lm_bridge("numer", model, predictors)
## -----------------------------------------------------------------------------
predict.simple_lm <- function(object, new_data, type = "numeric", ...) {
# Enforces column order, type, column names, etc
processed <- forge(new_data, object$blueprint)
out <- predict_simple_lm_bridge(type, object, processed$predictors)
validate_prediction_size(out, new_data)
out
}
## -----------------------------------------------------------------------------
model <- simple_lm(bill_length_mm ~ log(body_mass_g) + species, penguins)
predict(model, penguins)
## ----warning=TRUE, error=TRUE-------------------------------------------------
# `new_data` isn't a data frame
predict(model, penguins$species)
# Missing a required column
predict(model, subset(penguins, select = -body_mass_g))
# In this case, 'species' is a character,
# but can be losslessy converted to a factor.
# That happens for you automatically and silently.
penguins_chr_species <- transform(penguins, species = as.character(species))
predict(model, penguins_chr_species)
# Slightly different from above. Here, 'species' is a character,
# AND has an extra unexpected factor level. It is
# removed with a warning, but you still get a factor
# with the correct levels
penguins_chr_bad_species <- penguins_chr_species
penguins_chr_bad_species$species[1] <- "new_level"
predict(model, penguins_chr_bad_species)
# This case throws an error.
# Here, 'species' is a double and
# when it should have been a factor.
# You can't cast a double to a factor!
penguins_dbl_species <- transform(penguins, species = 1)
predict(model, penguins_dbl_species)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.