# Nothing
## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options for this document: collapse source and output
# into a single block, prefix printed output with "#>", and echo chunk code.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = TRUE
)
library(dplyr)
library(dbplyr)
library(nycflights13)
library(DBI)
library(modeldb)
## -----------------------------------------------------------------------------
# Open an in-memory SQLite database.
# The argument must be `dbname`: RSQLite's dbConnect() method has no `path`
# argument, so `path = ":memory:"` is swallowed by `...` and the connection
# falls back to the default `dbname = ""` (a temporary on-disk database).
con <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")
# Enable RSQLite's bundled SQL extension functions on this connection
RSQLite::initExtension(con)
library(dplyr)
# Copy the flights data into the database as a table named "flights"
db_flights <- copy_to(con, nycflights13::flights, "flights")
# Create a simple sample: rows with a non-missing arr_time, capped at
# 20000 rows
db_sample <- db_flights %>%
  filter(!is.na(arr_time)) %>%
  head(20000)
## -----------------------------------------------------------------------------
# Regress arr_delay on the other selected columns (dep_delay and distance).
# Only the columns kept by select() are passed on to the model function.
db_sample %>%
  select(arr_delay, dep_delay, distance) %>%
  linear_regression_db(y_var = arr_delay)
## -----------------------------------------------------------------------------
# Expand the categorical `origin` column into dummy variables, listing the
# possible values by hand.
db_sample %>%
  select(arr_delay, origin) %>%
  add_dummy_variables(
    origin,
    values = c("EWR", "JFK", "LGA")
  )
## -----------------------------------------------------------------------------
# Collect the values of `origin` from the database instead of hard-coding
# them: an empty summarise() on the grouped table leaves one row per group,
# and pull() brings that single column into R as a vector.
origins <- db_flights %>%
  group_by(origin) %>%
  summarise() %>%
  pull(origin)
# Show the retrieved values
origins
## -----------------------------------------------------------------------------
# Regress arr_delay on the dummy variables built from `origin`, using the
# values retrieved into `origins` rather than a hand-written list.
db_sample %>%
  select(arr_delay, origin) %>%
  add_dummy_variables(origin, values = origins) %>%
  linear_regression_db(y_var = arr_delay)
## -----------------------------------------------------------------------------
# Regress arr_delay on arr_time, dep_delay and dep_time. sample_size is
# passed explicitly; 20000 is the same number used in the head() cap that
# defines db_sample.
db_sample %>%
  select(arr_delay, arr_time, dep_delay, dep_time) %>%
  linear_regression_db(
    y_var = arr_delay,
    sample_size = 20000
  )
## -----------------------------------------------------------------------------
# Interaction term only: build distance * arr_time as its own column and
# regress arr_delay on that single column.
db_sample %>%
  mutate(distanceXarr_time = distance * arr_time) %>%
  select(arr_delay, distanceXarr_time) %>%
  linear_regression_db(
    y_var = arr_delay,
    sample_size = 20000
  )
## -----------------------------------------------------------------------------
# Main effects plus interaction: keep distance and arr_time alongside their
# product so all three columns enter the regression on arr_delay.
db_sample %>%
  mutate(distanceXarr_time = distance * arr_time) %>%
  select(arr_delay, distance, arr_time, distanceXarr_time) %>%
  linear_regression_db(
    y_var = arr_delay,
    sample_size = 20000
  )
## -----------------------------------------------------------------------------
# Full example: combine a numeric column (dep_time), an interaction column
# (distance * arr_time) and dummy variables for `origin`, then store the
# fitted result in `remote_model`.
remote_model <- db_sample %>%
  mutate(distanceXarr_time = distance * arr_time) %>%
  select(arr_delay, dep_time, distanceXarr_time, origin) %>%
  add_dummy_variables(origin, values = origins) %>%
  linear_regression_db(
    y_var = arr_delay,
    sample_size = 20000
  )
# Print the stored result
remote_model
## ---- echo = FALSE------------------------------------------------------------
# Close the database connection opened at the start of the script
DBI::dbDisconnect(con)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.