inst/doc/linear-regression.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(dbplyr)
library(nycflights13)
library(DBI)
library(modeldb)

## -----------------------------------------------------------------------------
# Open a database connection
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
RSQLite::initExtension(con)

library(dplyr)
# Copy data to the database
db_flights <- copy_to(con, nycflights13::flights, "flights")
# Create a simple sample
db_sample <- db_flights %>%
  filter(!is.na(arr_time)) %>%
  head(20000)

## -----------------------------------------------------------------------------
db_sample %>%
  select(arr_delay, dep_delay, distance) %>%
  linear_regression_db(arr_delay)

## -----------------------------------------------------------------------------
db_sample %>%
  select(arr_delay, origin) %>%
  add_dummy_variables(origin, values = c("EWR", "JFK", "LGA"))

## -----------------------------------------------------------------------------
origins <- db_flights %>%
  group_by(origin) %>%
  summarise() %>%
  pull()

origins

## -----------------------------------------------------------------------------
db_sample %>%
  select(arr_delay, origin) %>%
  add_dummy_variables(origin, values = origins) %>%
  linear_regression_db(arr_delay)

## -----------------------------------------------------------------------------
db_sample %>%
  select(arr_delay, arr_time, dep_delay, dep_time) %>%
  linear_regression_db(arr_delay, sample_size = 20000)

## -----------------------------------------------------------------------------
db_sample %>%
  mutate(distanceXarr_time = distance * arr_time) %>%
  select(arr_delay, distanceXarr_time) %>%
  linear_regression_db(arr_delay, sample_size = 20000)

## -----------------------------------------------------------------------------
db_sample %>%
  mutate(distanceXarr_time = distance * arr_time) %>%
  select(arr_delay, distance, arr_time, distanceXarr_time) %>%
  linear_regression_db(arr_delay, sample_size = 20000)

## -----------------------------------------------------------------------------
remote_model <- db_sample %>%
  mutate(distanceXarr_time = distance * arr_time) %>%
  select(arr_delay, dep_time, distanceXarr_time, origin) %>%
  add_dummy_variables(origin, values = origins) %>%
  linear_regression_db(y_var = arr_delay, sample_size = 20000)

remote_model

## ---- echo = FALSE------------------------------------------------------------
dbDisconnect(con)

Try the modeldb package in your browser

Any scripts or data that you put into this service are public.

modeldb documentation built on Nov. 2, 2023, 5:39 p.m.