inst/doc/sql.R

## ----pre, include = FALSE-----------------------------------------------------
if (!rlang::is_installed("randomForest")) {
  knitr::opts_chunk$set(
    eval = FALSE
  )
}

## ----setup, include=FALSE-----------------------------------------------------
library(dplyr)
library(tidypredict)
library(randomForest)
library(dbplyr)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
set.seed(100)

## -----------------------------------------------------------------------------
library(dplyr)
library(tidypredict)
library(randomForest)
library(dbplyr)

flights_table <- nycflights13::flights %>%
  mutate(
    current_score = 0, 
    flight_id = row_number()
    ) 

## -----------------------------------------------------------------------------
library(DBI)

con <- dbConnect(RSQLite::SQLite(), path = ":memory:")
db_fligths <- copy_to(con,flights_table )

## -----------------------------------------------------------------------------
df <- db_fligths %>%
  select(dep_delay, hour, distance) %>%
  head(1000) %>%
  collect() 

## -----------------------------------------------------------------------------
model <- lm(dep_delay ~ ., data = df)

## -----------------------------------------------------------------------------
tidypredict_test(model)

## ---- eval = FALSE------------------------------------------------------------
#  if(tidypredict_test(model)$alert) stop("Threshold exceeded!")

## -----------------------------------------------------------------------------
library(dbplyr)

update_statement <- build_sql("UPDATE flights_table SET current_score  = ", tidypredict_sql(model, con = con), con = con)

update_statement

## -----------------------------------------------------------------------------
dbSendQuery(con, update_statement)

## -----------------------------------------------------------------------------
db_fligths %>%
  select(current_score) %>%
  head(10) 
  

## -----------------------------------------------------------------------------
dbWriteTable(con, "daily_scores", 
             tibble(
               flight_id = 0,
               score = 0,
               date = ""
             ))

## -----------------------------------------------------------------------------
new_predictions <- db_fligths %>%
  filter(month == 12) %>% 
  tidypredict_to_column(model, vars = "score") %>%
  select(
    flight_id,
    score) %>%
  mutate(date = "01/01/2018")

## -----------------------------------------------------------------------------
insert_scores <- build_sql("INSERT INTO daily_scores ", sql_render(new_predictions, con = con), con = con)
insert_scores

## -----------------------------------------------------------------------------
dbSendQuery(con, insert_scores)

## -----------------------------------------------------------------------------
tbl(con, "daily_scores") %>%
  inner_join(tbl(con, "flights_table"), by = "flight_id") %>%
  filter(date == "01/01/2018") %>%
  select(dep_delay, hour, distance, score, date)

## ---- include = FALSE---------------------------------------------------------
dbDisconnect(con)

Try the tidypredict package in your browser

Any scripts or data that you put into this service are public.

tidypredict documentation built on Jan. 22, 2023, 1:41 a.m.