Simple tasks related to R Tip: Break up Function Nesting for Legibility. Most remote data systems start and end with data remote, so we are materializing tables when showing database timings.
library("dplyr") library("microbenchmark") library("ggplot2") library("data.table") library("rquery") db <- DBI::dbConnect(RPostgreSQL::PostgreSQL(), host = 'localhost', port = 5432, user = 'johnmount', password = '') # PostgreSQL dbExistsTable() does not work dbopts <- dbi_connection_preferences(db) print(dbopts) options(dbopts) nrows <- 100000
Simple problem (subset rows and columns).
mtcarsb <- mtcars[rep(seq_len(nrow(mtcars)), nrows), ,] print(dim(mtcarsb)) mtcarsd <- as.data.table(mtcarsb) mtcarsdb <- dbi_copy_to(db, "mtcarsdb", mtcarsb, overwrite = TRUE, temporary = TRUE) # DBI::dbGetQuery(db, "CREATE INDEX mtcarsdb_cyl ON mtcarsdb (cyl)") rquery_sql <- mtcarsdb %.>% select_rows_nse(., cyl == 8) %.>% select_columns(., qc(mpg, cyl, wt)) %.>% to_sql(., db) cat(rquery_sql) DBI::dbGetQuery(db, paste("EXPLAIN", rquery_sql)) mtcarst <- dplyr::tbl(db, "mtcarsdb") dplyr_sql <- mtcarst %>% filter(cyl == 8) %>% select(mpg, cyl, wt) %>% dbplyr::remote_query(.) cat(dplyr_sql) DBI::dbGetQuery(db, paste("EXPLAIN", dplyr_sql)) timings <- microbenchmark( base_stepped = { . <- mtcarsb . <- subset(., cyl == 8) . <- .[, c("mpg", "cyl", "wt")] nrow(.) }, base_nested = { nrow(mtcarsb[mtcarsb$cyl == 8, c("mpg", "cyl", "wt")]) }, dplyr = { mtcarsb %>% filter(cyl == 8) %>% select(mpg, cyl, wt) %>% nrow }, dplyr_database = { res <- mtcarst %>% filter(cyl == 8) %>% select(mpg, cyl, wt) %>% compute() as.numeric(as.data.frame(tally(res))[[1]][[1]]) }, data.table_nested = { nrow(mtcarsd[cyl==8, c("mpg", "cyl", "wt")]) }, data.table_stepped = { mtcarsd[cyl==8, ][, c("mpg", "cyl", "wt") ][, .N ] }, rquery_database = { res <- mtcarsdb %.>% select_rows_nse(., cyl == 8) %.>% select_columns(., qc(mpg, cyl, wt)) %.>% materialize(db, ., table_name = "restab", overwrite = TRUE, temporary = TRUE) dbi_nrow(db, res$table_name) } ) print(timings) autoplot(timings)
Want to add the query to query folding feature that dbplyr
's optimizer has.
DBI::dbDisconnect(db)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.