# to run the timings in this readme, set the following env variable: Sys.setenv(ODBC_EVAL_BENCHMARKS = "true")
eval_timings <- as.logical(Sys.getenv("ODBC_EVAL_BENCHMARKS", "false")) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = eval_timings )
The odbc package is often much faster than the existing RODBC and DBI compatible RODBCDBI packages. We'll benchmark writing and reading data from the nycflights13 package using the three packages.
# The CRAN version of RODBC uses iODBC, so to use unixODBC we need to # recompile it from source, specifying the odbc manager explicitly: # # install.packages("RODBC", type = "source", INSTALL_opts="--configure-args='--with-odbc-manager=odbc'") # # see `vignette("develop")` for more details.
library(odbc) library(RODBC) library(RODBCDBI) library(DBI) library(nycflights13)
RODBC and RODBCDBI trip up on flights
' tibble subclass, so we'll convert it to a data frame before passing to each package. Also, the RODBCDBI package does not support writing timestamps, so we remove that column as well.
flights$time_hour <- NULL flights <- as.data.frame(flights)
Now, let's configure three driver connections to a Microsoft SQL Server database, one for each package we'll be benchmarking.
options("connectionObserver" = NULL)
odbc <- dbConnect(odbc::odbc(), dsn = "MicrosoftSQLServer", uid = "SA", pwd = "BoopBop123!") rodbc <- RODBC::odbcConnect(dsn = "MicrosoftSQLServer", uid = "SA", pwd = "BoopBop123!") rodbcdbi <- dbConnect(RODBCDBI::ODBC(), dsn = "MicrosoftSQLServer", user = "SA", password = "BoopBop123!")
The above code requires a dsn
MicrosoftSQLServer
to have been previously configured; see vignettes("setup")
to learn more.
We'll first benchmark writing the flights dataset, which contains ~300,000 rows and 18 columns, to a database.
The code for odbc and RODBCDBI looks quite similar, as they both use the DBI front-end:
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
odbc_write <- system.time(dbWriteTable(odbc, "flights", flights)) odbc_write
#> user system elapsed #> 0.883 0.176 8.108
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
rodbcdbi_write <- system.time(dbWriteTable(rodbcdbi, "flights", flights)) rodbcdbi_write
#> user system elapsed #> 8.287 11.107 257.841
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
The timings for RODBC and RODBCDBI are quite similar, as they both utilize the RODBC back-end:
rodbc_write <- system.time(sqlSave(rodbc, flights, "flights")) rodbc_write
#> user system elapsed #> 8.266 11.023 235.825
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
if (!"flights" %in% dbListTables(odbc)) { dbWriteTable(odbc, "flights", flights) }
Again, the syntax for odbc and RODBCDBI is identical, while the timings for RODBCDBI and RODBC are quite similar.
odbc_read <- system.time(result <- dbReadTable(odbc, "flights")) rodbcdbi_read <- system.time(result <- dbReadTable(rodbcdbi, "flights")) rodbc_read <- system.time(result <- sqlFetch(rodbc, "flights"))
odbc_read
#> user system elapsed #> 0.515 0.024 0.557
rodbcdbi_read
#> user system elapsed #> 1.308 0.035 1.356
rodbc_read
#> user system elapsed #> 1.291 0.033 1.343
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.