# to run the timings in this readme, set the following env variable: Sys.setenv(ODBC_EVAL_BENCHMARKS = "true")
eval_timings <- as.logical(Sys.getenv("ODBC_EVAL_BENCHMARKS", "false")) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = eval_timings )
The odbc package is often much faster than the existing RODBC and DBI compatible RODBCDBI packages. We'll benchmark writing and reading data from the nycflights13 package using the three packages.
# The CRAN version of RODBC uses iODBC, so to use unixODBC we need to # recompile it from source, specifying the odbc manager explicitly: # # install.packages("RODBC", type = "source", INSTALL_opts="--configure-args='--with-odbc-manager=odbc'") # # see `vignette("develop")` for more details.
library(odbc) library(RODBC) library(RODBCDBI) library(DBI) library(nycflights13)
RODBC and RODBCDBI trip up on flights
' tibble subclass, so we'll convert it to a data frame before passing to each package. Also, the RODBCDBI package does not support writing timestamps, so we remove that column as well.
flights$time_hour <- NULL flights <- as.data.frame(flights)
Now, let's configure three driver connections to a Microsoft SQL Server database, one for each package we'll be benchmarking.
options("connectionObserver" = NULL)
odbc <- dbConnect(odbc::odbc(), dsn = "MicrosoftSQLServer", uid = "SA", pwd = "BoopBop123!") rodbc <- RODBC::odbcConnect(dsn = "MicrosoftSQLServer", uid = "SA", pwd = "BoopBop123!") rodbcdbi <- dbConnect(RODBCDBI::ODBC(), dsn = "MicrosoftSQLServer", user = "SA", password = "BoopBop123!")
The above code requires a dsn
MicrosoftSQLServer
to have been previously configured; see vignettes("setup")
to learn more.
We'll first benchmark writing the flights dataset, which contains ~300,000 rows and 18 columns, to a database.
The code for odbc and RODBCDBI looks quite similar, as they both use the DBI front-end:
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
odbc_write <- system.time(dbWriteTable(odbc, "flights", flights)) odbc_write
#> user system elapsed #> 0.883 0.176 8.108
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
rodbcdbi_write <- system.time(dbWriteTable(rodbcdbi, "flights", flights)) rodbcdbi_write
#> user system elapsed #> 8.287 11.107 257.841
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
The timings for RODBC and RODBCDBI are quite similar, as they both utilize the RODBC back-end:
rodbc_write <- system.time(sqlSave(rodbc, flights, "flights")) rodbc_write
#> user system elapsed #> 8.266 11.023 235.825
if ("flights" %in% dbListTables(odbc)) { dbExecute(odbc, "drop table flights") }
if (!"flights" %in% dbListTables(odbc)) { dbWriteTable(odbc, "flights", flights) }
Again, the syntax for odbc and RODBCDBI is identical, while the timings for RODBCDBI and RODBC are quite similar.
odbc_read <- system.time(result <- dbReadTable(odbc, "flights")) rodbcdbi_read <- system.time(result <- dbReadTable(rodbcdbi, "flights")) rodbc_read <- system.time(result <- sqlFetch(rodbc, "flights"))
odbc_read
#> user system elapsed #> 0.515 0.024 0.557
rodbcdbi_read
#> user system elapsed #> 1.308 0.035 1.356
rodbc_read
#> user system elapsed #> 1.291 0.033 1.343
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.