# to run the timings in this readme, set the following env variable:
Sys.setenv(ODBC_EVAL_BENCHMARKS = "true")
eval_timings <- as.logical(Sys.getenv("ODBC_EVAL_BENCHMARKS", "false"))

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = eval_timings
)

The odbc package is often much faster than the existing RODBC and DBI compatible RODBCDBI packages. We'll benchmark writing and reading data from the nycflights13 package using the three packages.

# The CRAN version of RODBC uses iODBC, so to use unixODBC we need to 
# recompile it from source, specifying the odbc manager explicitly:
#
# install.packages("RODBC", type = "source", INSTALL_opts="--configure-args='--with-odbc-manager=odbc'")
#
# see `vignette("develop")` for more details.
library(odbc)
library(RODBC)
library(RODBCDBI)

library(DBI)

library(nycflights13)

Setup

RODBC and RODBCDBI trip up on flights' tibble subclass, so we'll convert it to a data frame before passing to each package. Also, the RODBCDBI package does not support writing timestamps, so we remove that column as well.

flights$time_hour <- NULL
flights <- as.data.frame(flights)

Now, let's configure three driver connections to a Microsoft SQL Server database, one for each package we'll be benchmarking.

options("connectionObserver" = NULL)
odbc <- dbConnect(odbc::odbc(), dsn = "MicrosoftSQLServer", uid = "SA", pwd = "BoopBop123!")
rodbc <- RODBC::odbcConnect(dsn = "MicrosoftSQLServer", uid = "SA", pwd = "BoopBop123!")
rodbcdbi <- dbConnect(RODBCDBI::ODBC(), dsn = "MicrosoftSQLServer", user = "SA", password = "BoopBop123!")

The above code requires a dsn MicrosoftSQLServer to have been previously configured; see vignettes("setup") to learn more.

Writing

We'll first benchmark writing the flights dataset, which contains ~300,000 rows and 18 columns, to a database.

The code for odbc and RODBCDBI looks quite similar, as they both use the DBI front-end:

if ("flights" %in% dbListTables(odbc)) {
  dbExecute(odbc, "drop table flights")
}
odbc_write <- system.time(dbWriteTable(odbc, "flights", flights))

odbc_write
#>   user  system elapsed 
#>  0.883   0.176   8.108 
if ("flights" %in% dbListTables(odbc)) {
  dbExecute(odbc, "drop table flights")
}
rodbcdbi_write <- system.time(dbWriteTable(rodbcdbi, "flights", flights))

rodbcdbi_write
#>   user  system elapsed 
#>  8.287  11.107 257.841 
if ("flights" %in% dbListTables(odbc)) {
  dbExecute(odbc, "drop table flights")
}

The timings for RODBC and RODBCDBI are quite similar, as they both utilize the RODBC back-end:

rodbc_write <- system.time(sqlSave(rodbc, flights, "flights"))

rodbc_write
#>   user  system elapsed 
#>  8.266  11.023 235.825 
if ("flights" %in% dbListTables(odbc)) {
  dbExecute(odbc, "drop table flights")
}

Reading

if (!"flights" %in% dbListTables(odbc)) {
  dbWriteTable(odbc, "flights", flights)
}

Again, the syntax for odbc and RODBCDBI is identical, while the timings for RODBCDBI and RODBC are quite similar.

odbc_read     <- system.time(result <- dbReadTable(odbc, "flights"))
rodbcdbi_read <- system.time(result <- dbReadTable(rodbcdbi, "flights"))
rodbc_read    <- system.time(result <- sqlFetch(rodbc, "flights"))
odbc_read
#>   user  system elapsed 
#>  0.515   0.024   0.557 
rodbcdbi_read
#>   user  system elapsed 
#>  1.308   0.035   1.356 
rodbc_read
#>   user  system elapsed 
#>  1.291   0.033   1.343 


rstats-db/odbc documentation built on May 15, 2024, 9:45 a.m.