# Document-wide knitr chunk defaults: merge each chunk's source and output
# into a single block and prefix printed output lines with " # ".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = " # "
)
# Allow printed output to be up to 100 characters wide.
options(width = 100)

Example trying most of the replyr functions on a few data sources.

# Attach the packages used below (startup messages silenced where wrapped)
# and print each version so the output records what the checks ran against.
suppressPackageStartupMessages(library('dplyr'))
packageVersion("dplyr")
# possibly need this https://github.com/tidyverse/dplyr/issues/3145
suppressPackageStartupMessages(library('dbplyr'))
packageVersion("dbplyr")
suppressPackageStartupMessages(library('sparklyr'))
packageVersion("sparklyr")
R.Version()$version.string
library("replyr")
packageVersion("replyr")
# The helpers called below (runExample, remoteCopy, listsOfSameData,
# failingFrameIndices, sameData) are not defined in this file; presumably
# they come from CheckFns.R -- TODO confirm.
source('CheckFns.R')

Check replyr basic operations against a few data service providers.

Local data.frame example.

# Identity copy adapter for the local data.frame case: hands back its first
# argument untouched.
#
# df:   the object to "copy"; returned as-is.
# name: accepted so the signature lines up with the other copy adapters in
#       this file; it is never read.
noopCopy <- function(df, name) {
  identity(df)
}
# Baseline run using the identity adapter; every later result in this script
# is compared against resBase.
resBase <- runExample(noopCopy)

Local tbl example.

# Copy adapter for the local tbl case: converts its first argument to a
# tibble and returns it.
#
# df:   the local data to convert.
# name: accepted so the signature lines up with the other copy adapters in
#       this file; it is never read.
tblCopy <- function(df,name) {
  # as.tbl() is deprecated as of dplyr 1.0.0 and warns on use; as_tibble()
  # (re-exported by dplyr) is the documented replacement.
  dplyr::as_tibble(df)
}
# Run the examples through the tbl adapter and abort if the results do not
# match the data.frame baseline.
resTbl <- runExample(tblCopy)
if(!listsOfSameData(resBase, resTbl)) {
  stop("tbl result differs")
}

SQLite example.

# Open an in-memory SQLite database for this section.
my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
RSQLite::initExtension(my_db) # filed as dplyr issue https://github.com/tidyverse/dplyr/issues/3150
# my_db <- dplyr::src_sqlite(":memory:", create = TRUE)
class(my_db)
# remoteCopy() is defined outside this file; presumably it returns a
# function(df, name) that copies df into my_db -- TODO confirm.
copyToRemote <- remoteCopy(my_db)
resSQLite <- runExample(copyToRemote)
# Printed for diagnosis before the hard check below; presumably the indices
# of example results that differ from the baseline -- TODO confirm.
failingFrameIndices(resBase, resSQLite)
if(!listsOfSameData(resBase, resSQLite)) {
  stop("SQLite result differs")
}
# Close the connection, then drop the handles that referenced it.
DBI::dbDisconnect(my_db)
rm(list=c('my_db','copyToRemote')); gc(verbose = FALSE) # disconnect

MySQL example ("docker start mysql"). The results are somewhat poor, as the adapted MySQL has a hard time with NA.

The MySQL check has been taken out: it is too much trouble to maintain the testing database (Docker no longer shares the container's ports for MySQL).

# NOTE(review): the accompanying text says the MySQL check has been taken
# out, so this section is presumably not evaluated -- confirm before relying
# on it.
# Connect to a MySQL server on 127.0.0.1:3306 as root with an empty password.
my_db <- dplyr::src_mysql('mysql','127.0.0.1',3306,'root','')
class(my_db)
copyToRemote <- remoteCopy(my_db)

resMySQL <- runExample(copyToRemote)

# Indices of example results that differ from the baseline (used as list
# indices below).
failures <- failingFrameIndices(resBase, resMySQL) 
# Per-example key columns passed to sameData() when re-checking a failure.
# Positions that are not assigned here (1, 4, 5, 6) stay NULL.
retrykeys <- list()
retrykeys[[2]] <- c('x', 'z')
retrykeys[[3]] <- c('x', 'z')
retrykeys[[7]] <- c('year', 'name')
retrykeys[[8]] <- c('year', 'name')
retrykeys[[9]] <- c('year')
retrykeys[[10]] <- c('group')
retrykeys[[11]] <- c('index')
retrykeys[[12]] <- c('index','meastype')
for(i in failures) {
  if(i<=length(retrykeys)) {
    # Re-compare this example with the "ignore left NAs" option switched on.
    # The argument name is spelled as written here; presumably it matches
    # the parameter name in sameData() -- confirm before renaming.
    explained <- sameData(resBase[[i]], resMySQL[[i]],
                          ingoreLeftNAs= TRUE, keySet=retrykeys[[i]])
    print(paste("MySQL result differs",i,
                " explained by left NAs: ",
                explained))
    if(!explained) {
      stop("MySQL non NA differnce")
    }
  } else {
    # A failing index beyond the retry table is treated as a hard failure.
    stop(paste("different result for example", i))
  }
}
# Close the connection, then drop the handles that referenced it.
DBI::dbDisconnect(my_db)
rm(list=c('my_db','copyToRemote')); gc(verbose = FALSE) # disconnect

PostgreSQL example.

# Connect through the RPostgreSQL driver to a server on localhost:5432.
# The user name and empty password are hard-coded for the author's machine.
library('RPostgreSQL')
my_db <- DBI::dbConnect(dbDriver("PostgreSQL"), 
                        host = 'localhost',
                        port = 5432,
                        user = 'johnmount',
                        password = '')

class(my_db)
copyToRemote <- remoteCopy(my_db)
resPostgreSQL <- runExample(copyToRemote)
# Abort if the results do not match the data.frame baseline.
if(!listsOfSameData(resBase, resPostgreSQL)) {
  stop("PostgreSQL result differs")
}
# Close the connection, then drop the handles that referenced it.
DBI::dbDisconnect(my_db)
rm(list=c('my_db','copyToRemote')); gc(verbose = FALSE) # disconnect

RPostgres example.

# Same check as the RPostgreSQL section, but connecting through the
# RPostgres driver (same host, port, user and empty password).
library("RPostgres")
my_db <- DBI::dbConnect(RPostgres::Postgres(),
  host = 'localhost',
  port = 5432,
  user = 'johnmount',
  password = '')
class(my_db)
copyToRemote <- remoteCopy(my_db)
# Note: this reuses the name resPostgreSQL, overwriting the result stored by
# the RPostgreSQL section.
resPostgreSQL <- runExample(copyToRemote)
# Abort if the results do not match the data.frame baseline.
if(!listsOfSameData(resBase, resPostgreSQL)) {
  stop("RPostgres result differs")
}
# Close the connection, then drop the handles that referenced it.
DBI::dbDisconnect(my_db)
rm(list=c('my_db','copyToRemote')); gc(verbose = FALSE) # disconnect

Spark 2.2.0 example (the lowest version of Spark we are supporting).

# Can't easily override Spark version once it is up.
# Start a local Spark connection, requesting Spark 2.2.0.
my_db <- sparklyr::spark_connect(version='2.2.0', 
   master = "local")
class(my_db)
# Print the Spark home directory recorded on the connection object.
my_db$spark_home
copyToRemote <- remoteCopy(my_db)
resSpark <- runExample(copyToRemote)
# Abort if the results do not match the data.frame baseline.
if(!listsOfSameData(resBase, resSpark)) {
  stop("Spark result differs")
}
# Close the Spark connection, then drop the handles that referenced it.
spark_disconnect(my_db)
rm(list=c('my_db','copyToRemote')); gc(verbose = FALSE) # disconnect
print("all done")
# Final cleanup: remove every object from the current environment and
# trigger a garbage collection.
rm(list=ls())
gc(verbose = FALSE)


WinVector/replyr documentation built on Oct. 22, 2020, 8:07 p.m.