# tests/testthat/test_save_df.R

library(cassandrasimple)
library(dplyr)

test_that("cass_save_df: updates work with proper type conversions and NA handling", {
  # End-to-end round trip: build a 24-row data frame covering every column
  # type of the test table, inject NAs, write it with cass_save_df(), read it
  # back with cass_query(), and compare. Then repeat for a 1-row and a 0-row
  # data frame.
  #
  # NOTE(review): needs whatever backend get_cass_session() connects to with
  # these arguments (presumably a local Cassandra node) -- confirm.
  jCassSess <- get_cass_session("localhost", "Tios001", "cassandrasimple_test")

  # Start from a clean table so rows from earlier runs cannot leak in.
  cass_update(jCassSess, "drop table if exists test_insert_table")

  cass_update(jCassSess, "create table if not exists test_insert_table (
              dt date,
              hr tinyint,
              obj_id int,
              int_1 int,
              int_2 int,
              float_1 float,
              float_2 float,
              double_1 double,
              str_val text,
              factor_str_val text,
              ts_val timestamp,
              dt_val date,
              bool_val boolean,
              primary key ((dt),hr,obj_id)
  )")

  # The hard-coded expectations further down (-8.41989, -780.6906, "fac_18")
  # depend on this seed AND on the order in which the runif() calls below are
  # evaluated. Do not reorder the data.frame() arguments.
  set.seed(13247)

  # test simplest save
  myDf1 <- data.frame(
    dt = as.Date("2017-02-22"),
    hr = seq_len(24),
    obj_id = round(runif(24, min = 1, max = 1000), 0),
    int_1 = as.integer(runif(24, min = 1, max = 24)),
    # whole numbers stored as doubles; checked below to come back as 1..24
    int_2 = as.numeric(seq_len(24)),
    float_1 = runif(24, min = -10, max = 10),
    float_2 = runif(24, min = -1000, max = 1000),
    double_1 = runif(24, min = -1000, max = 1000),
    # contains a single quote on purpose (string escaping check below)
    str_val = paste("Hi! It's hour", seq_len(24)),
    factor_str_val = as.factor(paste0("fac_", round(runif(24, min = 1, max = 100), 0))),
    ts_val = as.POSIXct("2017-02-22 00:23:33", tz = "EST"),
    dt_val = as.Date("2017-03-01"),
    bool_val = c(rep(TRUE, 12), rep(FALSE, 12)),
    stringsAsFactors = FALSE
  )

  # Inject one NA per column type, spread over rows 3, 4 and 5.
  myDf1[3, "float_1"] <- NA
  myDf1[3, "str_val"] <- NA
  myDf1[3, "factor_str_val"] <- NA
  myDf1[4, "double_1"] <- NA
  myDf1[4, "dt_val"] <- NA
  myDf1[5, "int_1"] <- NA
  myDf1[5, "ts_val"] <- NA
  myDf1[5, "bool_val"] <- NA

  cass_save_df(jCassSess, myDf1, "test_insert_table", row_batches = 2)

  res_df <- cass_query(jCassSess, "select * from test_insert_table")
  res_df <- arrange(res_df, hr)

  # Rows read back for the main fixture date at a given hour.
  rows_at_hour <- function(target_hr) {
    filter(res_df, dt == "2017-02-22", hr == target_hr)
  }

  # test NA cases
  expect_true(is.na(rows_at_hour(3)$str_val))
  expect_true(is.na(rows_at_hour(3)$float_1))
  expect_true(is.na(rows_at_hour(3)$factor_str_val))
  expect_true(is.na(rows_at_hour(4)$dt_val))
  expect_true(is.na(rows_at_hour(4)$double_1))
  expect_true(is.na(rows_at_hour(5)$int_1))
  expect_true(is.na(rows_at_hour(5)$ts_val))
  expect_true(is.na(rows_at_hour(5)$bool_val))

  # test float (seed-dependent expected values, compared with a tolerance)
  expect_true(abs(rows_at_hour(5)$float_1 - (-8.41989)) <= 0.0001)
  expect_true(abs(rows_at_hour(5)$float_2 - (-780.6906)) <= 0.0001)

  # test string escaping
  expect_equal(rows_at_hour(5)$str_val, "Hi! It's hour 5")

  # test boolean
  expect_equal(filter(res_df, hr == 1)$bool_val, TRUE)
  expect_equal(filter(res_df, hr == 20)$bool_val, FALSE)

  # test coercion of ints in numeric form to ints
  expect_equal(res_df$hr, seq_len(24))
  expect_equal(res_df$int_2, seq_len(24))

  # test date conversion
  expect_equal(rows_at_hour(1)$dt_val, as.Date("2017-03-01"))

  # test POSIXct conversion: 00:23:33 EST is expected back as 05:23:33 UTC
  expect_equal(
    strftime(rows_at_hour(1)$ts_val, format = "%Y-%m-%d %H:%M:%S %z", tz = "UTC"),
    "2017-02-22 05:23:33 +0000"
  )

  # factor to string
  expect_equal(class(res_df$factor_str_val), "character")

  # test 1 row case
  myDf1row <- myDf1[1, ]
  myDf1row$dt <- as.Date("2017-02-23")
  # Whole-column assignment on a 1-row frame: these columns become plain
  # logical NA, so the save has to cope with untyped NA columns.
  myDf1row$float_1 <- NA
  myDf1row$int_1 <- NA
  myDf1row$str_val <- NA

  cass_save_df(jCassSess, myDf1row, "test_insert_table", row_batches = 2)

  res_1row_df <- cass_query(jCassSess, "select * from test_insert_table where dt = '2017-02-23'")
  expect_equal(nrow(res_1row_df), 1)
  expect_equal(res_1row_df$dt, as.Date("2017-02-23"))
  expect_equal(res_1row_df$hr, as.integer(1))

  saved_row <- filter(res_1row_df, dt == "2017-02-23", hr == 1)
  expect_equal(
    strftime(saved_row$ts_val, format = "%Y-%m-%d %H:%M:%S %z", tz = "UTC"),
    "2017-02-22 05:23:33 +0000"
  )

  expect_true(is.na(saved_row$float_1))
  expect_true(is.na(saved_row$int_1))
  expect_true(is.na(saved_row$str_val))

  expect_equal(class(res_1row_df$factor_str_val), "character")
  # seed-dependent: factor value generated for row 1
  expect_equal(res_1row_df$factor_str_val, "fac_18")

  # test 0 rows case: an empty data frame only produces a notice on stdout
  output_message <- capture.output(
    cass_save_df(jCassSess, myDf1row[0, ], "test_insert_table", row_batches = 2)
  )
  expect_match(output_message[1], "cass_save_df called with empty data frame.  Doing nothing")

  # Clean up the table and release the session.
  cass_update(jCassSess, "drop table if exists test_insert_table")

  close_cass_session(jCassSess)
})


test_that("cass_save_df: with option sleep_between_batches will cause sleeps between batches", {
  # Saves 24 rows with row_batches = 2 and sleep_between_batches = 2, then
  # checks that the debug output reports a total sleep of 24 and that all 24
  # rows were written.
  #
  # NOTE(review): needs whatever backend get_cass_session() connects to with
  # these arguments (presumably a local Cassandra node) -- confirm.
  jCassSess <- get_cass_session("localhost", "Tios001", "cassandrasimple_test")

  # Start from a clean table so the row count below is exact.
  cass_update(jCassSess, "drop table if exists test_insert_table")

  cass_update(jCassSess, "create table if not exists test_insert_table (
              dt date,
              hr tinyint,
              obj_id int,
              int_1 int,
              int_2 int,
              float_1 float,
              float_2 float,
              double_1 double,
              str_val text,
              factor_str_val text,
              ts_val timestamp,
              dt_val date,
              primary key ((dt),hr,obj_id)
  )")

  # Fixed seed so the generated fixture is the same on every run.
  set.seed(13247)

  # 24-row fixture, one row per hour
  myDf1 <- data.frame(
    dt = as.Date("2017-02-22"),
    hr = seq_len(24),
    obj_id = round(runif(24, min = 1, max = 1000), 0),
    int_1 = as.integer(runif(24, min = 1, max = 24)),
    int_2 = as.numeric(seq_len(24)),
    float_1 = runif(24, min = -10, max = 10),
    float_2 = runif(24, min = -1000, max = 1000),
    double_1 = runif(24, min = -1000, max = 1000),
    str_val = paste("Hi! It's hour", seq_len(24)),
    factor_str_val = as.factor(paste0("fac_", round(runif(24, min = 1, max = 100), 0))),
    ts_val = as.POSIXct("2017-02-22 00:23:33", tz = "EST"),
    dt_val = as.Date("2017-03-01"),
    stringsAsFactors = FALSE
  )

  # show_debug = TRUE makes cass_save_df() print progress, captured here.
  # NOTE(review): the expected total of 24 is presumably 12 batches x 2 -- confirm
  # against the cass_save_df() implementation.
  output_message <- capture.output(
    cass_save_df(
      jCassSess, myDf1, "test_insert_table",
      row_batches = 2, sleep_between_batches = 2, show_debug = TRUE
    )
  )
  expect_match(paste0(output_message, collapse = "\n"), "Total sleep: +24 ")

  # Sleeping between batches must not lose any rows.
  res_df <- cass_query(jCassSess, "select * from test_insert_table")
  res_df <- arrange(res_df, hr)
  expect_equal(nrow(res_df), 24)

  # Clean up the table and release the session.
  cass_update(jCassSess, "drop table if exists test_insert_table")

  close_cass_session(jCassSess)
})

test_that("cass_save_df: defensive code cases are handled for bad inputs", {
  # Checks that cass_save_df() rejects invalid table names, invalid
  # row-batch arguments and illegal data frame column names with the
  # expected error messages.
  #
  # NOTE(review): needs whatever backend get_cass_session() connects to with
  # these arguments (presumably a local Cassandra node) -- confirm.
  jCassSess <- get_cass_session("localhost", "Tios001", "cassandrasimple_test")

  cass_update(jCassSess, "drop table if exists test_simple_table")

  cass_update(jCassSess, "create table if not exists test_simple_table (
              dt date,
              hr tinyint,
              int_1 int,
              primary key ((dt),hr)
  )")

  set.seed(13247)
  myDf1 <- data.frame(
    dt = as.Date("2017-02-22"),
    hr = seq_len(24),
    int_1 = as.integer(runif(24, min = 1, max = 24)),
    stringsAsFactors = FALSE
  )

  # Table name validation
  expect_error(cass_save_df(jCassSess, myDf1, NA, 1000), "Invalid table name")
  expect_error(cass_save_df(jCassSess, myDf1, "bad table name", 1000), "Illegal characters in the table name")

  # Row-batch validation: NA, NULL and non-numeric values are all rejected
  expect_error(cass_save_df(jCassSess, myDf1, "ok_table", NA), "Invalid row batches")
  expect_error(cass_save_df(jCassSess, myDf1, "ok_table", NULL), "Invalid row batches")
  expect_error(cass_save_df(jCassSess, myDf1, "ok_table", "hello"), "Invalid row batches")

  # Column name validation: dots in a column name are rejected
  myDfBadCol1 <- data.frame(
    dt = as.Date("2017-02-22"),
    hr = seq_len(24),
    "another.bad.col" = as.integer(runif(24, min = 1, max = 24)),
    stringsAsFactors = FALSE
  )
  expect_error(cass_save_df(jCassSess, myDfBadCol1, "test_simple_table", 1000), "Illegal characters in the df column names")

  # NOTE(review): this fixture uses the same column name as myDfBadCol1, so it
  # repeats the case above; it was presumably meant to exercise a different
  # illegal name -- confirm and adjust.
  myDfBadCol2 <- data.frame(
    dt = as.Date("2017-02-22"),
    hr = seq_len(24),
    "another.bad.col" = as.integer(runif(24, min = 1, max = 24)),
    stringsAsFactors = FALSE
  )
  expect_error(cass_save_df(jCassSess, myDfBadCol2, "test_simple_table", 1000), "Illegal characters in the df column names")

  # Drop the table created above so this test leaves nothing behind,
  # matching the cleanup done by the other cass_save_df tests.
  cass_update(jCassSess, "drop table if exists test_simple_table")

  close_cass_session(jCassSess)
})
# thefooj/cassandrasimple documentation built on Oct. 6, 2020, 10:21 p.m.