tests/testthat/helper.R

# helper function to skip test if noctua unit test environment variables not set
skip_if_no_env <- function(){
  have_arn <- Sys.getenv("noctua_arn") != "" 
  have_query <- is.s3_uri(Sys.getenv("noctua_s3_query"))
  have_tbl <- is.s3_uri(Sys.getenv("noctua_s3_tbl"))
  if (!all(have_arn, have_query, have_tbl))
    skip("Environment variables are not set for testing")
}

# helper function to skip tests if we don't have the suggested package
skip_if_package_not_avialable <- function(pkg) {
  if (!nzchar(system.file(package = pkg)))
    skip(sprintf("`%s` not available for testing", pkg))
}

# expected athena ddl's
# expected athena ddl's
tbl_ddl <- 
  list(tbl1 = 
         DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
ROW FORMAT DELIMITED
	FIELDS TERMINATED BY ','
	LINES TERMINATED BY ", gsub("_","","'\\_n'"),
                  "\nLOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'
TBLPROPERTIES (\"skip.header.line.count\"=\"1\");")),
tbl2 = 
  DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
ROW FORMAT DELIMITED
	FIELDS TERMINATED BY ','
	LINES TERMINATED BY ", gsub("_","","'\\_n'"),
           "\nLOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'
TBLPROPERTIES (\"skip.header.line.count\"=\"1\",
\t\t'compressionType'='gzip');")),
tbl3 = 
  DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
ROW FORMAT DELIMITED
\tFIELDS TERMINATED BY '	'
\tLINES TERMINATED BY ", gsub("_","","'\\_n'"),"
LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'
TBLPROPERTIES (\"skip.header.line.count\"=\"1\");")),
tbl4 = 
  DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
ROW FORMAT DELIMITED
\tFIELDS TERMINATED BY '	'
\tLINES TERMINATED BY ", gsub("_","","'\\_n'"),"
LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'
TBLPROPERTIES (\"skip.header.line.count\"=\"1\",
\t\t'compressionType'='gzip');")), 
tbl5 = 
  DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
STORED AS PARQUET
LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'\n;")),
tbl6 = 
  DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
PARTITIONED BY (`timestamp` STRING)
STORED AS PARQUET
LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'
tblproperties (\"parquet.compress\"=\"SNAPPY\");")),
tbl7 = 
  DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
ROW FORMAT  serde 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'\n")),
tbl8 = 
  DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` (
  `x` INT,
  `y` STRING
)
PARTITIONED BY (`timestamp` STRING)
ROW FORMAT  serde 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'\n")))


# static Athena Query Request Tests
athena_test_req1 <-
         list(OutputLocation = Sys.getenv("noctua_s3_query"),
              EncryptionConfiguration = list(EncryptionOption = "SSE_S3",
                                             KmsKey = "test_key"))
athena_test_req2 <-
       list(OutputLocation = Sys.getenv("noctua_s3_query"),
            EncryptionConfiguration = list(EncryptionOption = "SSE_S3"))
athena_test_req3 <- list(OutputLocation = Sys.getenv("noctua_s3_query"))
athena_test_req4 <- list(OutputLocation = Sys.getenv("noctua_s3_query"))

show_ddl <- DBI::SQL(paste0('CREATE EXTERNAL TABLE `default.test_df`(\n  `w` timestamp, \n  `x` int, \n  `y` string, \n  `z` boolean)\nPARTITIONED BY ( \n  `timestamp` string)\nROW FORMAT DELIMITED \n  FIELDS TERMINATED BY \'\\t\' \n  LINES TERMINATED BY \'\\n\' \nSTORED AS INPUTFORMAT \n  \'org.apache.hadoop.mapred.TextInputFormat\' \nOUTPUTFORMAT \n  \'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\'\nLOCATION\n  \'' ,Sys.getenv("noctua_s3_tbl"), 'test_df/default/test_df\'\nTBLPROPERTIES (\n  \'skip.header.line.count\'=\'1\')'))

expected_stat_output = c(
  "EngineExecutionTimeInMillis",
  "DataScannedInBytes",
  "DataManifestLocation",
  "TotalExecutionTimeInMillis",
  "QueryQueueTimeInMillis",
  "QueryPlanningTimeInMillis",
  "ServiceProcessingTimeInMillis"
)
DyfanJones/noctua documentation built on April 15, 2024, 3:22 p.m.