tests/testthat/test_system.R

context("test system functions")
test_that("test clean_data_frame",{
  # create df with dupicated columns names and data frame type column
  df <- data.frame(a = 1:5, a = 2:6)
  colnames(df)<-c("a", "a")
  df$b <- data.frame(c = 3:7, d = 4:8)
  result <- clean_data_frame(df)
  # data frame column should be flattened out and the result data frame should have 4 columns.
  expect_equal(length(colnames(result)), 4)
  colnames(result)
  expect_equal(colnames(result), c("a", "a.1", "b.c", "b.d"))

  df2 <- data.frame(a = 1:5)
  colnames(df2)<-c("country \\ year")
  result2 <- clean_data_frame(df2)
  expect_equal(colnames(result2), c("country  year"))

  # Make sure clean_data_frame drops row names.
  # Use mtcars for the test because it has row names.
  df3a <- mtcars
  df3a <- clean_data_frame(df3a)
  df3b <- mtcars
  row.names(df3b) <- NULL
  # Compare the row names between data frames.
  # One data frame is processed by clean_data_frame.
  # The other one is processed manually.
  expect_equal(row.names(df3a), row.names(df3b))
})

test_that("test within_date_range", {
  library(lubridate)
  df <- data.frame(
    date = c(
      lubridate::today(), lubridate::today(),
      lubridate::today()-5, lubridate::today()-5,
      lubridate::today()-lubridate::weeks(3), lubridate::today()-lubridate::weeks(3),
      lubridate::today() %m-% months(2), lubridate::today() %m-% months(2),
      lubridate::today() %m-% months(11), lubridate::today() %m-% months(11)
    )
  )
  df_today <- df %>% dplyr::filter(exploratory::within_date_range(date, "today"))
  expect_equal(nrow(df_today), 2)
  df_7days <- df %>% dplyr::filter(exploratory::within_date_range(date, "last_7_days"))
  expect_equal(nrow(df_7days), 4)
  df_4weeks <- df %>% dplyr::filter(exploratory::within_date_range(date, "last_4_weeks"))
  expect_equal(nrow(df_4weeks), 6)
  df_3months <- df %>% dplyr::filter(exploratory::within_date_range(date, "last_3_months"))
  expect_equal(nrow(df_3months), 8)
  df_12months <- df %>% dplyr::filter(exploratory::within_date_range(date, "last_12_months"))
  expect_equal(nrow(df_12months), 10)
  df_all <- df %>% dplyr::filter(exploratory::within_date_range(date, "all"))
  expect_equal(nrow(df_all), 10)
  df_mtod <- data.frame(
    date = c(
      floor_date(lubridate::today(), unit = "month"),
      floor_date(lubridate::today(), unit = "month")
    )
  )
  df_mtod_res <- df_mtod %>% dplyr::filter(exploratory::within_date_range(date, "month_to_date"))
  expect_equal(nrow(df_mtod_res), 2)

  df_qtod <- data.frame(
    date = c(
      lubridate::today(),
      lubridate::today()
    )
  )
  df_qtod_res <- df_qtod %>% dplyr::filter(exploratory::within_date_range(date, "quarter_to_date"))
  expect_equal(nrow(df_qtod_res), 2)

  df_ytod <- data.frame(
    date = c(
      lubridate::today(),
      lubridate::today(),
      lubridate::today()
    )
  )
  df_ytod_res <- df_ytod %>% dplyr::filter(exploratory::within_date_range(date, "year_to_date"))
  expect_equal(nrow(df_ytod_res),3)
})

test_that("test clean_names", {
  df0 <- data.frame("a space name" = c(1,2,3), "b space name" = c(4,5,6))
  df1 <- df0 %>% dplyr::rename(`a space name` = "a.space.name", `b space name` = "b.space.name")
  df2 <- exploratory::clean_names(df1, case = "remove_space")
  expect_equal(colnames(df2), c("aspacename", "bspacename"))
  df3 <- exploratory::clean_names(df1)
  expect_equal(colnames(df3), c("a_space_name", "b_space_name"))
  df4 <- exploratory::clean_names(df1, case ="parsed")
  expect_equal(colnames(df4), c("a_space_name", "b_space_name"))
  df5 <- exploratory::clean_names(df1, case ="snake")
  expect_equal(colnames(df5), c("a_space_name", "b_space_name"))
  df6 <- exploratory::clean_names(df1, case ="lower_camel")
  expect_equal(colnames(df6), c("aSpaceName", "bSpaceName"))
  df7 <- exploratory::clean_names(df1, case ="upper_camel")
  expect_equal(colnames(df7), c("ASpaceName", "BSpaceName"))
  df8 <- exploratory::clean_names(df1, case ="all_caps")
  expect_equal(colnames(df8), c("A_SPACE_NAME", "B_SPACE_NAME"))
  df9 <- exploratory::clean_names(df1, case ="lower_upper")
  expect_equal(colnames(df9), c("aSPACEname", "bSPACEname"))
  df10 <- exploratory::clean_names(df1, case ="upper_lower")
  expect_equal(colnames(df10), c("AspaceNAME", "BspaceNAME"))
  df11 <- exploratory::clean_names(df1, case ="old_janitor")
  expect_equal(colnames(df11), c("a_space_name", "b_space_name"))
  df12 <- df0 %>% dplyr::rename(` a space name ` = "a.space.name", ` b space name ` = "b.space.name")
  df13 <- exploratory::clean_names(df12, case ="trim_space")
  expect_equal(colnames(df13), c("a space name", "b space name"))


})

test_that("test parse_html_tables",{
  result <- parse_html_tables('https://download2.exploratory.io/test/table.html')
  expect_equal(length(result), 1)
})

test_that("test parse_html_tables with japanese euc-jp table",{
  result <- parse_html_tables('http://download.exploratory.io/test/table_eucjp.html', 'EUC-JP')
  expect_equal(length(result), 1)
})

test_that("test parse_html_tables with japanese shift_jis table",{
  result <- parse_html_tables('http://download.exploratory.io/test/table_sjis.html', 'SHIFT_JIS')
  expect_equal(length(result), 1)
})

if (FALSE) { # Disabled for now since this test is susceptible to webpage change and unstable.
  test_that("test scrape_html_table",{
    result <- scrape_html_table('https://www.cbinsights.com/research-unicorn-companies', 1, TRUE)
    expect_equal(ncol(result), 6)
    # may change if the web page is updated
    # seems it changes quite often, excluding this check.
    #expect_equal(nrow(result), 166)
  })
}


test_that("test scrape_html_table with japanese euc-jp table",{
  result <- scrape_html_table('http://download.exploratory.io/test/table_eucjp.html', 1, TRUE, 'EUC-JP')
  expect_equal(ncol(result), 2)
  expect_equal(nrow(result), 3)
})

test_that("test scrape_html_table with japanese shift_jis table",{
  result <- scrape_html_table('http://download.exploratory.io/test/table_sjis.html', 1, TRUE, 'SHIFT_JIS')
  expect_equal(ncol(result), 2)
  expect_equal(nrow(result), 3)
})

test_that("test source check conflict case", {
  # this fails in devtools::check() because it can't find the pass because ../../R/model_builder.R can't be found
  # but this works in devtools::test(), so this needs condition to check the existance of the file.
  if(file.exists("../../R/model_builder.R")){
    filenames <- c("../../R/model_builder.R", "../../R/don't_exist.R")

    # suppress file doesn't exist warning
    suppressWarnings({ret <- checkSourceConflict(filenames)})
    expect_true(!is.null(ret[[filenames[[1]]]]$names))
    expect_true(is.null(ret[[filenames[[2]]]]$names))

    expect_true(is.null(ret[[filenames[[1]]]]$error))
    expect_true(!is.null(ret[[filenames[[2]]]]$error))
  } else {
    testthat::skip("../../R/model_builder.R doesn't exist")
  }
})

test_that("test statecode",{

  abbs <- c("NY", "CA", "IL", "DC", "DC")
  num_codes <- c("36", "06", "17", "11", "11")
  names <- c("New York","California","Illinois","District of Columbia","District of Columbia")
  dirty_names <- c("* New York","California[4]"," Illinois","* District of Columbia","* District of Columbia[3]")
  namesWithDifferentCases <- c("new york","califorNIA","ILLINOIS", "districtOf columbia","washington D.C.")
  divisions <- c("Middle Atlantic","Pacific", "East North Central", "South Atlantic", "South Atlantic")
  regions <- c("Northeast","West","North Central", "South", "South")

  expect_equal(names, statecode(abbs, "name"))
  expect_equal(divisions, statecode(abbs, "division"))
  expect_equal(regions, statecode(abbs, "region"))
  expect_equal(abbs, statecode(names, "alpha_code"))
  expect_equal(num_codes, statecode(names, "num_code"))
  # with different cases
  expect_equal(abbs, statecode(namesWithDifferentCases, "alpha_code"))
  # format test
  expect_equal(names, statecode(namesWithDifferentCases, "name"))
  # normalize test
  expect_equal(abbs, statecode(dirty_names, "alpha_code"))
})

test_that("test select_columns",{
  df <- data.frame(year=c(2014, 2015, 2016), sales=c(400, 500, 600), profit=c(200, 200, 300))
  # it selects only year and profit
  df1 <- df %>% exploratory::select_columns('year2', 'year1', 'year', 'profit', 'sales1')
  expect_equal(ncol(df1), 2)
  expect_equal(colnames(df1), c('year', 'profit'))

  df2 <- df %>% exploratory::select_columns('year')
  expect_true(is.data.frame(df2))
})

test_that("test select_columns with one column", {
  df <- structure(
    list(
      id = c("cus_AFEeV9EMHRXGeS", "cus_AA00SSatqya7uv", "cus_ARStNcs5xADH7a"),
      object = c("customer", "customer", "customer")
    ),
    .Names = c("id", "object"), class = "data.frame", row.names = c(443L, 609L, 131L)
  )

  col <- exploratory::select_columns(exploratory::clean_data_frame(df), "id") %>% colnames()
  expect_equal(col, "id")
})

test_that("test select_columns with exclude option",{
  df <- data.frame(year=c(2014, 2015, 2016), sales=c(400, 500, 600), profit=c(200, 200, 300))
  # It selects only sales. year and profit will be EXCLUDED
  df1 <- df %>% exploratory::select_columns('year2', 'year1', 'year', 'profit', 'sales1', exclude=TRUE)
  expect_equal(ncol(df1), 1)
  expect_equal(colnames(df1), c('sales'))
  expect_true(is.data.frame(df1))

})

test_that("countycode", {
  ret1 <- countycode(c("California", "CA"),c("San Francisco", "San Francisco"))
  expect_equal(ret1, c("06075", "06075"))
  ret2 <- countycode(c("MD", "MD", "MD"),c("Baltimore", "Baltimore City", "City of Baltimore"))
  expect_equal(ret2, c("24005", "24510", "24510"))
})
test_that("js_glue_transformer", {
  exploratory_env <- new.env()
  exploratory_env$.config <- new.env()

  exploratory_env$v <- c('a"',"b'","c")
  res <- exploratory:::glue_exploratory("@{ `v` }", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), '"a\\\"", "b\'", "c"') # default quote case.

  res <- exploratory:::glue_exploratory("@{`v`, quote=''}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "a\", b', c") # No quote case.

  exploratory_env$.config$v <- new.env()
  exploratory_env$.config$v$quote <- "" # Made the default no quote.
  res <- exploratory:::glue_exploratory("@{`v`}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "a\", b', c") # No quote result

  rm("v", envir=exploratory_env$.config) # clear config.

  exploratory_env$v <- c(T,F,NA)
  res <- exploratory:::glue_exploratory("@{v}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "true, false, null")

  # Empty vector case.
  exploratory_env$v <- as.character(c())
  res <- exploratory:::glue_exploratory("@{v}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "")

  exploratory_env$v <- 1
  exploratory_env$w <- 2
  exploratory_env$x <- 1000000
  res <- exploratory:::glue_exploratory("{a: {x: @{v}}, b:@{w}, c:@{x}}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "{a: {x: 1}, b:2, c:1000000}")

  exploratory_env$stock_symbols <- c("AAPL", "GOOG")
  res <- exploratory:::glue_exploratory("{stock:{$in:[@{stock_symbols}]}}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "{stock:{$in:[\"AAPL\", \"GOOG\"]}}")

  exploratory_env$stock_symbols <- c()
  res <- exploratory:::glue_exploratory("{stock:{$in:[@{stock_symbols}]}}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "{stock:{$in:[]}}", "message")

  exploratory_env$number_range <- c(-10, 20)
  res <- exploratory:::glue_exploratory("{salary:{$gte:@{number_range[1]}, $lt:@{number_range[2]}}}", .transformer=exploratory:::js_glue_transformer)
  expect_equal(as.character(res), "{salary:{$gte:-10, $lt:20}}")
})

test_that("sql_glue_transformer", {
  exploratory_env <- new.env()
  exploratory_env$.config <- new.env()

  exploratory_env$v <- c(1,2,3)
  res <- exploratory:::glue_exploratory("@{ v }", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "1, 2, 3")

  exploratory_env$v <- c('a"',"b'","c")
  res <- exploratory:::glue_exploratory("@{ `v` }", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "'a\"', 'b''', 'c'") # Not sure if this behavior works for all types of databases.

  exploratory_env$v <- c("a","b","c")
  res <- exploratory:::glue_exploratory("@{`v`, quote=FALSE}", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "a, b, c") # No quote case.

  exploratory_env$v <- c('a"',"b'","c")
  res <- exploratory:::glue_exploratory("@{ `v`, quote=\"\", escape=\"'\" }", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "a\", b'', c") # No quote but with escape.

  exploratory_env$dept_names <- c("Sales","HR","CEO's secretary", "Data Science\\Statistics")
  exploratory_env$empid_above <- 1100
  res <- exploratory:::glue_exploratory("select * from emp where deptname in (@{dept_names}) and empid > @{empid_above}", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "select * from emp where deptname in ('Sales', 'HR', 'CEO''s secretary', 'Data Science\\Statistics') and empid > 1100")

  exploratory_env$dept_names <- as.character(c())
  res <- exploratory:::glue_exploratory("select * from emp where deptname in (@{dept_names}) and empid > @{empid_above}", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "select * from emp where deptname in (NULL) and empid > 1100")

  exploratory_env$number_limit <- 1000000
  res <- exploratory:::glue_exploratory("select top @{number_limit} * from emp", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "select top 1000000 * from emp")

  exploratory_env$number_range <- c(-10, 20)
  res <- exploratory:::glue_exploratory("select * from emp where salary between @{number_range[1]} and @{number_range[2]}", .transformer=exploratory:::sql_glue_transformer)
  expect_equal(as.character(res), "select * from emp where salary between -10 and 20")
})

test_that("bigquery_glue_transformer", {
  exploratory_env <- new.env()
  exploratory_env$.config <- new.env()

  exploratory_env$v <- c(1,2,3)
  res <- exploratory:::glue_exploratory("@{ v }", .transformer=exploratory:::bigquery_glue_transformer)
  expect_equal(as.character(res), "1, 2, 3")

  exploratory_env$v <- c("a","b","c")
  res <- exploratory:::glue_exploratory("@{ `v` }", .transformer=exploratory:::bigquery_glue_transformer)
  expect_equal(as.character(res), "'a', 'b', 'c'") # Not sure if this behavior works for all types of databases.
  res <- exploratory:::glue_exploratory("@{ `v` , quote = FALSE }", .transformer=exploratory:::bigquery_glue_transformer)
  expect_equal(as.character(res), "a, b, c") # No quote case

  exploratory_env$dept_names <- c("Sales","HR","CEO's secretary", "Data Science\\Statistics")
  exploratory_env$empid_above <- 1100
  res <- exploratory:::glue_exploratory("select * from emp where deptname in (@{dept_names}) and empid > @{empid_above}", .transformer=exploratory:::bigquery_glue_transformer)
  expect_equal(as.character(res), "select * from emp where deptname in ('Sales', 'HR', 'CEO\\'s secretary', 'Data Science\\\\Statistics') and empid > 1100")

  exploratory_env$dept_names <- as.character(c())
  res <- exploratory:::glue_exploratory("select * from emp where deptname in (@{dept_names}) and empid > @{empid_above}", .transformer=exploratory:::bigquery_glue_transformer)
  expect_equal(as.character(res), "select * from emp where deptname in (NULL) and empid > 1100")

  exploratory_env$number_limit <- 1000000
  res <- exploratory:::glue_exploratory("select * from emp limit @{number_limit}", .transformer=exploratory:::bigquery_glue_transformer)
  expect_equal(as.character(res), "select * from emp limit 1000000")


})

test_that("glue_salesforce", {
  res <- exploratory:::glue_salesforce("${1+1}")
  expect_equal(as.character(res), "2")

  exploratory_env <- new.env()
  exploratory_env$.config <- new.env()
  exploratory_env$number_limit <- 1

  res <- exploratory:::glue_salesforce(exploratory:::glue_exploratory("${1+1 + @{number_limit}}", .transformer=exploratory:::salesforce_glue_transformer))
  expect_equal(as.character(res), "3")

})

test_that("prefecturecode", {

  df <- readRDS(url("https://www.dropbox.com/s/eygfwy9mo7xn9xb/prefecturecode_testdata.rds?raw=1"))

  res <- exploratory::prefecturecode(df$hiragana, output_type="name")
  expect_equal(FALSE, any(is.na(res)))

  res <- exploratory::prefecturecode(df$kanji.with.todofuken, output_type="name")
  expect_equal(FALSE, any(is.na(res)))
  # Test case for Hokkaido (Kanji), Tokyo (Kanji), and Osaka (Hiragana)
  df <- data.frame(a=c("\u5317\u6D77\u9053", "\u6771\u4eac", "", NA, "\u304a\u304a\u3055\u304b"))
  result <- exploratory::prefecturecode(df$a, output_type = "code")
  expect_equal(result,c("01", "13", NA, NA, "27"))


  res <- exploratory::prefecturecode(df$kanji, output_type="name")
  expect_equal(FALSE, any(is.na(res)))

  res <- exploratory::prefecturecode(df$romaji.wikipedia, output_type="name")
  expect_equal(FALSE, any(is.na(res)))

  res <- exploratory::prefecturecode(df$romaji.normalized, output_type="name")
  expect_equal(FALSE, any(is.na(res)))

  res <- exploratory::prefecturecode(df$romaji.test, output_type="name")
  expect_equal(FALSE, any(is.na(res)))

})

test_that("geocode_japan_prefecture", {
  df <- readRDS(url("https://www.dropbox.com/s/eygfwy9mo7xn9xb/prefecturecode_testdata.rds?raw=1"))

  res <- exploratory::geocode_japan_prefecture(df, "kanji")
  expect_equal(FALSE, any(is.na(res$longitude)))
  expect_equal(FALSE, any(is.na(res$latitude)))
})

test_that("city_code_japan", {
  # Data: tibble(
  #  x=c("Hokkaido", "Tokyo-to", "Kanagawa-ken", "Kanagawa-ken"),
  #  y=c("Sapporo-shi Shiraishi-ku", "Inagi-shi", "Ashigarashimo-gun Hakone-machi", "Hakone-machi"))
  # (In all Japanese Kanji chars).
  df <- tibble(
    x=c("\u5317\u6d77\u9053", "\u6771\u4eac\u90fd", "\u795e\u5948\u5ddd\u770c", "\u795e\u5948\u5ddd\u770c"),
    y=c("\u672d\u5e4c\u5e02\u767d\u77f3\u533a", "\u7a32\u57ce\u5e02", "\u8db3\u67c4\u4e0b\u90e1\u7bb1\u6839\u753a", "\u7bb1\u6839\u753a"))
  res <- exploratory::city_code_japan(df$x, df$y)
  expect_equal(FALSE, any(is.na(res)))
  expect_equal("01104", res[1])
  expect_equal("13225", res[2])
  # It should resolve the city code from the city name with "gun".
  expect_equal("14382", res[3])
  # It should resolve the city code from the city name without "gun".
  expect_equal("14382", res[4])

})

test_that("geocode_japan_city", {
  # Data: tibble(x=c("Hokkaido", "Tokyo-to"), y=c("Sapporo-shi Shiraishi-ku", "Inagi-shi"))  (In all Japanese Kanji chars).
  df <- tibble(x=c("\u5317\u6d77\u9053", "\u6771\u4eac\u90fd"), y=c("\u672d\u5e4c\u5e02\u767d\u77f3\u533a", "\u7a32\u57ce\u5e02"))
  df$code <- exploratory::city_code_japan(df$x, df$y)
  res <- exploratory::geocode_japan_city(df, "code")
  expect_equal(FALSE, any(is.na(res$longitude)))
  expect_equal(FALSE, any(is.na(res$latitude)))
})

test_that("center.pacific.ocean argument in geocode_world_country.", {
  df <- tibble(country.code=c("US"))
  res <- exploratory::geocode_world_country(df, "country.code", center.pacific.ocean=TRUE)
  expect_equal(264.287109, res$longitude[[1]])
  res <- exploratory::geocode_world_country(df, "country.code", center.pacific.ocean=FALSE)
  expect_equal(-95.712891, res$longitude[[1]])
})

test_that("read_parquet_file", {
  df <- read_parquet_file("https://dl.dropbox.com/s/sjkgk9gj0vemq36/sample.parquet")
  expect_equal(TRUE, is.data.frame(df))
})

test_that("read_parquet_file downlod failed error message", {
  tryCatch({
    df <- exploratory::read_parquet_file("https://dummy.dropbox.com/s/sjkgk9gj0vemq36/sample2.parquet")
  }, error = function(cond) {
    expect_equal(str_detect(cond$message, "EXP-DATASRC-15 ::"), TRUE)
    expect_equal(str_detect(cond$message, "Could not resolve host|Couldn't resolve host"), TRUE)
  })
})

test_that("read_parquet_file open local file failed error message", {
  tryCatch({
    df <- exploratory::read_parquet_file("test_dummy.parquet")
  }, error = function(cond) {
    if (Sys.info()["sysname"]=="Windows") { # Windows show different message than Linux and Mac.
      expect_equal(stringr::str_detect(cond$message, "EXP-DATASRC-13 :: \\[\"test_dummy.parquet\",\"IOError: Failed to open local file"), TRUE)
    } else {
      expect_equal(cond$message, c("EXP-DATASRC-14 :: [\"test_dummy.parquet\"] :: The file does not exist."))
    }
  })
})

test_that("read_rds_file failed to import remove file error message", {
  tryCatch({
    df <- exploratory::read_rds_file("https://dummy.dropbox.com/s/sjkgk9gj0vemq36/sample.rds")
  }, error = function(cond) {
    expect_equal(stringr::str_detect(cond$message, "EXP-DATASRC-13 :: \\[\"https://dummy.dropbox.com/s/sjkgk9gj0vemq36/sample.rds\""),TRUE)
  })
})

test_that("read_rds_file open local file failed error message", {
  tryCatch({
    df <- exploratory::read_rds_file("test_dummy.rds")
  }, error = function(cond) {
    expect_equal(cond$message, c("EXP-DATASRC-14 :: [\"test_dummy.rds\"] :: The file does not exist."))
  })
})

test_that("read_delim_file downlod failed error message", {
  tryCatch({
    df <- exploratory::read_delim_file("https://dummy.dropbox.com/s/sjkgk9gj0vemq36/sample.csv", delim = ",")
  }, error = function(cond) {
    expect_equal(str_detect(cond$message, "EXP-DATASRC-15 ::"), TRUE)
    expect_equal(str_detect(cond$message, "Could not resolve host|Couldn't resolve host"), TRUE)
  })
})

test_that("read_delim_file with guess delimiter error handling and retry with default comma delimiter", {
  df <- exploratory::read_delim_file("https://www.dropbox.com/scl/fi/o3vuqea89mp6t24e61cmg/one-col.csv?rlkey=6ko09im7w4bn5x0yrj9kjduzv&dl=1", delim = NULL)
  expect_equal(colnames(df), c("a"))
})

test_that("read_delim_file witout requirement delim argument error message", {
  tryCatch({
    df <- exploratory::read_delim_file("https://www.dropbox.com/s/tb6ppzockjao7vg/too_longer_header.csv?dl=1")
  }, error = function(cond) {
    expect_equal(cond$message, c("EXP-DATASRC-13 :: [\"https://www.dropbox.com/s/tb6ppzockjao7vg/too_longer_header.csv?dl=1\",\"argument \\\"delim\\\" is missing, with no default\"] :: Failed to import file."))
  })
})

test_that("read_delim_file with too long column name error message", {
  tryCatch({
    df <- exploratory::read_delim_file("https://www.dropbox.com/s/tb6ppzockjao7vg/too_longer_header.csv?dl=1", delim=",")
    as.name(colnames(df))
  }, error = function(cond) {
    expect_equal(cond$message, c("variable names are limited to 10000 bytes"))
  })
})


test_that("read_delim_file with incorrect encoding error message (invalid multibyte string)", {
  tryCatch({
    df <- exploratory::read_delim_file("https://www.dropbox.com/s/zqr228arxwnxsvp/b2010_ksmj.csv?dl=1", delim = ",",
                                       quote = "\"" , col_names = TRUE , na = c(''),n_max=50 ,
                                       locale=readr::locale(encoding = "UTF-8", decimal_mark = ".", tz = "Asia/Tokyo", grouping_mark = "," ),
                                       trim_ws = TRUE , progress = FALSE)
  }, error = function(cond) {
    if (Sys.info()["sysname"]=="Linux") { # for Linux case, exploratory::read_delim_file returns error message without ID to make it more readable.
      expect_equal(cond$message, c("The encoding of the file may not be UTF-8. Select other encoding and try again."))
    } else {
      expect_equal(cond$message, c("EXP-DATASRC-13 :: [\"https://www.dropbox.com/s/zqr228arxwnxsvp/b2010_ksmj.csv?dl=1\",\"invalid multibyte string, element 1\"] :: Failed to import file."))
    }
  })
})


test_that("read_delim_file open local file failed error message", {
  tryCatch({
    df <- exploratory::read_delim_file("test_dummy.csv", delim=",")
  }, error = function(cond) {
    expect_equal(cond$message, c("EXP-DATASRC-14 :: [\"test_dummy.csv\"] :: The file does not exist."))
  })
})

test_that("read_delim_file with text data", {
  df <- exploratory::read_delim_file(data_text = "a,b\n1,2", delim=",")
  expect_equal(nrow(df),1)
  expect_equal(ncol(df),2)
})

test_that("case_when mixed data types error message", {
  skip("dplyr 1.0.10 has an issue in reporting error in this case, but it is fixed in the latest main branch.")
  tryCatch({
    Global_Sales_1_source1 <- exploratory::read_excel_file("https://www.dropbox.com/s/t9ou9hmbqdxj75f/Global_Sales.xlsx?dl=1")
    Global_Sales_2 <- Global_Sales_1_source1 %>% dplyr::mutate(calculation_1 = case_when(Segment == "Consumer" ~ 1 , TRUE ~ Segment))
  }, error = function(e) {
    if (!is.null(e$parent)) {
      # Because of https://github.com/tidyverse/dplyr/issues/6261, now we need to check the below error message.
      # Once the issue is fixed, we will update the test with the original condition.
      expect_equal(stringr::str_detect(e$parent$message, "must be a double vector|must be the same length as the vector"),TRUE)
    } else {
      # Because of https://github.com/tidyverse/dplyr/issues/6261, now we need to check the below error message.
      # Once the issue is fixed, we will update the test with the original condition.
      expect_equal(stringr::str_detect(e$message, "must be a double vector|must be the same length as the vector"),TRUE)
    }
  })
})

test_that("read_excel_file downlod failed error message", {
  tryCatch({
    df <- exploratory::read_excel_file("https://dl.dropbox.com/s/sjkgk9gj0vemq36/sample.xlsx")
  }, error = function(cond) {
    expect_equal(stringr::str_detect(cond$message, "EXP-DATASRC-13 :: \\[\"https://dl.dropbox.com/s/sjkgk9gj0vemq36/sample.xlsx\",\"zip file (.*) cannot be opened"), TRUE)
  })
})

test_that("read_excel_file open local file failed error message", {
  tryCatch({
    df <- exploratory::read_excel_file("test_dummy.xlsx")
  }, error = function(cond) {
    expect_equal(cond$message, c("EXP-DATASRC-14 :: [\"test_dummy.xlsx\"] :: The file does not exist."))
  })
})

test_that("read_parquet_file should be to read the parquet file with an invalid UTF-8 encoding.", {
  # Make sure that the current arrow version (5.0) can read this parquet file.
  # arrow 3.0/4.0, cannot read this parquet file and throw an error
  # "Invalid UTF-8 payload" but it is fixed in 5.0.
  df <- read_parquet_file("https://dl.dropbox.com/s/9yp6yk1jjnd8dz0/invalid_utf8_payload_test.parquet")
  expect_equal(TRUE, is.data.frame(df))
})

test_that("test filter_cascade",{
  library(stringr)
  df <- readRDS(url("https://www.dropbox.com/s/p2vmd79ly1zugh9/airbnb_nyc_filter_7.rds?dl=1"))
  df <- df %>% filter_cascade(detect_outlier(reviews_per_month, "iqr") == "Normal", cut(reviews_per_month, breaks = 5, dig.lab = 10) %in% c("(0.21,0.31]"))
  expect_equal(nrow(df), 2661)
})

test_that("read_excel_files", {
  # Download the files to a local directory
  t_file <- tempfile(fileext = ".xls")
  t_file2 <- tempfile(fileext = ".xls")
  download.file("https://www.dropbox.com/scl/fi/7m4ay1x18jm1kq7kv58if/merge1.xls?rlkey=4wyi45j2cer002deefi7ndaav&dl=1", destfile = t_file, mode = "wb")
  download.file("https://www.dropbox.com/scl/fi/by7ulgjstptwo7jnftnrm/merge2.xls?rlkey=fql98njdx0vbgrw13h4n23iaz&dl=1", destfile = t_file2, mode = "wb")

  # Read the downloaded files
  df <- exploratory::read_excel_files(c(t_file, t_file2))
  expect_equal(colnames(df), c("id", "放送...1", "放送...2", "個人...3", "個人...4"))
})

test_that("test setConnectionPoolMode", {
  # Save the original pool_connection value to restore later
  original_pool_connection <- exploratory::getConnectionPoolMode()

  # Test setting connection pool mode to TRUE
  exploratory::setConnectionPoolMode(TRUE)
  expect_true(exploratory::getConnectionPoolMode())

  # Test setting connection pool mode to FALSE
  exploratory::setConnectionPoolMode(FALSE)
  expect_false(exploratory::getConnectionPoolMode())

  # Restore the original value
  exploratory::setConnectionPoolMode(original_pool_connection)
})

test_that("unnest_safe handles group columns with and without '|' and other edge cases", {
  # 1. No grouping columns, simple unnest
  df1 <- tibble(id = 1:2, val = list(1:2, 3:4))
  res1 <- df1 %>% unnest_safe("val")
  expect_equal(res1$val, c(1,2,3,4))

  # 2. Grouping column with '|'
  df2 <- tibble(`group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~` = c("A", "A", "B"), val = list(1:2, 3:4, 5:6)) %>% group_by(`group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~`)
  res2 <- df2 %>% unnest_safe("val")
  expect_equal(res2$val, c(1,2,3,4,5,6))
  expect_true("group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~" %in% colnames(res2))
  expect_equal(unique(res2$`group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~`), c("A", "B"))

  # 3. Grouping column without '|'
  df3 <- tibble(g = c("A", "A", "B"), val = list(1:2, 3:4, 5:6)) %>% group_by(g)
  res3 <- df3 %>% unnest_safe("val")
  expect_equal(res3$val, c(1,2,3,4,5,6))
  expect_true("g" %in% colnames(res3))
  expect_equal(unique(res3$g), c("A", "B"))

  # 4. Multiple grouping columns, some with '|'
  df4 <- tibble(`group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~` = c("A", "A", "B"), g2 = c(1,1,2), val = list(1:2, 3:4, 5:6)) %>% group_by(`group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~`, g2)
  res4 <- df4 %>% unnest_safe("val")
  expect_equal(res4$val, c(1,2,3,4,5,6))
  expect_true(all(c("group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~", "g2") %in% colnames(res4)))
  expect_equal(unique(res4$`group !\"#$%&'()*+, -./:;<=>?@[]^_'{|}~`), c("A", "B"))
  expect_equal(unique(res4$g2), c(1, 2))

  # 5. Unnesting a column with empty lists
  df5 <- tibble(id = 1:3, val = list(1:2, integer(0), 3:4))
  res5 <- df5 %>% unnest_safe("val")
  expect_equal(res5$val, c(1,2,3,4))
})
exploratory-io/exploratory_func documentation built on June 12, 2025, 2:03 p.m.