# tests/testthat/test-dataset-json.R

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Gate this file on the "dataset" feature.
# NOTE(review): skip_if_not_available() is a helper defined outside this file;
# presumably it skips the tests below when arrow was built without dataset
# support -- confirm against the helper.
skip_if_not_available("dataset")

# Attach dplyr for the verbs and the %>% pipe used in the tests below;
# warn.conflicts = FALSE suppresses the name-masking warnings on attach.
library(dplyr, warn.conflicts = FALSE)

test_that("JSON dataset", {
  # Write `df` to `path` with jsonlite::stream_out(). The connection is closed
  # by the helper's exit handler, so it is released even if writing fails.
  write_json_lines <- function(df, path) {
    con <- file(path, open = "wb")
    on.exit(close(con), add = TRUE)
    jsonlite::stream_out(df, con = con, verbose = FALSE)
  }

  # Set up the JSON directory for testing: one subdirectory per partition
  # value. add = TRUE keeps any exit handlers that are already registered.
  json_dir <- make_temp_dir()
  on.exit(unlink(json_dir, recursive = TRUE), add = TRUE)
  dir.create(file.path(json_dir, 5))
  dir.create(file.path(json_dir, 6))

  write_json_lines(df1, file.path(json_dir, 5, "file1.json"))
  write_json_lines(df2, file.path(json_dir, 6, "file2.json"))

  # The subdirectory names (5 and 6) surface as the extra column "part"
  ds <- open_dataset(json_dir, format = "json", partitioning = "part")

  expect_r6_class(ds$format, "JsonFileFormat")
  expect_r6_class(ds$filesystem, "LocalFileSystem")
  expect_identical(names(ds), c(names(df1), "part"))
  expect_identical(dim(ds), c(20L, 7L))

  # Filtering on a data column and on the partition column must give the same
  # result as filtering the data frame that was written into partition 5.
  expect_equal(
    ds %>%
      select(string = chr, integer = int, part) %>%
      filter(integer > 6 & part == 5) %>%
      collect() %>%
      # as.numeric bc they're being parsed as int64
      summarize(mean = mean(as.numeric(integer))),
    df1 %>%
      select(string = chr, integer = int) %>%
      filter(integer > 6) %>%
      summarize(mean = mean(integer))
  )

  # Collecting virtual partition column works
  expect_equal(
    collect(ds) %>% arrange(part) %>% pull(part),
    c(rep(5, 10), rep(6, 10))
  )
})

test_that("JSON Fragment scan options", {
  # Scan options created for the "json" format report that type back
  json_scan_opts <- FragmentScanOptions$create("json")
  expect_equal(json_scan_opts$type, "json")

  # An unsupported argument must raise an error that mentions its name
  expect_error(
    FragmentScanOptions$create("json", invalid_selection = TRUE),
    regexp = "invalid_selection"
  )
})

# Try the arrow package in your browser
#
# Any scripts or data that you put into this service are public.
#
# arrow documentation built on Nov. 25, 2023, 1:09 a.m.