tests/testthat/test-tbl_json.R

# tbl_json: as_tbl_json.character -----------------------

test_that("correctly parses length(json) == 1", {
  expect_identical(
    as_tbl_json('{"name": "bob", "age": 32}'),
    tbl_json(
      data.frame(document.id = 1L),
      list(list(name = "bob", age = 32L))
    )
  )
})

test_that("correctly parses length(json) > 1", {
  expect_identical(
    as_tbl_json(
      c('{"name": "bob", "age": 32}',
        '{"name": "susan", "age": 25}')
    ),
    tbl_json(
      data.frame(document.id = 1L:2L),
      list(
        list(name = "bob", age = 32L),
        list(name = "susan", age = 25L)
      )
    )
  )
})

test_that("correctly parses character(0)", {
  expect_identical(
    as_tbl_json(character(0)),
    tbl_json(
      data.frame(document.id = integer(0)),
      list()
    )
  )
})

test_that("correctly parses empty objects", {

  nl <- list()
  names(nl) <- character(0)

  expect_identical(
    as_tbl_json(c('[]', '{}')),
    tbl_json(
      data.frame(document.id = 1L:2L),
      list(list(), nl)
    )
  )

})

test_that("correctly structures an array", {
  expect_identical(
    as_tbl_json('[{"name": "bob"}, {"name": "susan"}]'),
    tbl_json(
      data.frame(document.id = 1L),
      list(list(list(name = "bob"), list(name = "susan")))
    )
  )
})

test_that("[ works with various indexing", {
  obj <- as_tbl_json(c('{"name": "value"}', '{"name": "other"}')) %>%
    json_types()
  # column indexing
  expect_identical(obj[0], obj[,0])
  expect_identical(obj[1], obj[,1])
  expect_identical(obj[-1], obj[,-1])
  expect_identical(obj[0:2], obj)
  
  # no indexing
  expect_identical(obj, obj[])
  
  # row indexing
  expect_identical(obj[1,], obj[1,1:2])
  expect_identical(obj[-1,], obj[2,])
  expect_identical(obj[1:2,], obj)
})

test_that("throws error on invalid json", {

    expect_error(as_tbl_json(''))

  }
)

# tbl_json: as.character.tbl_json -----------------------------------------

inverts_json_test <- function(json) {
  expect_identical(json, json %>% as_tbl_json %>% as.character)
}

test_that("works for simple cases", {

  inverts_json_test('"a"')
  inverts_json_test('1')
  inverts_json_test('true')
  inverts_json_test('false')
  inverts_json_test('null')
  inverts_json_test('{}')
  inverts_json_test('[]')

})

test_that("works for more complex cases", {

  inverts_json_test('{"name":"a"}')
  inverts_json_test('{"name":1}')
  inverts_json_test('{"name":[1]}')
  inverts_json_test('{"name":[null]}')
  inverts_json_test('{"name":null}')
  inverts_json_test('[[1,2],1]')

})

test_that("works for worldbank data", {

  inverts_json_test(worldbank[1:5])

})

test_that("throws informative warning message when attr(.,'JSON') is missing", {
  j <- '{"a": 1, "b": "test"}' %>% as_tbl_json()
  j[["..JSON"]] <- NULL
  
  expect_warning(j %>% as.character(),'\\.\\.JSON.*remove.*tbl_json')
  expect_identical(suppressWarnings(j %>% as.character()),character())
})


# as_tbl_json.tbl_json ----------------------------------------------------

test_that('functions as the identity on a simple pipeline', {
   x <- commits %>% gather_array() %>% enter_object('commit') %>% spread_all()
   
   expect_identical(
     x, as_tbl_json(x)
   )
   
   y <- commits %>% gather_array() %>% gather_object()
   
   expect_identical(
     y, as_tbl_json(y)
   )
})

test_that('functions as the identity on a more advanced pipeline', {
  x <- commits %>% gather_array() %>% spread_values(
    sha=jstring('sha')
    , name=jstring('commit','author','name')
    , msg=jstring('commit','message')
    , comment_count=jnumber('commit','comment_count')
    , committer.name=jstring('commit','committer','name')
    , committer.date=jstring('commit','committer','date')
    , tree.sha=jstring('committ','tree','sha')
    , tree.url=jstring('committ','tree','url')
    , url=jstring('url')
  )
  
  expect_identical(
    x, as_tbl_json(x)
  )
})

# as_tbl_json.list --------------------------------------------------------

test_that("list is interpreted as already parsed", {
  j <- jsonlite::fromJSON('{"a": "b"}')

  expect_s3_class(as_tbl_json(j), "tbl_json")
  
  l <- list(a = "b", c = list("a","b","c"))
  
  expect_s3_class(as_tbl_json(l), "tbl_json")
  expect_identical(as_tbl_json(l)$document.id, c(1L,2L))
  
  expect_s3_class(as_tbl_json(list()), "tbl_json")
})

test_that("jsonlite::toJSON works as anticipated", {
  expect_identical(jsonlite::toJSON(json_get(as.tbl_json('"a"'))
                                    , null='null'
                                    , auto_unbox = TRUE) %>% as.character
                   , "[\"a\"]")
})

# print.tbl_json ----------------------------------------------------------

test_that("purrr::map_chr works as expected", {
  a <- json_get(as.tbl_json('"a"','JSON')) %>% purrr::map_chr(jsonlite::toJSON,
                          null = "null",
                          auto_unbox = TRUE)
  
  expect_identical(a,'\"a\"')
})

test_that("print.tbl_json works for a simple case", {
  expect_snapshot(as.tbl_json('"a"'))
})

test_that("print.tbl_json json.width works correctly", {
  expect_snapshot(print(as.tbl_json('"12345"'), json.width = 4))
})

test_that("print.tbl_json json.n works correctly", {
  expect_snapshot(print(as.tbl_json(c('"a"', '"b"')), json.n = 1))
})

test_that('does not throw an error', {
  printregex <- 'tbl_json.*JSON.*attribute.*document\\.id'
  json <- '{"a":1, "b": "test", "c": [1,2,3]}'
  
  expect_output(json %>% as.tbl_json() %>% print, printregex)
  
  j <- json %>% spread_all() %>% enter_object('c') %>% 
    gather_array('c_id') %>% append_values_number()
  
  expect_output(j %>% print, printregex)
  
  attr(j,'JSON') <- NULL
  
  expect_output(suppressWarnings(j %>% print), printregex)
})

# tbl_json: as.tbl_json.data.frame ----------------------------------------

test_that("works for a data.frame and tibble created objects", {

    df <- data.frame(
      document.id = 1:2,
      json = c('{"name": "bob"}', '{"name": "susan"}'),
      stringsAsFactors = FALSE)
    # data.frame
    expect_identical(
      as.tbl_json(df, json.column = "json"),
      as.tbl_json(df$json)
    )
    # tibble
    df <- dplyr::tibble(
      document.id = 1:2,
      json = c('{"name": "bob"}', '{"name": "susan"}'))
    expect_identical(
      as.tbl_json(df, json.column = "json"),
      as.tbl_json(df$json)
    )

  }
)

test_that("works in a pipeline", {

    df <- dplyr::tibble(
      age = c(32, 45),
      json = c('{"name": "bob"}', '{"name": "susan"}')
    )

    expect_identical(
      df %>% as.tbl_json(json.column = "json") %>%
        spread_values(name = jstring("name")) %>%
        dplyr::filter(age == 32) %>%
        `[[`("name"),
      "bob"
    )

  }
)

test_that("handles nested lists as a JSON column", {
  tj <- as_tbl_json('{"name": "value"}')
  tj_tib <- tibble::as_tibble(json_get_column(tj, "json"))
  
  expect_identical(
    as_tbl_json(tj_tib, json.column = "json"),
    tj
    )
})

test_that("throws an error without json.column specified", {
    expect_error(as.tbl_json(iris))
  }
)

test_that("throws an error if json column doesn't exist", {
    expect_error(as.tbl_json(iris, json.column = "json"))
  }
)

# tbl_json ----------------------------------------------------------------

test_that("tbl_json constructor works with no data", {
    expect_identical(tbl_json(data.frame(), list()) %>% nrow, 0L)
  }
)

test_that("[ row filtering works with a simple example", {

    expect_identical(
      as.tbl_json(c('{"name": "bob"}', '{"name": "susan"}'))[1, ],
      tbl_json(
        data.frame(document.id = 1L),
        list(list(name = "bob"))
      )
    )

  }
)

test_that("[ column filtering doesn't change the JSON", {

    x <- c(
      '{"name": "bob", "children": [{"name": "george"}]}',
      '{"name": "susan", "children": [{"name": "sally"}, {"name": "bobby"}]}'
        ) %>% as.tbl_json %>%
      spread_values("parent" = jstring("name")) %>%
      enter_object("children") %>%
      gather_array %>%
      spread_values("child" = jstring("name"))

    expect_identical(
      attr(x, "JSON"),
      attr(x[, c("parent", "child")], "JSON")
    )

  }
)


test_that('handles "drop" like a tbl_df', {
  mydata <- as.tbl_json('[{"name": "Frodo", "occupation": "Ring Bearer"}
                        ,{"name": "Aragorn", "occupation": "Ranger"}]') %>%
    gather_array() %>%
    spread_values(name=jstring('name'), occupation=jstring('occupation'))
   
  expect_s3_class(mydata[,],'tbl_json')
  expect_s3_class(mydata[,'name'],'tbl_json')
  #TODO: Figure out how we want to proceed
  #expect_s3_class(suppressWarnings(mydata[,'occupation',drop=TRUE]),'tbl_json')
  #expect_warning(is.tbl_json(mydata[,'name',drop=TRUE]),'drop ignored')
})

test_that("[ keeps column order consistent", {
  tj <- as_tbl_json('[{"a": "b"}, {"a": "c"}]') %>%
    gather_array() %>%
    gather_object()
  
  expect_identical(
    names(tj[1,"document.id"]),
    c("document.id", "..JSON")
  )
  
  expect_identical(
    names(tj[2,c("..JSON", "document.id")]),
    c("..JSON", "document.id")
  )
  
})

test_that("$ leaves tbl_json idempotent", {
  tj <- as_tbl_json('{"a": "b"}')
  
  tj$alt <- 1
  
  expect_identical(
    tj,
    mutate(tj, alt = alt)
  )
})

# as_tibble ---------------------------------------------------------------

test_that('as_tibble drops the JSON attribute and tbl_json class', {
  
  jtidy <- issues %>% gather_array() %>% spread_all()
  
  expect_identical(json_get(dplyr::as_tibble(jtidy)),NULL)
  expect_false('tbl_json' %in% class(dplyr::as_tibble(jtidy)))
})

test_that('as_data_frame functions like as_tibble', {
  
  jtidy <- issues %>% gather_array() %>% spread_values(
    url=jstring('url')
    , body=jstring('body')
    , user.id=jnumber('user.id')
    , user.login=jstring('user.login')
  )
  
  expect_identical(json_get(dplyr::as_tibble(jtidy)),NULL)
  expect_false('tbl_json' %in% class(dplyr::as_tibble(jtidy)))
})

# tbl_json: dplyr NSE verbs -----------------------------------------------

test_that("dplyr::group_by works", {
  tj <- as_tbl_json('{"a": "b"}')
  
  g1 <- tj %>% group_by(document.id) %>% mutate(a = n())
  
  #group_by drops the class today
  expect_false(inherits(g1, "tbl_json"))
  
  expect_error(
    {tj %>% group_by(..JSON) %>% mutate(b = n())},
    class = "vctrs_error_subscript_oob"
  )
})

test_that("dplyr::filter works with a simple example", {

    x <- as.tbl_json(c('{"name": "bob"}', '{"name": "susan"}'))

    expect_identical(
      dplyr::filter(x, document.id == 1),
      tbl_json(
        data.frame(document.id = 1L),
        list(list(name = "bob"))
      )
    )

  }
)

test_that("dplyr::filter works in a more complex pipeline", {

    json <- c(
      '{"name": "bob", "children": [{"name": "george"}]}',
      '{"name": "susan", "children": [{"name": "sally"}, {"name": "bobby"}]}'
        )
    susan.children <- json %>% as.tbl_json %>%
      spread_values(name = jstring("name")) %>%
      dplyr::filter(name == "susan") %>%
      enter_object("children") %>%
      gather_array %>%
      spread_values(child = jstring("name"))

    expect_identical(susan.children$child, c("sally", "bobby"))

  }
)


test_that("dplyr::arrange works with a simple example", {

    x <- as.tbl_json(c('{"name": "bob"}', '{"name": "susan"}'))

    expect_identical(
      x %>% dplyr::arrange(desc(document.id)),
      tbl_json(
        data.frame(document.id = c(2L, 1L)),
        list(list(name = "susan"), list(name = "bob"))
      )
    )

  }
)

test_that("dplyr::mutate works with a simple example", {

    x <- as.tbl_json(c('{"name": "bob"}', '{"name": "susan"}'))

    expect_identical(
      x %>%
        spread_values(name = jstring("name")) %>%
        dplyr::mutate(fullname = paste(name, "green")),
      tbl_json(
        dplyr::tibble(
          document.id = c(1L, 2L),
          name = c("bob", "susan"),
          fullname = c("bob green", "susan green")),
        list(list(name = "bob"), list(name = "susan"))
      )
    )

  }
)

test_that("dplyr::mutate works in a more complex pipeline", {

    json <- c(
      '{"name": "bob", "children": [{"name": "george"}]}',
      '{"name": "susan", "children": [{"name": "sally"}, {"name": "bobby"}]}')

    children <- json %>% as.tbl_json %>%
      spread_values(name = jstring("name")) %>%
      dplyr::mutate(parent.rank = rank(name)) %>%
      enter_object("children") %>%
      gather_array %>%
      spread_values(child = jstring("name"))

    expect_identical(children$parent.rank, c(1, 2, 2))
    expect_identical(children$child, c("george", "sally", "bobby"))

  }
)

test_that("dplyr::transmute works", {
  obj <- as.tbl_json(c('{"name": "value"}', '{"name": "string"}'))
  
  prep <- obj %>% gather_object %>% append_values_string()
  
  use_transmute <- prep %>% transmute(string = paste0(string, "_hi"))
  
  expect_s3_class(use_transmute, "tbl_json")
  expect_identical(nrow(use_transmute), 2L)
  expect_identical(ncol(use_transmute), 2L)
  
  expect_identical(use_transmute$string, c("value_hi", "string_hi"))
})

test_that("dplyr::slice works", {

  new <- '[1, 2, 3]' %>% gather_array %>% dplyr::slice(1:2)

  expect_s3_class(new, "tbl_json")
  expect_identical(nrow(new), 2L)
  expect_identical(length(json_get(new)), 2L)

})

test_that('dplyr::select works', {
  json <- '[{"id":1, "object":"first"}, {"id":2, "object":"second"}]'
  
  f <- json %>% as.tbl_json %>% gather_array %>% spread_all %>%
    dplyr::select(ID=id, object)
  
  expect_equal(names(f), c('ID','object','..JSON'))
  expect_equal(nrow(f),2)
  expect_s3_class(f,'tbl_json')
  
  # Specifically trying to avoid "Adding missing grouping variables: `..JSON`"
  expect_silent(hm <- as_tbl_json(json) %>% select(document.id))
  
  expect_identical(
    select(f, ..JSON, ID, object),
    f
  )
})

test_that("dplyr::rename works", {

  new <- '[1, 2, 3]' %>% gather_array %>% dplyr::rename(blah = document.id)

  expect_s3_class(new, "tbl_json")
  expect_identical(names(new), c("blah", "array.index","..JSON"))

})

test_that("dplyr::transmute works", {

  new <- '[1, 2, 3]' %>% gather_array %>% dplyr::transmute(blah = document.id)

  expect_s3_class(new, "tbl_json")
  expect_identical(names(new), c("blah", "..JSON"))

})

test_that("dplyr::sample_n works", {

  new <- '[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]' %>% gather_array %>% dplyr::sample_n(2)

  expect_s3_class(new, "tbl_json")
  expect_identical(new$array.index, json_get(new) %>% purrr::flatten_int())

})

test_that("bind_rows works with tbl_json", {

  # Define a simple JSON array
  people <- '
  [
      {
      "name": "bob",
      "age": 32
      }, 
      {
      "name": "susan", 
      "age": 54
      }
  ]'
  
  # Structure the data
  people_df <- people %>%
    gather_array %>%
    spread_values(
      name = jstring("name"),
      age = jnumber("age"))
  
  z <- people_df %>% dplyr::bind_rows(people_df)
  

  expect_type(json_get(z),'list')
  expect_s3_class(z, 'tbl_json')
  expect_equal(nrow(z), nrow(people_df) * 2)
  expect_equal(length(json_get(z)), nrow(people_df) * 2)
})

test_that("bind_rows falls back to normal behavior if not tbl_json", {
  a <- dplyr::tibble(a=c(1,2), b=c('one','two'))
  c <- dplyr::tibble(a=c(3,4), b=c('three','four'))
  
  out <- dplyr::bind_rows(a,c)
  expect_equal(nrow(out), nrow(a) + nrow(c))
  expect_equal(names(out), c('a','b'))
  expect_s3_class(out,'tbl_df')
})

# tbl_json: dplyr SE verbs ------------------------------------------------

test_that('dplyr::filter works with programming', {
  json <- '[{"a": "fun", "b": 2},{"a": "blam", "b": 3}]'
  v <- c(rlang::quo(a == "fun"))
  
  f <- json %>% gather_array %>% spread_all %>%
    dplyr::filter(!!!v)
  
  expect_identical(f$a,c('fun'))
  expect_identical(f$b,c(2))
  expect_identical(nrow(f),1L)
  expect_s3_class(f,'tbl_json')
})

test_that('dplyr::mutate works with programming', {
  json <- '{ "one": "zip", "two": "zap", "three": "zzz" }'
  v <- c(four=rlang::quo(paste(one,two,sep="/")), five=rlang::quo(three))
  
  f <- json %>% spread_all %>% dplyr::mutate(!!!v)
  
  expect_identical(f$four,'zip/zap')
  expect_identical(f$five, 'zzz')
  expect_s3_class(f,'tbl_json')
})

test_that('dplyr::rename works with programming', {
  json <- '{ "first": "bill", "last":"bo" }'
  v <- c(firstName='first', lastName='last')
  
  f <- json %>% spread_all %>% dplyr::rename(!!!v)
  
  expect_identical(names(f),c('document.id','firstName','lastName','..JSON'))
  expect_s3_class(f,'tbl_json')
})

test_that('dplyr::select works with programming', {
  json <- '{ "hill": "top", "valley": "floor", "mountain": "top" }'
  v <- c(Hill='hill','valley')
  
  f <- json %>% spread_all %>% dplyr::select(!!!v)
  
  expect_identical(names(f),c('Hill','valley','..JSON'))
  expect_s3_class(f,'tbl_json')
})

test_that('dplyr::arrange works with programming', {
  json <- '[{ "somewhere": "over" },{"somewhere": "fun"}, {"somewhere": "else"}]'
  v <- c('somewhere')
  
  # as.name currently required by dplyr::arrange
  f <- json %>% gather_array %>% spread_all %>% dplyr::arrange(!!as.name(v))
  
  expect_identical(f$somewhere,c('else','fun','over'))
  expect_identical(f$array.index, c(3L,2L,1L))
  expect_s3_class(f,'tbl_json')
})

test_that('dplyr::transmute works with programming', {
  json <- '{ "first": "frodo", "last": "baggins"}'
  v <- c(firstName='first')
  
  f <- json %>% spread_all %>% dplyr::transmute(!!!v)
  
  expect_identical(names(f), c('firstName','..JSON'))
  expect_s3_class(f,'tbl_json')
})

test_that('dplyr::slice works with programming', {
  json <- '[{"id":7, "obj":"a"}
  ,{"id":8, "obj":"a"}
  ,{"id":9, "obj":"b"}
  ,{"id":10, "obj":"c"}]'
  v <- 1
  
  f <- json %>% gather_array %>% spread_all %>% slice(!!v)
  expect_identical(nrow(f),1L)
  expect_identical(f$id,7)
  expect_s3_class(f,'tbl_json')
})

# json_get ----------------------------------------------------------------

test_that('json_get works', {
  tj <- as.tbl_json('{"a": "b"}')
  
  expect_identical(json_get(tj), list(list("a" = "b")))
})

test_that('json_get handles error cases', {
  tjn <- as.tbl_json('{"a": "b"}')
  tjn[["..JSON"]] <- NULL
  expect_identical(json_get(tjn), NULL)
  
  expect_identical(json_get(data.frame()), NULL)
  expect_identical(json_get(data.frame(`..JSONA` = 1)), NULL)
})

# json_get_column ---------------------------------------------------------

test_that('json_get_column works', {
  tj <- as.tbl_json('{"a": "b", "b": [1,2,3]}')
  
  expect_identical(
    json_get_column(tj)[["json"]],
    json_get(tj)
  )
  expect_identical(
    json_get_column(tj, "alt")[["alt"]],
    json_get(tj)
  )
  expect_identical(
    json_get_column(tj, something.special)[["something.special"]],
    json_get(tj)
  )
})


# join --------------------------------------------------------------------

test_that("all joins work", {
  tj <- as_tbl_json(c('{"a": "b", "b": [1,2,3]}','{"a": "c"}'))
  tj2 <- as_tbl_json('{"a": "b", "c": [4,5,6]}')
  
  joined <- inner_join(tj, tj2, by = "document.id")
  expect_equal(nrow(joined), 1)
  expect_false(inherits(joined, "tbl_json"))
  
  joined <- full_join(tj, tj2, by = "document.id")
  expect_equal(nrow(joined), 2)
  expect_false(inherits(joined, "tbl_json"))

  joined <- right_join(tj, tj2, by = "document.id")
  expect_equal(nrow(joined), 1)
  expect_false(inherits(joined, "tbl_json"))
  
  joined <- left_join(tj, tj2, by = "document.id")
  expect_equal(nrow(joined), 2)
  expect_false(inherits(joined, "tbl_json"))
  
  joined <- anti_join(tj, tj2, by = "document.id")
  expect_equal(nrow(joined), 1)
  expect_s3_class(joined, "tbl_json")
  
  joined <- semi_join(tj, tj2, by = "document.id")
  expect_equal(nrow(joined), 1)
  expect_s3_class(joined, "tbl_json")
})
jeremystan/tidyjson documentation built on Feb. 4, 2023, 6:54 p.m.