# tests/testthat/test-tokenizer.R

# Shared engine for every test in this file. The prelude is skipped —
# these tests exercise only the tokenizer, so no evaluated environment
# is needed (and skipping it keeps the suite fast).
engine <- make_engine(load_prelude = FALSE)

# CRAN throttle helper, called at the top of each test.
# NOTE(review): presumably skips or rate-limits tests when running on
# CRAN — confirm against make_cran_thinner()'s definition.
thin <- make_cran_thinner()

test_that("tokenizer handles basic tokens", {
  thin()
  # "(+ 1 2)" should split into exactly five tokens:
  # LPAREN, SYMBOL(+), NUMBER(1), NUMBER(2), RPAREN.
  tokens <- engine_field(engine, "tokenizer")$tokenize("(+ 1 2)")
  expect_length(tokens, 5)
  expect_equal(tokens[[1]]$type, "LPAREN")
  expect_equal(tokens[[2]]$type, "SYMBOL")
  expect_equal(tokens[[2]]$value, "+")
  expect_equal(tokens[[3]]$type, "NUMBER")
  expect_equal(tokens[[3]]$value, 1)
  expect_equal(tokens[[4]]$type, "NUMBER")
  expect_equal(tokens[[4]]$value, 2)
  expect_equal(tokens[[5]]$type, "RPAREN")
})

test_that("tokenizer handles strings", {
  thin()
  # A quoted string is one STRING token whose value drops the quotes.
  tokens <- engine_field(engine, "tokenizer")$tokenize('"hello world"')
  expect_length(tokens, 1)
  expect_equal(tokens[[1]]$type, "STRING")
  expect_equal(tokens[[1]]$value, "hello world")
})

test_that("tokenizer handles escape sequences", {
  thin()
  # A literal backslash-n inside the source string must be decoded
  # into a real newline character in the token's value.
  tok <- engine_field(engine, "tokenizer")
  result <- tok$tokenize('"hello\\nworld"')
  expect_equal(result[[1]]$value, "hello\nworld")
})

test_that("tokenizer preserves unknown escapes", {
  thin()
  # Windows-style path: each doubled backslash in the source collapses
  # to a single backslash, and path characters pass through untouched.
  tok <- engine_field(engine, "tokenizer")
  result <- tok$tokenize('"C:\\\\Users\\\\runner\\\\file.arl"')
  expect_equal(result[[1]]$value, "C:\\Users\\runner\\file.arl")
})

test_that("tokenizer handles booleans and nil", {
  thin()
  tok <- engine_field(engine, "tokenizer")
  result <- tok$tokenize("#t #f #nil")
  # #t and #f become BOOLEAN tokens carrying the matching R logical.
  expect_equal(result[[1]]$type, "BOOLEAN")
  expect_true(result[[1]]$value)
  expect_equal(result[[2]]$type, "BOOLEAN")
  expect_false(result[[2]]$value)
  # #nil is its own token type with a NULL payload.
  expect_equal(result[[3]]$type, "NIL")
  expect_null(result[[3]]$value)
})

test_that("tokenizer handles comments", {
  thin()
  # A leading ; comment line is skipped entirely; only the five tokens
  # of "(+ 1 2)" remain.
  tokens <- engine_field(engine, "tokenizer")$tokenize("; comment\n(+ 1 2)")
  expect_length(tokens, 5)
  expect_equal(tokens[[1]]$type, "LPAREN")
})

test_that("tokenizer handles quote syntax", {
  thin()
  tok <- engine_field(engine, "tokenizer")
  result <- tok$tokenize("'x")
  # A leading apostrophe yields a QUOTE token ahead of the quoted symbol.
  expect_equal(result[[1]]$type, "QUOTE")
  expect_equal(result[[2]]$type, "SYMBOL")
  expect_equal(result[[2]]$value, "x")
})

test_that("tokenizer handles :: operator in symbols", {
  thin()
  # A namespaced name like base::mean must stay a single SYMBOL token,
  # not be split at the colons.
  tokens <- engine_field(engine, "tokenizer")$tokenize("base::mean")
  expect_length(tokens, 1)
  expect_equal(tokens[[1]]$type, "SYMBOL")
  expect_equal(tokens[[1]]$value, "base::mean")
})

test_that("tokenizer handles ::: operator in symbols", {
  thin()
  # Triple-colon (internal-namespace) access is likewise one SYMBOL.
  tokens <- engine_field(engine, "tokenizer")$tokenize("pkg:::internal")
  expect_length(tokens, 1)
  expect_equal(tokens[[1]]$type, "SYMBOL")
  expect_equal(tokens[[1]]$value, "pkg:::internal")
})

test_that("keywords are tokenized correctly", {
  thin()
  # :data is a single KEYWORD token; the leading colon is stripped
  # from the stored value.
  tokens <- engine_field(engine, "tokenizer")$tokenize(":data")
  expect_length(tokens, 1)
  expect_equal(tokens[[1]]$type, "KEYWORD")
  expect_equal(tokens[[1]]$value, "data")
})

test_that("keywords in expressions", {
  thin()
  tok <- engine_field(engine, "tokenizer")
  result <- tok$tokenize("(plot x y :col \"red\")")
  # Token 5 is :col — a KEYWORD with the colon stripped from its value.
  expect_equal(result[[5]]$type, "KEYWORD")
  expect_equal(result[[5]]$value, "col")
})

test_that("tokenizer handles integer literals", {
  thin()
  # The L suffix must produce true R integers (typeof "integer"),
  # not doubles, including for negative literals.
  tokens <- engine_field(engine, "tokenizer")$tokenize("4L 42L -10L")
  expect_length(tokens, 3)
  expect_equal(tokens[[1]]$type, "NUMBER")
  expect_equal(tokens[[1]]$value, 4L)
  expect_type(tokens[[1]]$value, "integer")
  expect_equal(tokens[[2]]$type, "NUMBER")
  expect_equal(tokens[[2]]$value, 42L)
  expect_type(tokens[[2]]$value, "integer")
  expect_equal(tokens[[3]]$type, "NUMBER")
  expect_equal(tokens[[3]]$value, -10L)
  expect_type(tokens[[3]]$value, "integer")
})

test_that("tokenizer handles pure imaginary numbers", {
  thin()
  # An i suffix yields a complex value with zero real part; the sign
  # belongs to the imaginary part.
  tokens <- engine_field(engine, "tokenizer")$tokenize("4i 3.14i -2.5i")
  expect_length(tokens, 3)
  expect_equal(tokens[[1]]$type, "NUMBER")
  expect_equal(tokens[[1]]$value, 0+4i)
  expect_type(tokens[[1]]$value, "complex")
  expect_equal(tokens[[2]]$type, "NUMBER")
  expect_equal(tokens[[2]]$value, 0+3.14i)
  expect_type(tokens[[2]]$value, "complex")
  expect_equal(tokens[[3]]$type, "NUMBER")
  expect_equal(tokens[[3]]$value, 0-2.5i)
  expect_type(tokens[[3]]$value, "complex")
})

test_that("tokenizer handles full complex number syntax", {
  thin()
  # Each real±imaginary pair is ONE token, not three — all four sign
  # combinations are covered.
  tokens <- engine_field(engine, "tokenizer")$tokenize("2+4i 3.14-2.5i -1+2i -5-3i")
  expect_length(tokens, 4)
  # 2+4i
  expect_equal(tokens[[1]]$type, "NUMBER")
  expect_equal(tokens[[1]]$value, 2+4i)
  expect_type(tokens[[1]]$value, "complex")
  # 3.14-2.5i
  expect_equal(tokens[[2]]$type, "NUMBER")
  expect_equal(tokens[[2]]$value, 3.14-2.5i)
  expect_type(tokens[[2]]$value, "complex")
  # -1+2i
  expect_equal(tokens[[3]]$type, "NUMBER")
  expect_equal(tokens[[3]]$value, -1+2i)
  expect_type(tokens[[3]]$value, "complex")
  # -5-3i
  expect_equal(tokens[[4]]$type, "NUMBER")
  expect_equal(tokens[[4]]$value, -5-3i)
  expect_type(tokens[[4]]$value, "complex")
})

test_that("tokenizer handles NA values", {
  thin()
  # Every NA literal maps to an NA token, and the typed variants
  # (NA_real_, NA_integer_, NA_character_, NA_complex_) must keep
  # their base storage type.
  tokens <- engine_field(engine, "tokenizer")$tokenize("NA NA_real_ NA_integer_ NA_character_ NA_complex_")
  expect_length(tokens, 5)
  expect_equal(tokens[[1]]$type, "NA")
  expect_true(is.na(tokens[[1]]$value))
  expect_equal(tokens[[2]]$type, "NA")
  expect_true(is.na(tokens[[2]]$value))
  expect_type(tokens[[2]]$value, "double")
  expect_equal(tokens[[3]]$type, "NA")
  expect_true(is.na(tokens[[3]]$value))
  expect_type(tokens[[3]]$value, "integer")
  expect_equal(tokens[[4]]$type, "NA")
  expect_true(is.na(tokens[[4]]$value))
  expect_type(tokens[[4]]$value, "character")
  expect_equal(tokens[[5]]$type, "NA")
  expect_true(is.na(tokens[[5]]$value))
  expect_type(tokens[[5]]$value, "complex")
})

# =============================================================================
# Dotted-pair (standalone dot) tokenizer tests
# =============================================================================

test_that("standalone dot in dotted-pair syntax yields DOT token", {
  thin()
  # "(a . b)": the whitespace-delimited dot is its own DOT token
  # between the two SYMBOL tokens.
  tokens <- engine_field(engine, "tokenizer")$tokenize("(a . b)")
  expect_length(tokens, 5)
  expect_equal(tokens[[1]]$type, "LPAREN")
  expect_equal(tokens[[2]]$type, "SYMBOL")
  expect_equal(tokens[[2]]$value, "a")
  expect_equal(tokens[[3]]$type, "DOT")
  expect_equal(tokens[[3]]$value, ".")
  expect_equal(tokens[[4]]$type, "SYMBOL")
  expect_equal(tokens[[4]]$value, "b")
  expect_equal(tokens[[5]]$type, "RPAREN")
})

test_that("dot with no surrounding space is part of symbol", {
  thin()
  # Without surrounding whitespace the dot stays inside the symbol:
  # "a.b" is one SYMBOL, giving only three tokens in total.
  tokens <- engine_field(engine, "tokenizer")$tokenize("(a.b)")
  expect_length(tokens, 3)
  expect_equal(tokens[[1]]$type, "LPAREN")
  expect_equal(tokens[[2]]$type, "SYMBOL")
  expect_equal(tokens[[2]]$value, "a.b")
  expect_equal(tokens[[3]]$type, "RPAREN")
})

test_that("dot at start of list yields DOT token", {
  thin()
  # Even as the first element after LPAREN, a standalone dot is a DOT
  # token (the tokenizer does not validate dotted-pair placement).
  tokens <- engine_field(engine, "tokenizer")$tokenize("( . b)")
  expect_length(tokens, 4)
  expect_equal(tokens[[2]]$type, "DOT")
  expect_equal(tokens[[2]]$value, ".")
})

test_that("dot before closing paren yields DOT token", {
  thin()
  # A dot directly before RPAREN is still tokenized as DOT; rejecting
  # a malformed pair is the parser's job, not the tokenizer's.
  tokens <- engine_field(engine, "tokenizer")$tokenize("(a . )")
  expect_length(tokens, 4)
  expect_equal(tokens[[3]]$type, "DOT")
  expect_equal(tokens[[4]]$type, "RPAREN")
})

# NOTE(review): the lines below are a scraped documentation-site footer,
# not R code; commented out so the file parses. Original text preserved:
# Try the arl package in your browser
# Any scripts or data that you put into this service are public.
# arl documentation built on March 19, 2026, 5:09 p.m.