# tests/testthat/test-tokenize.R

# Copyright 2021 Bedford Freeman & Worth Pub Grp LLC DBA Macmillan Learning.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

test_that("Simple tokenization works.", {
  # Two short sentences, each ending in a punctuation mark, so that the
  # handling of trailing punctuation shows up in the resulting tokens.
  sentences <- c("This is some text.", "So is this!")

  # Expected tokens when punctuation stays attached to the preceding word.
  punct_attached <- list(
    c("This", "is", "some", "text."),
    c("So", "is", "this!")
  )

  # Expected tokens when each punctuation mark is its own token.
  punct_separate <- list(
    c("This", "is", "some", "text", "."),
    c("So", "is", "this", "!")
  )

  # tokenize_space() is expected to keep punctuation attached to words.
  expect_identical(tokenize_space(sentences), punct_attached)

  # With default arguments, prepare_and_tokenize() is expected to return
  # punctuation marks as separate tokens.
  expect_identical(prepare_and_tokenize(sentences), punct_separate)

  # With space_punctuation = FALSE the expected tokens match those of
  # tokenize_space() for this input.
  expect_identical(
    prepare_and_tokenize(sentences, space_punctuation = FALSE),
    punct_attached
  )
})

# Try the piecemaker package in your browser
#
# Any scripts or data that you put into this service are public.
#
# piecemaker documentation built on June 7, 2023, 5:55 p.m.