# test-numberize.R
# In numberize: Convert Words to Numbers in Multiple Languages

# Shared fixture: every row pairs a numeric value (`num`) with the same number
# spelled out in French (`fr`), Spanish (`es`) and English (`en`). Rows are
# matched by position, so all four columns must stay the same length and order.
test_df <- data.frame(
  num = c(
    100,
    400,
    1515,
    7545,
    5670,
    91192,
    833377,
    8333776,
    98397717,
    3400615618,
    839740543461,
    27499856960,
    808204960098,
    578208177855,
    494466250917,
    808204960098,
    808204960019
  ),
  # Capitalisation of the first word is mixed across the entries.
  fr = c(
    "cent",
    "quatre cents", # plural form of "cent" (edge case)
    "mille cinq cent quinze",
    "sept mille cinq cent quarante-cinq",
    "Cinq mille six cent soixante-dix",
    "Quatre-vingt-onze mille cent quatre-vingt-douze",
    "Huit cent trente-trois mille trois cent soixante-dix-sept",
    "Huit millions trois cent trente-trois mille sept cent soixante-seize",
    "Quatre-vingt-dix-huit millions trois cent quatre-vingt-dix-sept mille sept cent dix-sept",
    "Trois milliards quatre cent millions six cent quinze mille six cent dix-huit",
    "Huit cent trente-neuf milliards sept cent quarante millions cinq cent quarante-trois mille quatre cent soixante-et-un",
    "Vingt-sept milliards quatre cent quatre-vingt-dix-neuf millions huit cent cinquante-six mille neuf cent soixante",
    "Huit cent huit milliards deux cent quatre millions neuf cent soixante mille quatre-vingt-dix-huit",
    "Cinq cent soixante-dix-huit milliards deux cent huit millions cent soixante-dix-sept mille huit cent cinquante-cinq",
    "Quatre cent quatre-vingt-quatorze milliards quatre cent soixante-six millions deux cent cinquante mille neuf cent dix-sept",
    "Huit cent huit milliards deux cent quatre millions neuf cent soixante mille quatre-vingt-dix-huit",
    "Huit cent huit milliards deux cent quatre millions neuf cent soixante mille dix-neuf"
  ),
  # Values of a billion and above are written as "<n> mil ... millones".
  es = c(
    "Cien",
    "cuatrocientos",
    "mil quinientos quince",
    "Siete mil quinientos cuarenta y cinco",
    "Cinco mil seiscientos setenta",
    "Noventa y un mil ciento noventa y dos",
    "ochocientos treinta y tres mil trescientos setenta y siete",
    "Ocho millones trescientos treinta y tres mil setecientos setenta y seis",
    "Noventa y ocho millones trescientos noventa y siete mil setecientos diecisiete",
    "Tres mil cuatrocientos millones seiscientos quince mil seiscientos dieciocho",
    "ochocientos treinta y nueve mil setecientos cuarenta millones quinientos cuarenta y tres mil cuatrocientos sesenta y uno",
    "Veintisiete mil cuatrocientos noventa y nueve millones ochocientos cincuenta y seis mil novecientos sesenta",
    "Ochocientos ocho mil doscientos cuatro millones novecientos sesenta mil noventa y ocho",
    "Quinientos setenta y ocho mil doscientos ocho millones ciento setenta y siete mil ochocientos cincuenta y cinco",
    "Cuatrocientos noventa y cuatro mil cuatrocientos sesenta y seis millones doscientos cincuenta mil novecientos diecisiete",
    "Ochocientos ocho mil doscientos cuatro millones novecientos sesenta mil noventa y ocho",
    "Ochocientos ocho mil doscientos cuatro millones novecientos sesenta mil diecinueve"
  ),
  # NOTE(review): several entries have no space after a comma (for example
  # "thousand,seven"); presumably deliberate, to exercise separator handling
  # -- confirm before normalising.
  en = c(
    "one hundred",
    "four hundred",
    "one thousand five hundred and fifteen",
    "seven thousand, five hundred and forty-five",
    "five thousand, six hundred and seventy",
    "ninety-one thousand, one hundred and ninety-two",
    "eight hundred and thirty-three thousand, three hundred and seventy-seven",
    "eight million, three hundred and thirty-three thousand,seven hundred and seventy-six",
    "ninety-eight million, three hundred and ninety-seven thousand,seven hundred and seventeen",
    "three billion, four hundred million, six hundred and fifteen thousand,six hundred and eighteen",
    "eight hundred and thirty-nine billion, seven hundred and forty million,five hundred and forty-three thousand, four hundred and sixty-one",
    "twenty-seven billion, four hundred and ninety-nine million,eight hundred and fifty-six thousand, nine hundred and sixty",
    "eight hundred and eight billion, two hundred and four million,nine hundred and sixty thousand and ninety-eight",
    "five hundred and seventy-eight billion, two hundred and eight million,one hundred and seventy-seven thousand, eight hundred and fifty-five",
    "four hundred and ninety-four billion, four hundred and sixty-six million,two hundred and fifty thousand, nine hundred and seventeen",
    "eight hundred and eight billion, two hundred and four million,nine hundred and sixty thousand and ninety-eight",
    "eight hundred and eight billion, two hundred and four million,nine hundred and sixty thousand and nineteen"
  ),
  # Keep the text columns as character vectors rather than factors.
  stringsAsFactors = FALSE
)

test_that("translating vector of English numbers works", {
  # No `lang` is supplied, so this exercises numberize()'s default language
  # against the English column of the shared fixture.
  expected <- test_df[["num"]]
  converted <- numberize(test_df[["en"]])
  expect_identical(converted, expected)
})

test_that("translating vector of French numbers works", {
  # The whole French fixture column is converted in one vectorised call.
  expected <- test_df[["num"]]
  converted <- numberize(test_df[["fr"]], lang = "fr")
  expect_identical(converted, expected)
})

test_that("translating vector of Spanish numbers works", {
  # The whole Spanish fixture column is converted in one vectorised call.
  expected <- test_df[["num"]]
  converted <- numberize(test_df[["es"]], lang = "es")
  expect_identical(converted, expected)
})

test_that("text with non digit word returns NA", {
  # "epiverse" is not a number word, so the conversion must be missing.
  converted <- numberize("epiverse", lang = "en")
  is_missing <- is.na(converted)
  expect_true(is_missing)
})

# NB: this vector is coerced into character by R
test_that("vector with number and words and NA is properly handled", {
  # A literal number, French number words and a missing value in one vector.
  mixed_input <- c(17, "dix", "soixante-cinq", "deux mille vingt-quatre", NA)
  expected <- c(17, 10, 65, 2024, NA)
  converted <- numberize(mixed_input, lang = "fr")
  expect_identical(converted, expected)
})

test_that("text with leading and trailing whitespace works", {
  # The input has leading, trailing and doubled inner spaces.
  padded_input <- "  mille cinq  cent quinze    "
  converted <- numberize(padded_input, lang = "fr")
  expect_identical(converted, 1515)
})

test_that("warning for ambiguous conversion", {
  # The assignment stays inside expect_warning() so the converted value is
  # captured while the warning is being asserted.
  expect_warning(
    converted <- numberize("twenty twenty four")
  )
  # Besides warning, the ambiguous phrase must convert to a missing value.
  expect_true(is.na(converted))
})

test_that("NA to return NA", {
  # A bare missing value must come back as missing.
  converted <- numberize(NA, lang = "es")
  is_missing <- is.na(converted)
  expect_true(is_missing)
})

test_that("NaN to return NA", {
  # Not-a-number input must come back as missing.
  converted <- numberize(NaN, lang = "es")
  is_missing <- is.na(converted)
  expect_true(is_missing)
})

test_that("TRUE to return NA", {
  # A logical TRUE is not a number word and must come back as missing.
  converted <- numberize(TRUE, lang = "en")
  is_missing <- is.na(converted)
  expect_true(is_missing)
})
test_that("FALSE to return NA", {
  # A logical FALSE is not a number word and must come back as missing.
  converted <- numberize(FALSE, lang = "en")
  is_missing <- is.na(converted)
  expect_true(is_missing)
})
test_that("NULL to return NA", {
  # NULL input is expected to yield a missing value rather than an empty result.
  converted <- numberize(NULL, lang = "fr")
  is_missing <- is.na(converted)
  expect_true(is_missing)
})
test_that("Inf to return NA", {
  # Infinite input must come back as missing.
  converted <- numberize(Inf, lang = "fr")
  is_missing <- is.na(converted)
  expect_true(is_missing)
})
test_that("Spanish words with accents are correctly numberized", {
  # Upper-case accented number words, followed by one mixed-case phrase that
  # combines several accented words.
  accented_words <- c(
    "DIECISÉIS", "VEINTIÚN", "VEINTIDÓS", "VEINTITRÉS", "VEINTISÉIS", "MILLÓN",
    "BILLÓN",
    "Seis billones dieciséis mil veintiún millones veintidós mil veintitrés"
  )
  # The expected values show "BILLÓN" is read as 10^12, not 10^9.
  expected <- c(
    16, 21, 22, 23, 26, 1000000, 1000000000000, 6016021022023
  )
  converted <- numberize(accented_words, lang = "es")
  expect_identical(converted, expected)
})
test_that("French words with accents are correctly numberized", {
  # Lower- and upper-case spellings of the accented word for zero. The language
  # code is passed in upper case here, unlike the other tests in this file.
  accented_words <- c("zéro", "ZÉRO")
  expected <- c(0, 0)
  converted <- numberize(accented_words, lang = "FR")
  expect_identical(converted, expected)
})

# Any scripts or data that you put into this service are public.
#
# numberize documentation built on Oct. 29, 2024, 1:08 a.m.
#
# rdrr.io home R language documentation Run R code online
#
# CRAN packages Bioconductor packages R-Forge packages GitHub packages
#
# Note that we can't provide technical support on individual packages. You should contact the package authors for that.
#
# numberize
# Convert Words to Numbers in Multiple Languages
#
# tests/testthat/test-numberize.R
# In numberize: Convert Words to Numbers in Multiple Languages
#
# Try the numberize package in your browser
#
# R Package Documentation
#
# Browse R Packages
#
# We want your feedback!
#
# numberize Convert Words to Numbers in Multiple Languages
#
# tests/testthat/test-numberize.R In numberize: Convert Words to Numbers in Multiple Languages
#
# Try the numberize package in your browser
#
# R Package Documentation
#
# Browse R Packages
#
# We want your feedback!
#
# numberize
# Convert Words to Numbers in Multiple Languages
#
# tests/testthat/test-numberize.R
# In numberize: Convert Words to Numbers in Multiple Languages