# tests/testthat/test-limpiar_emojis.R

# limpiar_recode_emojis() is expected to replace emoji with a text description
# of the emoji rather than stripping them from the string.
test_that("limpiar_recode_emojis recodes not removes", {
  # Emoji are spelled as \U{...} escapes so the fixtures survive any
  # re-encoding of this file. Zero-width joiners (200D) and variation
  # selectors (FE0F) are invisible and are otherwise easily lost or garbled.
  wave <- "\U{1F44B}"
  # man + ZWJ + woman + ZWJ + girl + ZWJ + boy
  family <- "\U{1F468}\U{200D}\U{1F469}\U{200D}\U{1F467}\U{200D}\U{1F466}"

  emojis <- data.frame(
    text = c(
      paste0("Hello ", wave, " World"),
      paste0("Family: ", family),
      # man + medium skin tone + ZWJ + laptop
      "Coding \U{1F468}\U{1F3FD}\U{200D}\U{1F4BB}",
      # rainbow flag, then regional indicators U + S
      "Flags \U{1F3F3}\U{FE0F}\U{200D}\U{1F308} \U{1F1FA}\U{1F1F8}",
      # sun, thunder cloud and rain, snowflake (each with FE0F)
      "Weather \U{2600}\U{FE0F} \U{26C8}\U{FE0F} \U{2744}\U{FE0F}"
    )
  )

  # Precondition: the fixtures really contain the emoji under test.
  expect_match(emojis$text[1], wave, fixed = TRUE)
  expect_match(emojis$text[2], family, fixed = TRUE)

  recoded <- limpiar_recode_emojis(emojis, text)

  # The emoji must come back as descriptive text, not simply vanish.
  expect_match(recoded$text[[1]], "waving hand")
  expect_match(recoded$text[[2]], "man.*woman.*girl")
})


# limpiar_remove_emojis() should raise an error when given unusable arguments.
test_that("limpiar_remove_emojis input validation works", {
  # Emoji written as escapes (waving hand, globe) so the fixture is
  # independent of this file's encoding.
  df <- data.frame(text = c("Hello \U{1F44B}", "World \U{1F30D}"))

  # First argument is a character string instead of a data frame.
  expect_error(limpiar_remove_emojis("not_a_df", "text"))

  # Second argument is a number instead of a column reference.
  expect_error(limpiar_remove_emojis(df, 123))

  # Second argument names a column that does not exist in `df`.
  expect_error(limpiar_remove_emojis(df, "missing_col"))
})

# Removing emoji must not strip legitimate non-ASCII text: accented Latin,
# Greek, Cyrillic and CJK strings have to come back unchanged.
test_that("limpiar_remove_emojis preserves special characters", {
  input_df <- tibble::tribble(
    ~text,
    "café π μ",                    # Accents and Greek
    "München ist schön",           # German umlauts
    "русский язык",                # Russian
    "漢字とひらがな",                # Chinese and Japanese
    "Hello! @ # $ % ^",            # Punctuation
    "H₂O + CO₂",                   # Subscripts
    "→ ← + ÷ × ≠"                  # Math symbols
  )

  output_df <- limpiar_remove_emojis(input_df, text)

  # check we didn't remove stuff we didn't want to
  # NOTE(review): only rows 1-4 are asserted; the punctuation, subscript and
  # math-symbol rows are passed through the function but never checked.
  # Confirm whether that omission is intentional before extending this.
  expect_equal(output_df$text[1], "café π μ")
  expect_equal(output_df$text[2], "München ist schön")
  expect_equal(output_df$text[3], "русский язык")
  expect_equal(output_df$text[4], "漢字とひらがな")
})

# limpiar_remove_emojis() should strip simple emoji, ZWJ sequences, skin-tone
# modified emoji, flags and emoji-presentation symbols alike.
test_that("limpiar_remove_emojis removes all emoji types", {
  # Every emoji is built from \U{...} escapes and reused for both the fixture
  # and the pattern that checks it, so the two cannot drift apart if this file
  # is ever re-encoded.
  zwj <- "\U{200D}"   # zero-width joiner, glues emoji into one glyph
  vs16 <- "\U{FE0F}"  # variation selector 16, requests emoji presentation

  wave <- "\U{1F44B}"
  # man, woman, girl, boy joined by ZWJ
  family <- paste("\U{1F468}", "\U{1F469}", "\U{1F467}", "\U{1F466}", sep = zwj)
  # man + medium skin tone, joined to laptop
  coder <- paste0("\U{1F468}\U{1F3FD}", zwj, "\U{1F4BB}")
  # white flag + VS16, joined to rainbow
  pride_flag <- paste0("\U{1F3F3}", vs16, zwj, "\U{1F308}")
  # regional indicators U + S
  us_flag <- "\U{1F1FA}\U{1F1F8}"
  sun <- paste0("\U{2600}", vs16)
  storm <- paste0("\U{26C8}", vs16)
  snowflake <- paste0("\U{2744}", vs16)
  # Character class: any weather symbol or a left-over variation selector.
  weather_class <- paste0("[", "\U{2600}", "\U{26C8}", "\U{2744}", vs16, "]")

  input_df <- tibble::tribble(
    ~text,
    paste0("Hello ", wave, " World"),
    paste0("Family: ", family),
    paste0("Coding ", coder),
    paste0("Flags ", pride_flag, " ", us_flag),
    paste("Weather", sun, storm, snowflake)
  )

  # First verify emojis are present
  expect_match(input_df$text[1], wave)           # Wave emoji
  expect_match(input_df$text[2], family)         # Family
  expect_match(input_df$text[3], coder)          # Professional with skin tone
  expect_match(input_df$text[4], pride_flag)     # Pride flag
  expect_match(input_df$text[5], weather_class)  # Weather symbols

  # Remove emojis
  output_df <- limpiar_remove_emojis(input_df, text)

  # Verify emojis are gone
  expect_false(any(grepl(wave, output_df$text[1])))
  expect_false(any(grepl(family, output_df$text[2])))
  expect_false(any(grepl(coder, output_df$text[3])))
  expect_false(any(grepl(pride_flag, output_df$text[4])))
  expect_false(any(grepl(weather_class, output_df$text[5])))
})
# jpcompartir/LimpiaR documentation built on Dec. 9, 2024, 9:43 p.m.