## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
## ----setup--------------------------------------------------------------------
# library(polyglotr)
# library(dplyr)
# library(tibble)
# library(purrr)
#
## ----basic_detection----------------------------------------------------------
# # Sample texts in different languages
# sample_texts <- c(
#   "Hello, how are you today?",     # English
#   "Bonjour, comment allez-vous?",  # French
#   "Hola, ¿cómo estás hoy?",        # Spanish
#   "Guten Tag, wie geht es Ihnen?", # German
#   "Ciao, come stai oggi?"          # Italian
# )
#
# # Detect languages
# detected_languages <- sapply(sample_texts, language_detect)
# print("Detected languages:")
# print(detected_languages)
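#
# # Optional sketch: web-based detection can fail (network issues, rate limits),
# # so wrapping language_detect() with purrr::possibly() returns a fallback label
# # instead of raising an error. The "unknown" label and the helper names
# # (safe_detect, detected_safe) are illustrative choices, not polyglotr defaults.
# safe_detect <- purrr::possibly(language_detect, otherwise = "unknown")
# detected_safe <- sapply(sample_texts, safe_detect)
# print(detected_safe)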
## ----conditional_translation--------------------------------------------------
# translate_if_not_english <- function(text, target_language = "en") {
#   # Detect language of the input text
#   detected_lang <- language_detect(text)
#
#   # Check if the detected language is English
#   is_english <- grepl("en", detected_lang, ignore.case = TRUE)
#
#   if (is_english) {
#     # Return original text if already English
#     return(list(
#       original = text,
#       translated = text,
#       was_translated = FALSE,
#       detected_language = detected_lang
#     ))
#   } else {
#     # Translate to English if not English
#     translated_text <- google_translate(text, target_language = target_language, source_language = "auto")
#     return(list(
#       original = text,
#       translated = translated_text,
#       was_translated = TRUE,
#       detected_language = detected_lang
#     ))
#   }
# }
#
# # Test the function
# test_text_fr <- "Bonjour, j'aimerais acheter un billet."
# result <- translate_if_not_english(test_text_fr)
#
# print("Conditional translation result:")
# print(paste("Original:", result$original))
# print(paste("Translated:", result$translated))
# print(paste("Was translated:", result$was_translated))
# print(paste("Detected language:", result$detected_language))
## ----mixed_language_tibble----------------------------------------------------
# # Create a dataset with mixed languages (typical of user-generated content)
# mixed_data <- tibble(
#   id = 1:8,
#   user_feedback = c(
#     "Great product, very satisfied!",               # English
#     "Excelente producto, muy satisfecho!",          # Spanish
#     "Produit fantastique, je le recommande!",       # French
#     "This service exceeded my expectations.",       # English
#     "Der Service war wirklich hervorragend.",       # German
#     "Servizio eccellente, davvero impressionante!", # Italian
#     "The delivery was fast and reliable.",          # English
#     "La livraison était rapide et fiable."          # French
#   ),
#   rating = c(5, 5, 4, 5, 4, 5, 4, 4),
#   category = rep(c("product", "service"), 4)
# )
#
# print("Original mixed-language dataset:")
# print(mixed_data)
## ----detect_and_translate-----------------------------------------------------
# # Function to process each text entry
# process_feedback <- function(text) {
#   result <- translate_if_not_english(text)
#   return(tibble(
#     original_text = result$original,
#     english_text = result$translated,
#     was_translated = result$was_translated,
#     detected_language = result$detected_language
#   ))
# }
#
# # Apply to all feedback entries
# processed_results <- purrr::map_dfr(mixed_data$user_feedback, process_feedback)
#
# # Combine with original data
# enhanced_data <- bind_cols(mixed_data, processed_results)
#
# print("Enhanced dataset with language detection and translation:")
# print(enhanced_data)
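#
# # Optional quick check of the combined result: tally entries by detected
# # language and translation flag with dplyr::count() (dplyr is already loaded).
# enhanced_data %>%
#   count(detected_language, was_translated)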
## ----advanced_tidyverse-------------------------------------------------------
# library(stringr)
#
# # Enhanced processing function with more details
# enhanced_language_processing <- function(df, text_column) {
#   df %>%
#     mutate(
#       # Detect language for each text entry
#       detected_lang = map_chr(!!rlang::sym(text_column),
#                               ~ tryCatch(language_detect(.x), error = function(e) "unknown")),
#
#       # Determine if translation is needed
#       needs_translation = !str_detect(detected_lang, "en"),
#
#       # Translate only non-English text
#       english_text = map2_chr(!!rlang::sym(text_column), needs_translation,
#                               ~ if (.y) {
#                                 tryCatch(google_translate(.x, target_language = "en"),
#                                          error = function(e) .x)
#                               } else {
#                                 .x
#                               }),
#
#       # Add translation confidence/status
#       translation_status = case_when(
#         detected_lang == "unknown" ~ "detection_failed",
#         !needs_translation ~ "already_english",
#         english_text != !!rlang::sym(text_column) ~ "translated",
#         TRUE ~ "translation_failed"
#       )
#     )
# }
#
# # Apply enhanced processing
# result_data <- enhanced_language_processing(mixed_data, "user_feedback")
#
# print("Advanced processing results:")
# print(result_data %>% select(id, detected_lang, needs_translation, translation_status))
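#
# # Optional sketch: public translation endpoints may throttle rapid requests.
# # purrr::slowly() inserts a fixed pause between calls; the 1-second delay and
# # the helper name slow_translate are illustrative choices, not documented
# # polyglotr requirements.
# slow_translate <- purrr::slowly(
#   function(text) google_translate(text, target_language = "en"),
#   rate = purrr::rate_delay(1)
# )
# throttled_translations <- purrr::map_chr(mixed_data$user_feedback, slow_translate)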
## ----batch_filtering----------------------------------------------------------
# # Create larger sample dataset
# large_dataset <- tibble(
#   id = 1:20,
#   content = c(
#     # Mix of English and non-English content
#     "Amazing service quality",           # EN
#     "Fantástico servicio al cliente",    # ES
#     "Service client exceptionnel",       # FR
#     "Great user experience",             # EN
#     "Esperienza utente eccellente",      # IT
#     "Ausgezeichnete Benutzerführung",    # DE
#     "Fast shipping and delivery",        # EN
#     "Livraison rapide et efficace",      # FR
#     "Excellent product quality",         # EN
#     "Qualità del prodotto superiore",    # IT
#     "Easy to use interface",             # EN
#     "Interfaz muy fácil de usar",        # ES
#     "Highly recommend this product",     # EN
#     "Je recommande vivement ce produit", # FR
#     "Outstanding customer support",      # EN
#     "Soporte al cliente sobresaliente",  # ES
#     "Very satisfied with purchase",      # EN
#     "Sehr zufrieden mit dem Kauf",       # DE
#     "Will definitely buy again",         # EN
#     "Sicuramente acquisterò di nuovo"    # IT
#   ),
#   timestamp = Sys.time() + sample(-1000:1000, 20),
#   priority = sample(c("high", "medium", "low"), 20, replace = TRUE)
# )
#
# # Efficient batch processing workflow
# batch_process_languages <- function(df, text_col, batch_size = 5) {
#   # First, detect languages for all entries
#   df_with_detection <- df %>%
#     mutate(
#       row_id = row_number(),
#       detected_lang = map_chr(!!rlang::sym(text_col),
#                               ~ tryCatch(language_detect(.x), error = function(e) "en")),
#       is_english = str_detect(detected_lang, "en")
#     )
#
#   # Separate English and non-English content
#   english_content <- df_with_detection %>% filter(is_english)
#   non_english_content <- df_with_detection %>% filter(!is_english)
#
#   # Process non-English content in batches
#   if (nrow(non_english_content) > 0) {
#     non_english_content <- non_english_content %>%
#       mutate(
#         batch_id = ceiling(row_number() / batch_size),
#         english_text = map_chr(!!rlang::sym(text_col),
#                                ~ tryCatch(google_translate(.x, target_language = "en"),
#                                           error = function(e) .x))
#       )
#   } else {
#     non_english_content <- non_english_content %>%
#       mutate(batch_id = integer(0), english_text = character(0))
#   }
#
#   # For English content, keep original text
#   english_content <- english_content %>%
#     mutate(
#       batch_id = NA_integer_,
#       english_text = !!rlang::sym(text_col)
#     )
#
#   # Combine results
#   result <- bind_rows(english_content, non_english_content) %>%
#     arrange(row_id) %>%
#     select(-row_id)
#
#   return(result)
# }
#
# # Apply batch processing
# processed_large <- batch_process_languages(large_dataset, "content", batch_size = 3)
#
# # Summary statistics
# summary_stats <- processed_large %>%
#   summarise(
#     total_entries = n(),
#     english_entries = sum(is_english),
#     translated_entries = sum(!is_english),
#     translation_rate = mean(!is_english),
#     unique_languages = n_distinct(detected_lang)
#   )
#
# print("Processing summary:")
# print(summary_stats)
#
# print("Sample of processed data:")
# print(processed_large %>%
#   select(id, detected_lang, is_english, content, english_text) %>%
#   head(10))
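#
# # Optional follow-up: pull out only the rows that were actually translated,
# # e.g. for a manual spot-check of translation quality. Uses only columns
# # created by batch_process_languages() above.
# processed_large %>%
#   filter(!is_english) %>%
#   select(id, detected_lang, content, english_text)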