# Nothing
# Resolve a file from the package source tree for use inside tests.
#
# `...` holds path components that are joined with `file.path()`. The joined
# path is probed under the working directory and under the directories two
# and three levels above the testthat path. The first probe that exists is
# returned; when none exists the calling test is skipped.
acep_source_path <- function(...) {
  relative_path <- file.path(...)
  roots <- unique(c(
    getwd(),
    testthat::test_path("..", ".."),
    testthat::test_path("..", "..", "..")
  ))
  probes <- file.path(roots, relative_path)

  # Index of the first probe that exists on disk, or NA when none does.
  first_hit <- match(TRUE, file.exists(probes))
  if (!is.na(first_hit)) {
    return(probes[[first_hit]])
  }

  testthat::skip(sprintf(
    "Source file not available in installed-package test environment: %s",
    relative_path
  ))
}
# Checks the clean-text fixture: two elements, a known first string, and a
# literal "#Paro" somewhere in the second one.
test_that("optimization fixtures provide offline clean inputs", {
  clean_inputs <- acep_fixture_clean_text()

  expect_length(clean_inputs, 2)
  expect_equal(
    clean_inputs[[1]],
    "El SUTEBA fue al paro. Reclaman mejoras salariales."
  )
  expect_match(clean_inputs[[2]], "#Paro", fixed = TRUE)
})
# Checks the shape of the count, POS and provider fixtures.
test_that("optimization fixtures provide deterministic offline flows", {
  counts <- acep_fixture_count()
  pos <- acep_fixture_pos()
  ok_response <- acep_fixture_provider_success()
  error_response <- acep_fixture_provider_error()

  expect_equal(counts$dic, c("paro", "lucha", "piquetes"))
  expect_s3_class(pos, "tokenIndex")
  expect_equal(
    names(ok_response),
    c("provider", "status_code", "body")
  )
  expect_equal(error_response$status_code, 401L)
})
# Pins the output of acep_clean() on the clean-text fixture, both as an
# explicit expectation and as a stored snapshot.
test_that("acep_clean current behavior is characterized", {
  cleaned <- acep_clean(acep_fixture_clean_text())
  expected_clean <- c(
    "suteba paro reclaman mejoras salariales",
    "marcha mar plata"
  )

  expect_equal(cleaned, expected_clean)
  expect_snapshot_value(cleaned, style = "json2")
})
# Pins acep_clean() on noisy input (accents, URLs, mentions, hashtags, emoji,
# newlines) under the default arguments and under several flag combinations.
test_that("acep_clean edge behavior is approval-tested before optimization", {
  noisy_docs <- c(
    "ÁÉÍÓÚ Ñandú!!! https://example.com/@uno #Etiqueta 😊\nMartes 25 de Mayo",
    "Sin cambios? ABC 123 @user #Hash http://foo.bar"
  )
  accented_docs <- c("Árbol Ñandú y Educación", "Lunes, Mayo 25!!!")
  multiline_doc <- "Uno\nDos Tres"

  # Default arguments.
  expect_equal(acep_clean(noisy_docs), c("aeiou nandu", "cambios abc"))

  # Stopword and short-word removal switched off.
  expect_equal(
    acep_clean(
      accented_docs,
      rm_stopwords = FALSE,
      rm_shortwords = FALSE
    ),
    c("arbol nandu y educacion", "")
  )

  # Additionally switching off rm_cesp keeps the accented characters.
  expect_equal(
    acep_clean(
      accented_docs,
      rm_cesp = FALSE,
      rm_stopwords = FALSE,
      rm_shortwords = FALSE
    ),
    c("árbol ñandú y educación", "")
  )

  # The embedded newline becomes a single space in both configurations below.
  expect_equal(
    acep_clean(
      multiline_doc,
      rm_stopwords = FALSE,
      rm_shortwords = FALSE,
      rm_punt = FALSE,
      rm_num = FALSE
    ),
    "uno dos tres"
  )
  expect_equal(
    acep_clean(
      multiline_doc,
      rm_stopwords = FALSE,
      rm_shortwords = FALSE,
      rm_whitespace = FALSE
    ),
    "uno dos tres"
  )

  # One output element per input element.
  expect_length(acep_clean(noisy_docs), length(noisy_docs))
})
# Pins the output of acep_count() on the count fixture, both as an explicit
# expectation and as a stored snapshot.
test_that("acep_count current behavior is characterized", {
  count_fixture <- acep_fixture_count()
  counted <- acep_count(count_fixture$texto, count_fixture$dic)

  expect_equal(counted, c(3L, 2L, 0L))
  expect_snapshot_value(counted, style = "json2")
})
# Pins acep_count() on repeated terms, mixed case, NA, empty strings and
# punctuation, and checks that both use_cache settings give the same result.
test_that("acep_count edge semantics are approval-tested before optimization", {
  docs <- c(
    "paro paros paro.",
    "Paro PARO paro",
    "sin datos",
    NA_character_,
    "",
    "piquetes, lucha; paro!"
  )
  dic_with_duplicate <- c("paro", "lucha", "piquetes", "paro")

  expect_equal(
    acep_count(docs, dic_with_duplicate),
    c(3L, 1L, 0L, NA_integer_, 0L, 3L)
  )

  # Cached and uncached runs must agree.
  expect_equal(
    acep_count(docs, dic_with_duplicate, use_cache = FALSE),
    acep_count(docs, dic_with_duplicate, use_cache = TRUE)
  )

  # Dictionary entry padded with spaces.
  expect_equal(
    acep_count(c("el paro y paros", "paro"), " paro ", use_cache = FALSE),
    c(1L, 1L)
  )

  # Term followed by punctuation or a plural suffix.
  expect_equal(
    acep_count("paro, paro. paros", "paro", use_cache = FALSE),
    3L
  )

  # Accented and unaccented dictionary entries together.
  expect_equal(
    acep_count("Árbol árbol arbol", c("árbol", "arbol"), use_cache = FALSE),
    2L
  )
})
# After clearing the regex cache, counting with c("paro", "paro") and then
# with "paro" must leave exactly one cache entry.
test_that("acep_count cache reuses normalized repeated dictionaries", {
  acep_clear_regex_cache()
  on.exit(acep_clear_regex_cache(), add = TRUE)

  sample_doc <- "paro paro"
  expect_equal(acep_count(sample_doc, c("paro", "paro")), 2L)
  expect_equal(acep_count(sample_doc, "paro"), 2L)

  expect_equal(acep_regex_cache_size(), 1L)
})
# Pins the element names of the acep_svo() result, the first extracted event,
# and a snapshot of each element's class, dimensions and names.
test_that("acep_svo stable return schemas are characterized", {
  svo <- acep_svo(acep_fixture_svo())

  expected_elements <- c(
    "acep_annotate_svo", "acep_pro_svo", "acep_list_svo", "acep_sp",
    "acep_lista_lemmas", "acep_no_procesadas"
  )
  expect_equal(names(svo), expected_elements)

  # First row of the subject-verb-object listing.
  svo_list <- svo$acep_list_svo
  expect_equal(svo_list$eventos[[1]], "SOIP -> declara -> Mar huelga")
  expect_equal(svo_list$sujeto[[1]], "SOIP")
  expect_equal(svo_list$verbo[[1]], "declara")
  expect_equal(svo_list$objeto[[1]], "Mar huelga")

  expect_snapshot_value(
    lapply(svo, function(x) list(class = class(x), dim = dim(x), names = names(x))),
    style = "json2"
  )
})
# Stacks three copies of the SVO fixture under doc_id 1..3 and checks that
# each copy yields the same single event.
test_that("acep_svo repeated-document semantics are approval-tested before optimization", {
  n_docs <- 3L

  stacked <- do.call(rbind, replicate(n_docs, acep_fixture_svo(), simplify = FALSE))
  stacked$doc_id <- rep(seq_len(n_docs), each = nrow(acep_fixture_svo()))
  # Reassign the fixture's class vector to the combined object.
  class(stacked) <- class(acep_fixture_svo())

  svo <- acep_svo(stacked)
  svo_list <- svo$acep_list_svo
  expected_events <- rep("SOIP -> declara -> Mar huelga", n_docs)

  expect_equal(svo_list$doc_id, 1:3)
  expect_equal(svo_list$oracion_id, rep(1L, n_docs))
  expect_equal(svo_list$eventos, expected_events)
  expect_equal(svo_list$sujeto, rep("SOIP", n_docs))
  expect_equal(svo_list$verbo, rep("declara", n_docs))
  expect_equal(svo_list$objeto, rep("Mar huelga", n_docs))
  expect_equal(nrow(svo$acep_no_procesadas), 0L)
})
# Exercises the internal .acep_svo_collapse_tokens helper with a non-empty
# token set, an empty token set, and an explicit grouping-column vector.
test_that("acep_svo token aggregation helper preserves fallback semantics", {
  collapse_tokens <- getFromNamespace(".acep_svo_collapse_tokens", "ACEP")

  tokenindex <- acep_fixture_svo()
  annotated <- tokenindex
  # Label three known tokens as subject; everything else is predicate.
  annotated$s_p <- ifelse(
    annotated$token %in% c("Mar", "Plata", "SOIP"),
    "sujeto",
    "predicado"
  )
  subject_rows <- subset(annotated, s_p == "sujeto")

  # Non-empty input: one row with the tokens pasted together.
  subject_out <- collapse_tokens(subject_rows, annotated, "sujeto")
  expect_equal(names(subject_out), c("doc_id", "sentence", "sujeto"))
  expect_equal(subject_out$doc_id, 1L)
  expect_equal(subject_out$sentence, 1L)
  expect_equal(subject_out$sujeto, "Mar Plata SOIP")

  # Zero-row input: still one row, with NA in the value column.
  predicate_out <- collapse_tokens(
    annotated[0, ],
    annotated,
    "predicados"
  )
  expect_equal(names(predicate_out), c("doc_id", "sentence", "predicados"))
  expect_equal(nrow(predicate_out), 1L)
  expect_true(is.na(predicate_out$predicados[[1]]))

  # Fourth argument supplies the grouping columns, here including "sent".
  sent_out <- collapse_tokens(
    subject_rows,
    annotated,
    "conjugaciones",
    c("doc_id", "sentence", "sent")
  )
  expect_equal(
    names(sent_out),
    c("doc_id", "sentence", "sent", "conjugaciones")
  )
  expect_equal(sent_out$conjugaciones, "Mar Plata SOIP")
})
# Exercises the internal .acep_postag_normalize_parse helper on a three-row
# parse whose middle row has an empty token.
test_that("acep_postag helper normalizes parsed chunks without changing row semantics", {
  normalize_parse <- getFromNamespace(".acep_postag_normalize_parse", "ACEP")

  raw_parse <- data.frame(
    doc_id = c("text1", "text2", "text3"),
    sentence = c(1L, 1L, 1L),
    token = c("Mar", "", "SOIP"),
    morph = I(list("Number=Sing", "Space=Yes", "Number=Sing")),
    sent = I(list(" En Mar\n", "\n", " reclaman ")),
    stringsAsFactors = FALSE
  )

  # Expected: empty-token row gone, doc ids shifted by the offset, sentences
  # trimmed, list columns flattened to plain vectors.
  shifted <- normalize_parse(raw_parse, doc_id_offset = 10L)
  expect_equal(shifted$doc_id, c(11L, 13L))
  expect_equal(shifted$sent, c("En Mar", "reclaman"))
  expect_equal(shifted$morph, c("Number=Sing", "Number=Sing"))
  expect_equal(shifted$token, c("Mar", "SOIP"))

  # Only the empty-token row: zero rows back, same columns.
  only_blank <- normalize_parse(raw_parse[2, ], doc_id_offset = 0L)
  expect_equal(nrow(only_blank), 0L)
  expect_equal(names(only_blank), names(raw_parse))
})
# Exercises the internal .acep_postag_text_chunks helper: five texts in
# chunks of two, then two texts in a chunk larger than the input.
test_that("acep_postag chunk helper preserves contiguous text boundaries", {
  text_chunks <- getFromNamespace(".acep_postag_text_chunks", "ACEP")

  pieces <- text_chunks(letters[1:5], chunk_size = 2L)
  expect_length(pieces, 3L)

  first_piece <- pieces[[1]]
  expect_equal(first_piece$start_idx, 1L)
  expect_equal(first_piece$end_idx, 2L)
  expect_equal(first_piece$texto, letters[1:2])

  second_piece <- pieces[[2]]
  expect_equal(second_piece$start_idx, 3L)
  expect_equal(second_piece$end_idx, 4L)

  # The trailing chunk holds the single leftover text.
  last_piece <- pieces[[3]]
  expect_equal(last_piece$start_idx, 5L)
  expect_equal(last_piece$end_idx, 5L)
  expect_equal(last_piece$texto, "e")

  single <- text_chunks(c("uno", "dos"), chunk_size = 10L)
  expect_length(single, 1L)
  expect_equal(single[[1]]$texto, c("uno", "dos"))
})
# Exercises the internal .acep_postag_prepare_loc_entities helper with a mix
# of LOC and ORG entities, then with ORG entities only.
test_that("acep_postag location helper preserves LOC merge and empty schemas", {
  prepare_locations <- getFromNamespace(".acep_postag_prepare_loc_entities", "ACEP")

  entity_table <- data.frame(
    doc_id = c(1L, 1L, 2L, 2L),
    sentence = c(1L, 1L, 1L, 1L),
    entity = c("Mar_del_Plata", "Mar_del_Plata", "SOIP", "Buenos_Aires"),
    entity_type = c("LOC", "LOC", "ORG", "LOC"),
    stringsAsFactors = FALSE
  )
  token_table <- data.frame(
    doc_id = c(1L, 1L, 2L),
    sentence = c(1L, 1L, 1L),
    token = c("Mar", "Plata", "Buenos"),
    stringsAsFactors = FALSE
  )

  # Expected: one row per distinct location, underscores shown as spaces.
  loc_rows <- prepare_locations(entity_table, token_table)
  expect_equal(
    names(loc_rows),
    c("entity_", "doc_id", "sentence", "entity", "entity_type")
  )
  expect_equal(loc_rows$entity_, c("Mar del Plata", "Buenos Aires"))
  expect_equal(loc_rows$doc_id, c(1L, 2L))
  expect_equal(loc_rows$entity_type, c("LOC", "LOC"))

  # No LOC rows in the input: empty result that also carries lat/long columns.
  org_only <- entity_table[entity_table$entity_type == "ORG", ]
  no_loc_rows <- prepare_locations(org_only, token_table)
  expect_equal(
    names(no_loc_rows),
    c("entity_", "doc_id", "sentence", "entity", "entity_type", "lat", "long")
  )
  expect_equal(nrow(no_loc_rows), 0L)
})
# Exercises the three internal acep_postag_hibrido helpers (parse
# normalisation, text chunking, location preparation) on small inputs.
test_that("acep_postag_hibrido helpers preserve chunk, parse, and LOC pre-geocode semantics", {
  normalize_parse <- getFromNamespace(".acep_postag_hibrido_normalize_parse", "ACEP")
  text_chunks <- getFromNamespace(".acep_postag_hibrido_text_chunks", "ACEP")
  prepare_locations <- getFromNamespace(".acep_postag_hibrido_prepare_loc_entities", "ACEP")

  # --- Parse normalisation ---
  raw_parse <- data.frame(
    doc_id = c("text1", "text2", "text3"),
    sentence = c(1L, 1L, 1L),
    token = c("Mar", "", "SOIP"),
    morph = I(list("Number=Sing", "Space=Yes", "Number=Sing")),
    sent = I(list(" En Mar\n", "\n", " reclaman ")),
    stringsAsFactors = FALSE
  )
  shifted <- normalize_parse(raw_parse, doc_id_offset = 20L)
  expect_equal(shifted$doc_id, c(21L, 23L))
  expect_equal(shifted$sent, c("En Mar", "reclaman"))
  expect_equal(shifted$morph, c("Number=Sing", "Number=Sing"))
  expect_equal(shifted$token, c("Mar", "SOIP"))

  # --- Text chunking ---
  pieces <- text_chunks(letters[1:5], chunk_size = 2L)
  expect_length(pieces, 3L)

  first_piece <- pieces[[1]]
  expect_equal(first_piece$start_idx, 1L)
  expect_equal(first_piece$end_idx, 2L)
  expect_equal(first_piece$texto, letters[1:2])

  second_piece <- pieces[[2]]
  expect_equal(second_piece$start_idx, 3L)
  expect_equal(second_piece$end_idx, 4L)

  last_piece <- pieces[[3]]
  expect_equal(last_piece$start_idx, 5L)
  expect_equal(last_piece$end_idx, 5L)
  expect_equal(last_piece$texto, "e")

  # --- Location preparation (this variant takes the entity table only) ---
  entity_table <- data.frame(
    doc_id = c(1L, 1L, 2L, 2L),
    sentence = c(1L, 1L, 1L, 1L),
    entity = c("Mar_del_Plata", "Mar_del_Plata", "SOIP", "Buenos_Aires"),
    entity_type = c("LOC", "LOC", "ORG", "LOC"),
    stringsAsFactors = FALSE
  )
  loc_rows <- prepare_locations(entity_table)
  expect_equal(
    names(loc_rows),
    c("doc_id", "sentence", "entity", "entity_type", "entity_")
  )
  expect_equal(loc_rows$entity_, c("Mar del Plata", "Buenos Aires"))
  expect_equal(loc_rows$doc_id, c(1L, 2L))
  expect_equal(loc_rows$entity_type, c("LOC", "LOC"))

  # No LOC rows in the input: empty result that also carries lat/long columns.
  org_only <- entity_table[entity_table$entity_type == "ORG", ]
  no_loc_rows <- prepare_locations(org_only)
  expect_equal(
    names(no_loc_rows),
    c("entity_", "doc_id", "sentence", "entity", "entity_type", "lat", "long")
  )
  expect_equal(nrow(no_loc_rows), 0L)
})
# Snapshots the sorted export list of ACEP together with the deparsed formal
# arguments of acep_clean, acep_count and acep_svo.
test_that("public exports and hot-path formals are snapshotted", {
  # Deparse every formal of `fn` into a single string, keeping the names.
  hot_path_formals <- function(fn) {
    vapply(
      formals(fn),
      function(default) paste(deparse(default), collapse = ""),
      character(1)
    )
  }

  api <- list(
    exports = sort(getNamespaceExports("ACEP")),
    formals = list(
      acep_clean = hot_path_formals(acep_clean),
      acep_count = hot_path_formals(acep_count),
      acep_svo = hot_path_formals(acep_svo)
    )
  )

  expect_true("acep_clean" %in% api$exports)
  expect_true("acep_count" %in% api$exports)
  expect_true("acep_svo" %in% api$exports)

  # acep_count() must default to using the cache.
  expect_equal(api$formals$acep_count[["use_cache"]], "TRUE")

  expect_snapshot_value(api, style = "json2")
})
# Checks that bench/optimization-baseline.R can be located in the source
# tree; acep_source_path() skips the test when it cannot.
test_that("manual optimization baseline script is present but not CI-wired", {
  script_path <- acep_source_path("bench", "optimization-baseline.R")

  expect_true(file.exists(script_path))
  expect_equal(basename(script_path), "optimization-baseline.R")
})
# Checks that .Rbuildignore contains the three exact pattern lines below.
test_that("package footprint artifacts are excluded from source builds", {
  ignore_file <- acep_source_path(".Rbuildignore")
  ignore_lines <- readLines(ignore_file, warn = FALSE)

  expect_true("^vignettes/.*\\.udpipe$" %in% ignore_lines)
  expect_true("^bench/.*\\.csv$" %in% ignore_lines)
  expect_true("^bench/.*\\.rds$" %in% ignore_lines)
})
# Loads data/acep_bases.rda into an isolated environment and checks the
# stored object name, its element names, selected row counts, and agreement
# with the ACEP::acep_bases object.
test_that("acep_bases canonical data file exposes stable object names and metadata", {
  # Load `path` into a fresh environment and summarise its `acep_bases` object.
  describe_data_file <- function(path) {
    sandbox <- new.env(parent = emptyenv())
    loaded_names <- load(path, envir = sandbox)
    bases <- sandbox[["acep_bases"]]
    list(
      objects = sort(loaded_names),
      class = class(bases),
      names = names(bases),
      element_classes = lapply(bases, class),
      rows = vapply(bases, NROW, integer(1)),
      columns = vapply(bases, NCOL, integer(1)),
      column_names = lapply(bases, names)
    )
  }

  rda_path <- acep_source_path("data", "acep_bases.rda")
  rdata_path <- file.path(dirname(rda_path), "acep_bases.RData")
  rda_metadata <- describe_data_file(rda_path)

  # No acep_bases.RData may sit next to the .rda file.
  expect_false(file.exists(rdata_path))
  expect_equal(rda_metadata$objects, "acep_bases")

  expected_names <- c(
    "ed_neco", "la_nueva", "lc_720", "lc_mdp", "ln_arg", "ln_bb",
    "rev_puerto", "rp_mdp", "rp_procesada", "spacy_postag", "titulares"
  )
  expect_equal(rda_metadata$names, expected_names)

  checked_tables <- c("lc_720", "rp_procesada", "spacy_postag", "titulares")
  expect_equal(
    rda_metadata$rows[checked_tables],
    c(lc_720 = 720L, rp_procesada = 7816L, spacy_postag = 15L, titulares = 8L)
  )

  expect_equal(names(ACEP::acep_bases), rda_metadata$names)
})
# Exercises the internal acep_require_namespace helper: NULL for a package
# that is available, a Spanish error message for one that is not.
test_that("optional dependency helper reports clear Spanish installation guidance", {
  require_helper <- getFromNamespace("acep_require_namespace", "ACEP")

  expect_null(require_helper("stats", "conteo base"))

  # Regular-expression match, hence the escaped parentheses in the pattern.
  expect_error(
    require_helper("acepPaqueteInexistente", "prueba opcional"),
    "La funcionalidad 'prueba opcional' requiere instalar el paquete opcional 'acepPaqueteInexistente'. Instala el paquete con: install.packages\\(\"acepPaqueteInexistente\"\\)",
    fixed = FALSE
  )
})
# Checks the dependency fields of DESCRIPTION: the heavy NLP/geocoding
# packages must be listed under Suggests and not under Imports, and the core
# HTTP/JSON/string packages must be listed under Imports.
test_that("heavy NLP and geocoding dependencies are optional in DESCRIPTION", {
  description_path <- acep_source_path("DESCRIPTION")
  # Requesting the fields explicitly yields NA for a missing field rather
  # than a subscript error when it is accessed below.
  description <- read.dcf(
    description_path,
    fields = c("Imports", "Suggests")
  )[1, ]

  # Split a dependency field into bare package names: tolerate a missing
  # field, drop version constraints such as "pkg (>= 1.0)", and ignore empty
  # entries left by a trailing comma.
  package_names <- function(field) {
    if (is.na(field)) {
      return(character(0))
    }
    entries <- trimws(unlist(strsplit(field, ",", fixed = TRUE)))
    entries <- trimws(sub("\\s*\\([^)]*\\)$", "", entries))
    entries[nzchar(entries)]
  }

  imports <- package_names(description[["Imports"]])
  suggests <- package_names(description[["Suggests"]])

  optional_packages <- c("spacyr", "reticulate", "udpipe", "rsyntax", "tidygeocoder")
  core_packages <- c("httr", "jsonlite", "stringr", "magrittr")

  expect_false(any(optional_packages %in% imports))
  expect_true(all(optional_packages %in% suggests))
  expect_true(all(core_packages %in% imports))
})
# Checks, by matching against deparsed function bodies, that each entry
# point calls acep_require_namespace() for the optional packages it uses.
test_that("optional dependency entry points guard their package-specific paths", {
  # Deparsed body of the ACEP function `name`, joined into one string.
  function_body <- function(name) {
    fn <- get(name, envir = asNamespace("ACEP"))
    paste(deparse(body(fn)), collapse = "\n")
  }

  postag_src <- function_body("acep_postag")
  hibrido_src <- function_body("acep_postag_hibrido")
  upos_src <- function_body("acep_upos")
  svo_src <- function_body("acep_svo")

  # acep_postag
  expect_match(postag_src, 'acep_require_namespace\\("spacyr", "acep_postag"\\)')
  expect_match(postag_src, 'acep_require_namespace\\("rsyntax", "acep_postag"\\)')
  expect_match(postag_src, 'acep_require_namespace\\("tidygeocoder", "acep_postag"\\)')
  expect_match(postag_src, 'acep_require_namespace\\("reticulate", "acep_postag"\\)')

  # acep_postag_hibrido
  expect_match(hibrido_src, 'acep_require_namespace\\("spacyr", "acep_postag_hibrido"\\)')
  expect_match(hibrido_src, 'acep_require_namespace\\("rsyntax", "acep_postag_hibrido"\\)')
  expect_match(hibrido_src, 'acep_require_namespace\\("tidygeocoder", "acep_postag_hibrido"\\)')
  expect_match(hibrido_src, 'acep_require_namespace\\("reticulate", "acep_postag_hibrido"\\)')

  # acep_upos
  expect_match(upos_src, 'acep_require_namespace\\("udpipe", "acep_upos"\\)')
  expect_match(upos_src, 'acep_require_namespace\\("rsyntax", "acep_upos"\\)')

  # acep_svo
  expect_match(svo_src, 'acep_require_namespace\\("rsyntax", "acep_svo"\\)')
})
# Checks the deparsed bodies of the provider wrappers: some must reference
# httr:: and jsonlite::, none may reference the optional NLP/geocoding
# packages.
test_that("provider wrappers keep only core HTTP/JSON dependencies mandatory", {
  wrapper_names <- c(
    "acep_claude", "acep_gemini", "acep_gpt", "acep_ollama",
    "acep_openrouter", "acep_together"
  )

  # One deparsed body per wrapper, joined into a single string each.
  wrapper_src <- vapply(
    wrapper_names,
    function(name) {
      fn <- get(name, envir = asNamespace("ACEP"))
      paste(deparse(body(fn)), collapse = "\n")
    },
    character(1)
  )

  expect_true(any(grepl("httr::", wrapper_src, fixed = TRUE)))
  expect_true(any(grepl("jsonlite::", wrapper_src, fixed = TRUE)))
  expect_false(any(grepl(
    "spacyr::|reticulate::|tidygeocoder::|rsyntax::|udpipe::",
    wrapper_src
  )))
})
# Exercises the internal provider helpers: default schema construction, user
# prompt assembly, schema field descriptions, endpoint URLs, the OpenAI token
# limit field, OpenRouter structured-output support, and auth headers.
test_that("provider shared helpers preserve schema protection, prompts, endpoints, and headers", {
  default_schema <- getFromNamespace(".acep_provider_default_schema", "ACEP")
  user_prompt <- getFromNamespace(".acep_provider_user_prompt", "ACEP")
  schema_fields <- getFromNamespace(".acep_provider_schema_field_descriptions", "ACEP")
  provider_endpoint <- getFromNamespace(".acep_provider_endpoint", "ACEP")
  provider_headers <- getFromNamespace(".acep_provider_auth_headers", "ACEP")
  openai_token_field <- getFromNamespace(".acep_openai_token_limit_field", "ACEP")
  openrouter_structured <- getFromNamespace(".acep_openrouter_model_supports_structured_outputs", "ACEP")

  # --- Default schema ---
  base_schema <- default_schema()
  expect_equal(base_schema$type, "object")
  expect_equal(names(base_schema$properties), "respuesta")
  expect_equal(unclass(base_schema$required), "respuesta")
  expect_true(inherits(base_schema$required, "AsIs"))
  expect_false(is.null(base_schema$additionalProperties))

  # --- Schema variant with both options switched off ---
  plain_schema <- default_schema(
    additional_properties = FALSE,
    protect_arrays = FALSE
  )
  expect_equal(names(plain_schema), c("type", "properties", "required"))
  expect_false(inherits(plain_schema$required, "AsIs"))

  # --- Prompt assembly ---
  expect_equal(
    user_prompt("Texto de prueba", "Extrae campos"),
    "Texto a analizar:\nTexto de prueba\n\nInstrucciones:\nExtrae campos"
  )

  # --- Field descriptions ---
  expect_equal(
    unname(schema_fields(acep_gpt_schema("clasificacion"))),
    c(
      "- categoria: Categoria principal del texto",
      "- confianza: Nivel de confianza de 0 a 1",
      "- justificacion: Breve justificacion de la clasificacion"
    )
  )

  # --- Endpoints ---
  expect_equal(
    provider_endpoint("openai"),
    "https://api.openai.com/v1/chat/completions"
  )
  expect_equal(
    provider_endpoint("anthropic"),
    "https://api.anthropic.com/v1/messages"
  )
  expect_equal(
    provider_endpoint("gemini", modelo = "gemini-2.5-flash"),
    "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"
  )
  expect_equal(
    provider_endpoint("together"),
    "https://api.together.xyz/v1/chat/completions"
  )
  expect_equal(
    provider_endpoint("openrouter"),
    "https://openrouter.ai/api/v1/chat/completions"
  )

  # --- OpenAI token limit field per model ---
  expect_equal(openai_token_field("gpt-4o-mini"), "max_tokens")
  expect_equal(openai_token_field("gpt-5-mini"), "max_completion_tokens")
  expect_equal(openai_token_field("o4-mini"), "max_completion_tokens")

  # --- OpenRouter structured-output support per model ---
  expect_true(openrouter_structured("openai/gpt-4o-mini"))
  expect_true(openrouter_structured("google/gemini-2.5-flash"))
  expect_false(openrouter_structured("anthropic/claude-sonnet-4.5"))

  # --- OpenRouter auth headers ---
  router_headers <- provider_headers(
    "openrouter",
    api_key = "clave",
    site_url = "https://acep.test",
    app_name = "ACEP"
  )
  expect_equal(router_headers$Authorization, "Bearer clave")
  expect_equal(router_headers$`HTTP-Referer`, "https://acep.test")
  expect_equal(router_headers$`X-Title`, "ACEP")
})
# Exercises the internal provider helpers for input validation, JSON
# clean-up and parsing, and chat-content extraction, using in-memory values.
test_that("provider response helpers parse JSON and preserve Spanish error behavior without network", {
  validate_inputs <- getFromNamespace(".acep_provider_validate_request_inputs", "ACEP")
  clean_json <- getFromNamespace(".acep_provider_clean_json_response", "ACEP")
  parse_json <- getFromNamespace(".acep_provider_parse_json_response", "ACEP")
  chat_content <- getFromNamespace(".acep_provider_extract_chat_content", "ACEP")

  # --- Input validation: valid call, then each argument emptied in turn ---
  expect_null(
    validate_inputs("texto", "instrucciones", "clave", "OPENAI_API_KEY")
  )
  expect_error(
    validate_inputs("", "instrucciones", "clave", "OPENAI_API_KEY"),
    "El parametro 'texto' debe ser una cadena de caracteres no vacia",
    fixed = TRUE
  )
  expect_error(
    validate_inputs("texto", "", "clave", "OPENAI_API_KEY"),
    "El parametro 'instrucciones' debe ser una cadena de caracteres no vacia",
    fixed = TRUE
  )
  expect_error(
    validate_inputs("texto", "instrucciones", "", "OPENAI_API_KEY"),
    "API key no encontrada. Define la variable de entorno OPENAI_API_KEY o pasa el parametro api_key",
    fixed = TRUE
  )

  # --- JSON wrapped in a Markdown code fence ---
  fenced_json <- "```json\n{\"respuesta\":\"ok\"}\n```"
  bare_json <- "{\"respuesta\":\"ok\"}"
  expect_equal(clean_json(fenced_json), bare_json)
  expect_equal(parse_json(fenced_json, parse_json = FALSE), bare_json)
  expect_equal(
    parse_json(fenced_json, parse_json = TRUE),
    list(respuesta = "ok")
  )
  expect_error(
    parse_json("{no valido", parse_json = TRUE),
    "Error al parsear JSON de la respuesta. Contenido recibido",
    fixed = TRUE
  )

  # --- Chat-completion content extraction ---
  chat_response <- list(
    choices = list(list(message = list(content = "{\"respuesta\":\"ok\"}")))
  )
  expect_equal(chat_content(chat_response), "{\"respuesta\":\"ok\"}")
  expect_error(
    chat_content(list(choices = list())),
    "La API devolvio una respuesta vacia. Verifica tu prompt y esquema.",
    fixed = TRUE
  )
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.