# tests/testthat/test-sandbox.R

# Tests for R-Native Programmatic Sandbox
library(testthat)
library(aisdk)

# ============================================================================
# SandboxManager: Basic Execution
# ============================================================================

# Smoke test: the text returned by execute() for `print(1 + 1)` must contain "2".
test_that("SandboxManager can execute simple R code", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    result <- sandbox$execute("print(1 + 1)")
    # expect_match() shows the actual string on failure, unlike expect_true(grepl()).
    expect_match(result, "2")
})

# Text printed by the executed code must appear in the value returned by execute().
test_that("SandboxManager captures print output", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    result <- sandbox$execute("print('hello world')")
    # expect_match() reports the captured text when the pattern is missing.
    expect_match(result, "hello world")
})

# Unparseable code must not raise; execute() is expected to return a string
# that contains the "Error executing R code:" marker.
test_that("SandboxManager handles syntax errors gracefully", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    # Deliberately malformed: the `if` condition is never closed.
    result <- sandbox$execute("if (TRUE {")
    expect_match(result, "Error executing R code:")
})

# A stop() raised inside the sandbox must be reported in the returned string,
# including both the generic marker and the original condition message.
test_that("SandboxManager handles runtime errors gracefully", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    result <- sandbox$execute("stop('intentional error')")
    expect_match(result, "Error executing R code:")
    expect_match(result, "intentional error")
})

# An empty code string is expected to produce a result mentioning "Error".
test_that("SandboxManager handles empty code", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    result <- sandbox$execute("")
    # expect_match() prints the returned text if "Error" is absent.
    expect_match(result, "Error")
})

# ============================================================================
# SandboxManager: Variable Persistence
# ============================================================================

# A value assigned in one execute() call must be readable in a later call on
# the same SandboxManager instance.
test_that("Variables persist across executions", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    sandbox$execute("x <- 42")
    result <- sandbox$execute("print(x)")
    expect_match(result, "42")
})

# Same persistence check as for scalars, but with a data.frame: the row count
# printed by the second call must reflect the frame built in the first call.
test_that("Data frames persist across executions", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    sandbox$execute("df <- data.frame(a = 1:3, b = c('x', 'y', 'z'))")
    result <- sandbox$execute("print(nrow(df))")
    expect_match(result, "3")
})

# ============================================================================
# SandboxManager: Tool Binding
# ============================================================================

# A tool passed to the constructor must be callable by its `name` from code
# executed in the sandbox.
test_that("Tool is bound and callable in sandbox", {
    mock_tool <- tool(
        name = "greet",
        description = "Greets a person",
        parameters = z_object(name = z_string("Person's name")),
        execute = function(args) paste("Hello,", args$name)
    )

    sandbox <- SandboxManager$new(
        tools = list(mock_tool),
        preload_packages = character(0)
    )

    # The tool is invoked positionally; its return value is printed so that it
    # shows up in the captured output.
    result <- sandbox$execute("result <- greet('Alice')\nprint(result)")
    expect_match(result, "Hello.*Alice")
})

# Two tools registered together must both be callable within one execute() call.
test_that("Multiple tools are bound and callable", {
    add_tool <- tool(
        name = "add_numbers",
        description = "Add two numbers",
        parameters = z_object(
            a = z_number("First number"),
            b = z_number("Second number")
        ),
        execute = function(args) args$a + args$b
    )

    multiply_tool <- tool(
        name = "multiply_numbers",
        description = "Multiply two numbers",
        parameters = z_object(
            a = z_number("First number"),
            b = z_number("Second number")
        ),
        execute = function(args) args$a * args$b
    )

    sandbox <- SandboxManager$new(
        tools = list(add_tool, multiply_tool),
        preload_packages = character(0)
    )

    result <- sandbox$execute("
    s <- add_numbers(3, 4)
    p <- multiply_numbers(3, 4)
    print(paste('Sum:', s, 'Product:', p))
  ")
    # 3 + 4 = 7 and 3 * 4 = 12; expect_match() shows the output on mismatch.
    expect_match(result, "Sum:.* 7")
    expect_match(result, "Product:.* 12")
})

# A tool that returns a JSON-encoded data frame must yield something the
# sandbox code can index with `$value`.
# NOTE(review): presumably the sandbox parses JSON tool results — confirm
# against the SandboxManager implementation.
test_that("Tool results as data.frame are usable in pipeline", {
    data_tool <- tool(
        name = "get_data",
        description = "Returns sample data",
        parameters = z_object(n = z_number("Number of rows")),
        execute = function(args) {
            jsonlite::toJSON(data.frame(
                id = seq_len(args$n),
                value = seq_len(args$n) * 10
            ), auto_unbox = FALSE)
        }
    )

    sandbox <- SandboxManager$new(
        tools = list(data_tool),
        preload_packages = character(0)
    )

    result <- sandbox$execute("
    d <- get_data(5)
    print(sum(d$value))
  ")
    # value = 10, 20, 30, 40, 50 -> sum is 150.
    expect_match(result, "150")
})

# ============================================================================
# SandboxManager: Batch Processing with purrr/lapply
# ============================================================================

# Calls one tool repeatedly through lapply() inside the sandbox, combines the
# per-department results, and checks that only over-budget departments print.
test_that("Batch tool calls work via lapply in sandbox", {
    budget_tool <- tool(
        name = "get_budget",
        description = "Get budget for a department",
        parameters = z_object(dept = z_string("Department name")),
        execute = function(args) {
            # Simulate budget data
            budgets <- list(
                HR = list(allocated = 100, spent = 120),
                IT = list(allocated = 200, spent = 150),
                Sales = list(allocated = 150, spent = 160)
            )
            # Explicit NULL fallback instead of `%||%`, which is only part of
            # base R from 4.4.0 and would otherwise need another package.
            dept_budget <- budgets[[args$dept]]
            if (is.null(dept_budget)) {
                dept_budget <- list(allocated = 0, spent = 0)
            }
            jsonlite::toJSON(dept_budget, auto_unbox = TRUE)
        }
    )

    sandbox <- SandboxManager$new(
        tools = list(budget_tool),
        preload_packages = character(0)
    )

    result <- sandbox$execute("
    depts <- c('HR', 'IT', 'Sales')
    results <- lapply(depts, function(d) {
      b <- get_budget(d)
      data.frame(dept = d, allocated = b$allocated, spent = b$spent,
                 over = b$spent > b$allocated)
    })
    all_data <- do.call(rbind, results)
    over_budget <- all_data[all_data$over == TRUE, ]
    print(over_budget)
  ")
    # HR (120 > 100) and Sales (160 > 150) are over budget.
    expect_match(result, "HR")
    expect_match(result, "Sales")
    # IT should NOT appear (150 < 200)
    expect_false(grepl("IT", result))
})

# ============================================================================
# SandboxManager: Output Truncation
# ============================================================================

# With max_output_chars = 100, printing a roughly 500-character string must
# produce a result that mentions truncation and stays below 200 characters.
test_that("Output is truncated when exceeding max_output_chars", {
    sandbox <- SandboxManager$new(
        tools = list(),
        preload_packages = character(0),
        max_output_chars = 100
    )

    result <- sandbox$execute("print(paste(rep('aaaa', 100), collapse = ' '))")
    expect_match(result, "truncated")
    # Should be close to max_output_chars but not wildly over.
    # expect_lt() reports both numbers on failure, unlike expect_true(x < y).
    expect_lt(nchar(result), 200)
})

# ============================================================================
# SandboxManager: Environment Isolation
# ============================================================================

# Checks that a variable defined by the caller is not visible to code executed
# inside the sandbox, and that its value never appears in the output.
test_that("Sandbox cannot access global environment variables", {
    # NOTE: this binding lives in the test's own evaluation environment, not in
    # globalenv(); it disappears when the test block exits, so no rm() is needed.
    global_test_var_xyz <- "should not be accessible"

    sandbox <- SandboxManager$new(
        tools = list(),
        preload_packages = character(0)
    )

    result <- sandbox$execute("
    tryCatch({
      print(global_test_var_xyz)
    }, error = function(e) {
      print(paste('Isolated:', e$message))
    })
  ")
    # Either the in-sandbox handler fired ("Isolated"/"not found") or the
    # sandbox itself reported an error.
    expect_match(result, "Isolated|not found|Error")
    # The caller's value must never leak into the sandbox output.
    expect_false(grepl(global_test_var_xyz, result, fixed = TRUE))
})

# ============================================================================
# SandboxManager: Tool Signatures
# ============================================================================

# The signature text must mention the tool's name, each parameter name, and
# the tool description.
test_that("get_tool_signatures returns formatted signatures", {
    mock_tool <- tool(
        name = "search_docs",
        description = "Search documentation by query",
        parameters = z_object(
            query = z_string("Search query string"),
            limit = z_number("Max results to return")
        ),
        execute = function(args) "results"
    )

    sandbox <- SandboxManager$new(
        tools = list(mock_tool),
        preload_packages = character(0)
    )

    sigs <- sandbox$get_tool_signatures()
    # expect_match() prints the generated signature text when a piece is missing.
    expect_match(sigs, "search_docs")
    expect_match(sigs, "query")
    expect_match(sigs, "limit")
    expect_match(sigs, "Search documentation")
})

# With no tools bound, the signature listing is expected to be the empty string.
test_that("get_tool_signatures returns empty for no tools", {
    empty_sandbox <- SandboxManager$new(
        tools = list(),
        preload_packages = character(0)
    )
    signatures <- empty_sandbox$get_tool_signatures()
    expect_equal(signatures, "")
})

# ============================================================================
# SandboxManager: Reset
# ============================================================================

# After reset(), variables created by executed code must be gone, while tools
# supplied at construction time must remain callable.
test_that("reset clears user variables but keeps tools", {
    mock_tool <- tool(
        name = "ping",
        description = "Returns pong",
        execute = function() "pong"
    )

    sandbox <- SandboxManager$new(
        tools = list(mock_tool),
        preload_packages = character(0)
    )

    sandbox$execute("user_var <- 'hello'")
    sandbox$reset()

    # User variable should be gone: the in-sandbox handler prints 'gone' when
    # the lookup fails.
    result <- sandbox$execute("
    tryCatch(print(user_var), error = function(e) print('gone'))
  ")
    expect_match(result, "gone|not found")

    # But tool should still work
    result2 <- sandbox$execute("print(ping())")
    expect_match(result2, "pong")
})

# ============================================================================
# SandboxManager: Preloaded Packages
# ============================================================================

# With "dplyr" in preload_packages, sandbox code must be able to call filter()
# without a library() call or a namespace prefix.
test_that("dplyr functions are available when preloaded", {
    skip_if_not_installed("dplyr")

    sandbox <- SandboxManager$new(
        tools = list(),
        preload_packages = c("dplyr")
    )

    result <- sandbox$execute("
    df <- data.frame(x = 1:5, y = c(10, 20, 30, 40, 50))
    filtered <- filter(df, y > 25)
    print(nrow(filtered))
  ")
    # y > 25 keeps the rows with 30, 40 and 50 -> three rows.
    expect_match(result, "3")
})

# With "purrr" in preload_packages, sandbox code must be able to call map_dbl()
# without a library() call or a namespace prefix.
test_that("purrr functions are available when preloaded", {
    skip_if_not_installed("purrr")

    sandbox <- SandboxManager$new(
        tools = list(),
        preload_packages = c("purrr")
    )

    result <- sandbox$execute("
    result <- map_dbl(1:5, ~ .x * 2)
    print(result)
  ")
    # Doubling 1:5 gives 2 4 6 8 10, in that order.
    expect_match(result, "2.*4.*6.*8.*10")
})

# ============================================================================
# create_r_code_tool
# ============================================================================

# The factory must return an object of class "Tool" named "execute_r_code"
# whose description mentions the tool name or "R code".
test_that("create_r_code_tool returns a valid Tool", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))
    r_tool <- create_r_code_tool(sandbox)

    expect_s3_class(r_tool, "Tool")
    expect_equal(r_tool$name, "execute_r_code")
    # ignore.case is forwarded to grepl() by expect_match().
    expect_match(r_tool$description, "execute_r_code|R code", ignore.case = TRUE)
})

# The description of the generated tool must mention the names of the tools
# bound in the sandbox.
test_that("create_r_code_tool includes tool names in description", {
    mock_tool <- tool(
        name = "fetch_report",
        description = "Fetch a report by ID",
        parameters = z_object(id = z_string("Report ID")),
        execute = function(args) "report data"
    )

    sandbox <- SandboxManager$new(
        tools = list(mock_tool),
        preload_packages = character(0)
    )

    r_tool <- create_r_code_tool(sandbox)
    # expect_match() prints the full description when the name is missing.
    expect_match(r_tool$description, "fetch_report")
})

# Running the generated tool with a `code` argument must execute that code in
# the sandbox, including calls to tools bound there.
test_that("create_r_code_tool$run() executes code", {
    mock_tool <- tool(
        name = "double_it",
        description = "Doubles a number",
        parameters = z_object(x = z_number("Number")),
        execute = function(args) args$x * 2
    )

    sandbox <- SandboxManager$new(
        tools = list(mock_tool),
        preload_packages = character(0)
    )

    r_tool <- create_r_code_tool(sandbox)
    result <- r_tool$run(list(code = "print(double_it(21))"))
    # 21 * 2 = 42.
    expect_match(result, "42")
})

# Passing something other than a sandbox must raise an error whose message
# mentions "SandboxManager".
test_that("create_r_code_tool validates input", {
    invalid_input <- "not a sandbox"
    expect_error(
        create_r_code_tool(invalid_input),
        regexp = "SandboxManager"
    )
})

# ============================================================================
# create_sandbox_system_prompt
# ============================================================================

# The generated system prompt must contain its heading, the bound tool's name,
# and at least one of the usage hints (dplyr / filter / print).
test_that("create_sandbox_system_prompt returns instructions", {
    mock_tool <- tool(
        name = "query_db",
        description = "Execute SQL query",
        parameters = z_object(sql = z_string("SQL query")),
        execute = function(args) "results"
    )

    sandbox <- SandboxManager$new(
        tools = list(mock_tool),
        preload_packages = character(0)
    )

    prompt <- create_sandbox_system_prompt(sandbox)
    # expect_match() prints the prompt text when a required piece is missing.
    expect_match(prompt, "R Code Execution Environment")
    expect_match(prompt, "query_db")
    expect_match(prompt, "dplyr|filter|print")
})

# Passing something other than a sandbox must raise an error whose message
# mentions "SandboxManager".
test_that("create_sandbox_system_prompt validates input", {
    invalid_input <- "not a sandbox"
    expect_error(
        create_sandbox_system_prompt(invalid_input),
        regexp = "SandboxManager"
    )
})

# ============================================================================
# SandboxManager: Print Method
# ============================================================================

# print() on a SandboxManager must emit the class name, the tool count, and
# the names of the bound tools.
test_that("SandboxManager prints correctly", {
    mock_tool <- tool("t1", "Tool 1", execute = function() "ok")
    sandbox <- SandboxManager$new(tools = list(mock_tool), preload_packages = character(0))

    # capture.output() returns one element per printed line, so a match on any
    # single line is enough (all = FALSE).
    output <- capture.output(print(sandbox))
    expect_match(output, "SandboxManager", all = FALSE)
    expect_match(output, "Tools: 1", all = FALSE)
    expect_match(output, "t1", all = FALSE)
})

# ============================================================================
# SandboxManager: Parent Environment Integration
# ============================================================================

# A variable defined in the environment passed as `parent_env` must be
# readable from code executed in the sandbox.
test_that("Sandbox with parent_env inherits variables", {
    parent <- new.env(parent = emptyenv())
    parent$shared_data <- "from_parent"

    sandbox <- SandboxManager$new(
        tools = list(),
        preload_packages = character(0),
        parent_env = parent
    )

    result <- sandbox$execute("print(shared_data)")
    # expect_match() prints the captured text if the inherited value is missing.
    expect_match(result, "from_parent")
})

# ============================================================================
# SandboxManager: list_tools
# ============================================================================

# list_tools() must report the names of all tools given to the constructor;
# the result is sorted first so the comparison does not depend on order.
test_that("list_tools returns names of bound tools", {
    bound_tools <- list(
        tool("tool_a", "A", execute = function() "a"),
        tool("tool_b", "B", execute = function() "b")
    )

    sandbox <- SandboxManager$new(
        tools = bound_tools,
        preload_packages = character(0)
    )

    tool_names <- sort(sandbox$list_tools())
    expect_equal(tool_names, c("tool_a", "tool_b"))
})

# ============================================================================
# SandboxManager: Self-correction scenario
# ============================================================================

# When code refers to a misspelled variable, the returned text must flag an
# error and name the missing object (or say "not found").
test_that("Error message is clear enough for LLM self-correction", {
    sandbox <- SandboxManager$new(tools = list(), preload_packages = character(0))

    # Simulate a typical LLM mistake: wrong variable name
    # (`my_data` is defined, `mydata` is printed).
    result <- sandbox$execute("
    my_data <- data.frame(x = 1:3)
    print(mydata)
  ")
    expect_match(result, "Error")
    expect_match(result, "mydata|not found")
})

# Try the aisdk package in your browser
#
# Any scripts or data that you put into this service are public.
#
# aisdk documentation built on May 29, 2026, 9:07 a.m.