Nothing
#' Enable LLM to draft and execute R code
#'
#' This function adds a prompt wrap to a [tidyprompt()] that instructs the
#' LLM to answer the prompt with R code. There are various options to customize
#' the behavior of this prompt wrap, concerning the evaluation of the R code,
#' the packages that may be used, the objects that already exist in the R
#' session, and if the console output that should be sent back to the LLM.
#'
#' @details For the evaluation of the R code, the 'callr' package is required.
#' Please note: automatic evaluation of generated R code may be dangerous to your
#' system; you must use this function with caution.
#'
#' @param prompt A single string or a [tidyprompt()] object
#'
#' @param add_text Single string which will be added to the prompt text,
#' informing the LLM that they must code in R to answer the prompt
#'
#' @param pkgs_to_use A character vector of package names that may be used
#' in the R code that the LLM will generate. If evaluating the R code, these
#' packages will be pre-loaded in the R session
#'
#' @param objects_to_use A named list of objects that may be used in the R code
#' that the LLM will generate. If evaluating the R code, these objects will be pre-loaded
#' in the R session. The names of the list will be used as the object names in the
#' R session
#'
#' @param list_packages Logical indicating whether the LLM should be informed
#' about the packages that may be used in their R code (if TRUE, a list of the
#' loaded packages will be shown in the initial prompt)
#'
#' @param list_objects Logical indicating whether the LLM should be informed
#' about the existence of 'objects_to_use' (if TRUE, a list of the objects
#' plus their types will be shown in the initial prompt)
#'
#' @param skim_dataframes Logical indicating whether the LLM should be informed
#' about the structure of dataframes present in 'objects_to_use' (if TRUE,
#' a skim summary of each `data.frame` type object will be shown in the initial prompt).
#' This uses the function [skim_with_labels_and_levels()]
#'
#' @param evaluate_code Logical indicating whether the R code should be
#' evaluated. If TRUE, the R code will be evaluated in a separate R session
#' (using 'callr' to create an isolated R session via \link[callr]{r_session}).
#' Note that setting this to 'TRUE' means that code generated by the LLM will
#' run on your system; use this setting with caution
#'
#' @param r_session_options A list of options to pass to the \link[callr]{r_session}.
#' This can be used to customize the R session. See \link[callr]{r_session_options}
#' for the available options. If no options are provided, the default options
#' will be used but with 'system_profile' and 'user_profile' set to FALSE
#'
#' @param output_as_tool Logical indicating whether the console output of the
#' evaluated R code should be sent back to the LLM, meaning the LLM will use
#' R code as a tool to formulate a final answer to the prompt. If TRUE, the LLM
#' can decide if they can answer the prompt with the output, or if they need to modify
#' their R code. Once the LLM does not provide new R code (i.e., the prompt is being answered)
#' this prompt wrap will end (it will continue for as long as the LLM provides R code).
#' When this option is enabled, the resulting [prompt_wrap()] will be of type 'tool'.
#' If TRUE, the return mode will also always be set to 'llm_answer'
#'
#' @param return_mode Single string indicating the return mode. One of:
#' \itemize{
#' \item 'full': Return a list with the final LLM answer, the extracted R code,
#' and (if argument 'evaluate_code' is TRUE) the output of the R code
#' \item 'code': Return only the extracted R code
#' \item 'console': Return only the console output of the evaluated R code
#' \item 'object': Return only the object produced by the evaluated R code
#' \item 'formatted_output': Return a formatted string with the extracted R code
#' and its console output, and a print of the last object (this is identical to how it would
#' be presented to the LLM if 'output_as_tool' is TRUE)
#' \item 'llm_answer': Return only the final LLM answer
#' }
#' When choosing 'console' or 'object', an additional instruction will be added to
#' the prompt text to inform the LLM about the expected output of the R code.
#' If 'output_as_tool' is TRUE, the return mode will always be set to 'llm_answer'
#' (as the LLM will be using the R code as a tool to answer the prompt)
#'
#' @return A [tidyprompt()] object with the [prompt_wrap()] added to it, which
#' will handle R code generation and possibly evaluation
#'
#' @export
#'
#' @example inst/examples/answer_using_r.R
#'
#' @seealso [answer_using_tools()]
#'
#' @family pre_built_prompt_wraps
#' @family answer_using_prompt_wraps
answer_using_r <- function(
prompt,
add_text = "You must code in the programming language 'R' to answer this prompt.",
pkgs_to_use = c(),
objects_to_use = list(),
list_packages = TRUE,
list_objects = TRUE,
skim_dataframes = TRUE,
evaluate_code = FALSE,
r_session_options = list(),
output_as_tool = FALSE,
return_mode = c(
"full",
"code",
"console",
"object",
"formatted_output",
"llm_answer"
)
) {
## Validate settings
return_mode <- match.arg(return_mode)
stopifnot(
is.character(add_text), length(add_text) == 1,
length(pkgs_to_use) == 0 || is.vector(pkgs_to_use) & all(sapply(pkgs_to_use, is.character)),
is.list(objects_to_use),
length(objects_to_use) == 0 || !is.null(names(objects_to_use)),
is.logical(list_packages),
is.logical(list_objects),
is.logical(skim_dataframes),
is.logical(evaluate_code),
is.logical(output_as_tool),
is.list(r_session_options)
)
if (evaluate_code & !requireNamespace("callr", quietly = TRUE))
stop("The 'callr' package is required to evaluate R code.")
if (!evaluate_code & output_as_tool)
output_as_tool <- FALSE
if (output_as_tool)
return_mode <- "llm_answer"
if (!evaluate_code & return_mode %in% c("console", "object", "formatted_output"))
stop("The return mode must be 'full', 'code', or 'llm_answer' if 'evaluate_code' is FALSE")
## Validate evaluation_session & load packages
if (evaluate_code) {
if (length(r_session_options) == 0) {
r_session_options <- callr::r_session_options()
r_session_options$system_profile <- FALSE
r_session_options$user_profile <- FALSE
}
evaluation_session <- callr::r_session$new(options = r_session_options)
# Check if packages are installed using requireNamespace
installed_pkgs <- evaluation_session$run(function(pkgs_to_use) {
# Check if each package is installed using requireNamespace and return as a named list
sapply(pkgs_to_use, function(pkg) {
requireNamespace(pkg, quietly = TRUE)
}, simplify = TRUE, USE.NAMES = TRUE)
}, args = list(pkgs_to_use = pkgs_to_use))
if (any(installed_pkgs == FALSE)) {
stop(paste0(
"The following packages are not installed: ",
names(installed_pkgs)[installed_pkgs == FALSE]
))
}
# Load the packages
loaded_pkgs <- evaluation_session$run(function(pkgs_to_use) {
for (pkg_name in pkgs_to_use) {
library(pkg_name, character.only = TRUE)
}
session_info <- utils::sessionInfo()
loaded_pkgs <- names(session_info$otherPkgs)
loaded_pkgs
}, args = list(pkgs_to_use = pkgs_to_use))
# Load the objects
loaded_objects <- evaluation_session$run(function(objects_to_use) {
for (i in seq_along(objects_to_use)) {
obj <- objects_to_use[[i]]
obj_name <- names(objects_to_use)[i]
# Assign to the global environment of the r_session
assign(obj_name, obj, envir = parent.env(environment()))
}
# List objects in the global environment
ls(envir = .GlobalEnv)
}, args = list(objects_to_use = objects_to_use))
loaded_objects
if (!all(loaded_objects %in% names(objects_to_use))) {
stop(paste0(
"The following objects could not be loaded: ",
names(objects_to_use)[!(names(objects_to_use) %in% names(loaded_objects))]
))
}
} else {
loaded_pkgs <- pkgs_to_use
loaded_objects <- names(objects_to_use)
}
## Define modify_fn which will add information about the setting
## in which R code can be generated
modify_fn <- function(original_prompt_text) {
new_text <- glue::glue(
"{original_prompt_text}\n\n",
"{add_text}"
)
if (list_packages & length(loaded_pkgs) > 0) {
new_text <- glue::glue(
"{new_text}\n",
"You can use functions from these packages: ",
"{loaded_pkgs |> paste(collapse = ', ')}."
)
}
new_text <- glue::glue(
"{new_text}\n",
"You may not install or load any additional packages."
)
if (list_objects) {
object_types <- sapply(objects_to_use, function(obj) class(obj))
objects_df <- data.frame(Object_name = names(objects_to_use), Type = object_types)
if (nrow(objects_df) > 0) {
new_text <- glue::glue(
"{new_text}\n",
"These objects already exist in the R session:\n\n",
"{objects_df |> df_to_string()}.\n\n",
"Do not define these objects in your R code."
)
if (skim_dataframes) {
dataframes <- objects_df$Object_name[objects_df$Type == "data.frame"]
if (length(dataframes) > 0) {
if (!requireNamespace("skimr", quietly = TRUE)) {
warning(paste0(
"The 'skimr' package is required to skim dataframes.",
" Skim summary of dataframes currently not shown in prompt"
))
} else {
for (df_name in dataframes) {
df <- objects_to_use[[df_name]]
new_text <- glue::glue(
"{new_text}\n\n",
"Summary of the dataframe '{df_name}':\n",
"{df |> skim_with_labels_and_levels() |> df_to_string()}\n\n"
)
}
}
}
}
if (output_as_tool) {
new_text <- glue::glue(
"{new_text}\n",
"If you need more information about these objects,",
" you can call R functions to describe them."
)
}
}
}
if (evaluate_code & return_mode == "console") {
new_text <- glue::glue(
"{new_text}\n",
"The R code should produce console output that answers the prompt."
)
}
if (evaluate_code & return_mode == "object") {
new_text <- glue::glue(
"{new_text}\n",
"The R code should produce an object that answers the prompt."
)
}
if (output_as_tool) {
new_text <- glue::glue(
"{new_text}\n",
"The console output of your R code will be sent back to you.",
" Use print() on all objects or values that you need to see.",
" You can not view plots, all output must be text-based.",
" After you get console output from me, decide if you can answer the prompt or if",
" you need to modify your R code. When you can formulate your final answer,",
" do not provide any R code in it."
)
}
return(new_text)
}
## Define extraction_fn which will extract R code from the response
## and handle it according to the settings of this function
extraction_fn <- function(x) {
return_list <- list()
return_list$llm_answer <- x
extracted_code <- answer_using_r_extract_r_code(x)
if (length(extracted_code) == 0) {
if (output_as_tool) {
return(x)
}
return(llm_feedback(paste0(
"No R code detected. You must provide R code",
" between ```r and ```."
)))
}
# Check if the R code is valid
parsed_code <- tryCatch(parse(text = extracted_code), error = function(e) e)
if (inherits(parsed_code, "error")) {
return(llm_feedback(glue::glue(
"Invalid R code detected:\n",
" {parsed_code$message}\n",
"Please provide syntactically correct R code."
)))
}
return_list$code <- parsed_code
if (!evaluate_code) {
if (return_mode == "code") return(parsed_code)
if (return_mode == "llm_answer") return(x)
return(return_list)
}
clone_session <- evaluation_session$clone() # Reset the session every time
output <- clone_session$run_with_output(function(r_code) {
eval(parse(text = r_code))
}, args = list(parsed_code))
# Check if errors occurred during execution
if (!is.null(output$error)) {
return(llm_feedback(glue::glue(
"An error occurred while executing the R code:\n",
" {output$error}"
)))
}
# Check if the code produced any relevant output
if (output$stdout == "" & return_mode == "console") {
return(llm_feedback(glue::glue(
"The R code did not produce any console output.",
" Please provide R code that produces console output."
)))
}
if (is.null(output$result) & return_mode == "object") {
return(llm_feedback(glue::glue(
"The R code did not produce an object.",
" Please provide R code that produces an object."
)))
}
if (is.null(output$stdout) & is.null(output$result)) {
return(llm_feedback(glue::glue(
"The R code did not produce any output.",
" Please provide R code that produces output."
)))
}
return_list$output <- output
return_list$formatted_output <- glue::glue(
"--- R code: ---\n",
"{extracted_code |> paste(collapse = \"\\n\")}\n\n",
"--- Console output: ---\n",
"{
if (is.null(output$stdout) || output$stdout == \"\") {
\"No console output produced.\"
} else {
output$stdout |>
paste(collapse = \"\\n\") |>
stringr::str_trunc(1000) |>
print()
}
}\n\n",
"--- Last object: ---\n",
"{
if (is.null(output$result)) {
\"No object produced.\"
} else {
output$result |>
paste(collapse = \"\\n\") |>
stringr::str_trunc(100) |>
print()
}
}"
)
if (output_as_tool) {
return(llm_feedback(return_list$formatted_output, tool_result = TRUE))
}
if (return_mode == "full")
return(return_list)
if (return_mode == "code")
return(return_list$code)
if (return_mode == "console")
return(return_list$output$stdout)
if (return_mode == "object")
return(return_list$output$result)
if (return_mode == "formatted_output")
return(return_list$formatted_output)
if (return_mode == "llm_answer")
return(x)
return(output$stdout)
}
## If we are sending back output, we can consider this wrapper a tool
type <- "unspecified"
if (output_as_tool) {
type <- "tool"
}
## Finally, wrap the prompt with the new prompt wrap
prompt_wrap(
prompt, modify_fn, extraction_fn,
type = type, name = "answer_using_r"
)
}
#' Helper function to extract R code from a string
#'
#' This function extracts R code from a string by matching all content between
#' '```r' and '```'.
#'
#' @param input_string A string containing R code, typically a response
#' from an LLM
#'
#' @return A character vector containing the extracted R code
#'
#' @noRd
answer_using_r_extract_r_code <- function(input_string) {
# Use regular expression to match all content between ```r and ```, with case-insensitive matching
matches <- gregexpr("(?s)```[rR]\\s*(.*?)\\s*```", input_string, perl = TRUE)
extracted_code <- regmatches(input_string, matches)
# Remove the ```r and ``` wrappers (works for both lowercase and uppercase R)
extracted_code <- lapply(extracted_code, function(x) {
sub("(?s)```[rR]\\s*(.*?)\\s*```", "\\1", x, perl = TRUE)
})
return(unlist(extracted_code))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.