Nothing
#' Save data using dot notation or file path
#'
#' @param data Data frame to save
#' @param path Either:
#'
#' - Dot notation: `inputs.raw.filename` resolves to inputs/raw/filename.rds
#' - Direct path: "inputs/raw/filename.csv" uses path as-is
#'
#' Dot notation uses your configured directories
#' (e.g., `inputs.raw`, `inputs.intermediate`, `outputs.private`).
#' @param type Type of data file ("csv" or "rds"). Auto-detected from extension if path includes one.
#' @param delimiter Delimiter for CSV files ("comma", "tab", "semicolon", "space")
#' @param locked Whether the file should be locked after saving
#' @param force If TRUE, creates missing directories. If FALSE (default), errors if directory doesn't exist.
#' @return Invisibly returns the saved data.
#' @export
data_save <- function(data, path, type = NULL, delimiter = "comma", locked = TRUE, force = FALSE) {
# Validate arguments
checkmate::assert_data_frame(data, min.rows = 1)
checkmate::assert_string(path, min.chars = 1)
checkmate::assert_choice(delimiter, c("comma", "tab", "semicolon", "space"))
checkmate::assert_flag(locked)
checkmate::assert_flag(force)
# Detect path format
has_slash <- grepl("/", path, fixed = TRUE)
has_extension <- grepl("\\.(csv|rds)$", path, ignore.case = TRUE)
if (has_slash || has_extension) {
# Direct file path mode
file_path <- path
# Auto-detect type from extension if not specified
if (is.null(type)) {
if (grepl("\\.csv$", path, ignore.case = TRUE)) {
type <- "csv"
} else if (grepl("\\.rds$", path, ignore.case = TRUE)) {
type <- "rds"
} else {
stop("Cannot auto-detect file type from path. Please specify 'type' parameter.")
}
}
# Ensure extension matches type
expected_ext <- paste0(".", type)
if (!grepl(paste0(expected_ext, "$"), path, ignore.case = TRUE)) {
file_path <- paste0(path, expected_ext)
message(sprintf("Note: Added .%s extension to path", type))
}
dir_path <- dirname(file_path)
file_name <- basename(file_path)
} else if (grepl("\\.", path)) {
# Dot notation mode - directory key required
# Example: "inputs.raw.filename" → inputs/raw/filename.rds
# Example: "outputs.private.results" → outputs/private/results.rds
parts <- strsplit(path, ".", fixed = TRUE)[[1]]
if (length(parts) < 3) {
stop("Dot notation requires at least three parts (e.g., 'inputs.raw.filename')")
}
# First two parts form the directory key (joined with underscore)
# e.g., "inputs.raw" -> "inputs_raw"
dir_key <- paste(parts[1:2], collapse = "_")
config_key <- sprintf("directories.%s", dir_key)
resolved_dir <- tryCatch(config(config_key), error = function(e) NULL)
if (is.null(resolved_dir)) {
# Get available directory keys for helpful error message
all_dirs <- tryCatch(config("directories"), error = function(e) list())
available_keys <- if (is.list(all_dirs)) names(all_dirs) else character(0)
# Convert keys to dot notation for display
display_keys <- gsub("_", ".", available_keys)
stop(sprintf(
"Unknown directory '%s.%s'. Use a configured directory.\n\nAvailable directories:\n %s\n\nExamples:\n data_save(df, 'inputs.raw.mydata')\n data_save(df, 'outputs.private.results')",
parts[1], parts[2],
paste(display_keys, collapse = ", ")
))
}
dir_path <- resolved_dir
file_base <- paste(parts[-(1:2)], collapse = "_")
# Default type to rds if not specified
if (is.null(type)) type <- "rds"
file_name <- paste0(file_base, ".", type)
file_path <- file.path(dir_path, file_name)
} else {
# Simple filename with no directory
stop(sprintf(
"Path '%s' has no directory. Either:\n - Use dot notation: 'inputs.raw.%s' or 'inputs.intermediate.%s'\n - Provide full path: 'inputs/raw/%s.%s'",
path, path, path, path, type %||% "rds"
))
}
# Validate type after resolution
checkmate::assert_choice(type, c("csv", "rds"))
# Check if directory exists
if (!dir.exists(dir_path)) {
if (force) {
dir.create(dir_path, recursive = TRUE, showWarnings = FALSE)
message(sprintf("Created directory: %s", dir_path))
} else {
stop(sprintf(
"Directory '%s' does not exist. Use force = TRUE to create it.",
dir_path
))
}
}
# Save data
switch(type,
csv = {
readr::write_csv(data, file_path)
},
rds = saveRDS(data, file_path),
stop(sprintf("Unsupported file type: %s", type))
)
message(sprintf("Data saved to: %s", file_path))
# Create normalized name for database tracking
# Use the original path input as the identifier
data_name <- path
# Calculate hash and update data record
current_hash <- tryCatch(
.calculate_file_hash(file_path),
error = function(e) {
stop(sprintf("Failed to calculate file hash for '%s': %s", file_path, e$message))
}
)
tryCatch(
.set_data(
name = data_name,
path = file_path,
type = type,
delimiter = if (type == "csv") delimiter else NA,
locked = locked,
hash = current_hash
),
error = function(e) {
stop(sprintf("Failed to update database record for '%s': %s", data_name, e$message))
}
)
message(sprintf("\u2713 Database updated: %s", data_name))
# Register saved file in settings so data_read() works without manual catalog edits
if (!has_slash && !has_extension && grepl("\\.", path)) {
# Only attempt to update config for dot-notation paths
tryCatch({
data_add(
file_path = file_path,
name = data_name,
type = type,
delimiter = delimiter,
locked = locked,
update_config = TRUE
)
}, error = function(e) {
warning(sprintf(
"File saved but could not update data catalog for '%s': %s",
data_name,
e$message
))
})
}
invisible(data)
}
#' Add an existing file to the data catalog
#'
#' Registers an existing data file with the Framework data catalog. This allows
#' you to track files that were created outside of Framework (e.g., downloaded
#' from external sources, copied from other projects) and use them with
#' `data_read()` using dot notation.
#'
#' @param file_path Path to the existing file (must exist)
#' @param name Optional dot notation name for the data catalog (e.g., `inputs.raw.survey_data`).
#' If NULL, derives name from file path relative to project root.
#' @param type Optional type override. Auto-detected from file extension if NULL.
#' @param delimiter Delimiter for CSV files ("comma", "tab", "semicolon", "space")
#' @param locked Whether the file should be locked (hash-verified on read)
#' @param update_config If TRUE (default), also updates the YAML config with the data spec
#'
#' @return Invisibly returns the data spec that was created
#'
#' @examples
#' \donttest{
#' if (FALSE) {
#' # Add a downloaded CSV file to the catalog
#' data_add("inputs/raw/survey_results.csv", name = "inputs.raw.survey_results")
#'
#' # Now you can read it with dot notation
#' data_read("inputs.raw.survey_results")
#'
#' # Add with auto-generated name
#' data_add("inputs/intermediate/cleaned_data.rds")
#' # Name will be derived as "inputs.intermediate.cleaned_data"
#' }
#' }
#'
#' @export
data_add <- function(file_path, name = NULL, type = NULL, delimiter = "comma",
locked = TRUE, update_config = TRUE) {
# Validate arguments
checkmate::assert_string(file_path, min.chars = 1)
checkmate::assert_string(name, min.chars = 1, null.ok = TRUE)
checkmate::assert_choice(type, c("csv", "rds", "tsv", "excel", "stata", "spss", "sas", "parquet"), null.ok = TRUE)
checkmate::assert_choice(delimiter, c("comma", "tab", "semicolon", "space"))
checkmate::assert_flag(locked)
checkmate::assert_flag(update_config)
# Check file exists
if (!file.exists(file_path)) {
stop(sprintf("File not found: %s", file_path))
}
# Normalize the path
file_path <- normalizePath(file_path, mustWork = TRUE)
# Auto-detect type from extension if not specified
if (is.null(type)) {
file_ext <- tolower(sub(".*\\.", "", file_path))
type <- switch(file_ext,
csv = "csv",
tsv = "tsv",
txt = "tsv",
dat = "tsv",
rds = "rds",
xlsx = "excel",
xls = "excel",
dta = "stata",
sav = "spss",
zsav = "spss",
por = "spss",
sas7bdat = "sas",
xpt = "sas",
parquet = "parquet",
stop(sprintf("Cannot auto-detect type for extension: %s. Please specify 'type' parameter.", file_ext))
)
}
# Generate name from file path if not provided
if (is.null(name)) {
# Try to make a sensible dot-notation name from the path
# e.g., "inputs/raw/survey.csv" -> "inputs.raw.survey"
# e.g., "inputs/intermediate/cleaned.rds" -> "inputs.intermediate.cleaned"
rel_path <- file_path
# Try to make path relative to common directories
for (dir_key in c("inputs_raw", "inputs_intermediate", "inputs_final", "inputs_reference")) {
dir_path <- tryCatch(config(sprintf("directories.%s", dir_key)), error = function(e) NULL)
if (!is.null(dir_path) && startsWith(file_path, normalizePath(dir_path, mustWork = FALSE))) {
rel_path <- sub(paste0("^", normalizePath(dir_path, mustWork = FALSE), "/?"), "", file_path)
# Convert directory key to dot notation (inputs_raw -> inputs.raw)
prefix <- gsub("_", ".", dir_key)
break
}
}
# Remove extension and convert path separators to dots
name_base <- sub("\\.[^.]+$", "", basename(rel_path))
if (exists("prefix")) {
name <- paste(prefix, name_base, sep = ".")
} else {
# Use filename without directory structure
name <- name_base
message(sprintf("Note: Using '%s' as data name. Consider specifying a more descriptive name.", name))
}
}
# Calculate hash
current_hash <- tryCatch(
.calculate_file_hash(file_path),
error = function(e) {
stop(sprintf("Failed to calculate file hash for '%s': %s", file_path, e$message))
}
)
# Determine delimiter value for storage
delimiter_value <- if (type == "csv") delimiter else NA
# Register in the database
tryCatch(
.set_data(
name = name,
path = file_path,
type = type,
delimiter = delimiter_value,
locked = locked,
hash = current_hash
),
error = function(e) {
stop(sprintf("Failed to register data in database: %s", e$message))
}
)
message(sprintf("Data registered in database: %s", name))
# Build spec for config
spec <- list(
path = file_path,
type = type
)
if (!is.na(delimiter_value)) {
spec$delimiter <- delimiter
}
if (locked) {
spec$locked <- TRUE
}
# Update YAML config if requested
if (update_config) {
tryCatch({
.data_spec_update(name, spec)
message(sprintf("Data spec added to config: %s", name))
}, error = function(e) {
warning(sprintf("Could not update config (database record was still created): %s", e$message))
})
}
message(sprintf("\nYou can now read this data with: data_read(\"%s\")", name))
invisible(spec)
}
#' Set a data value
#' @param name The data name
#' @param path The file path
#' @param type The data type (csv, rds, etc.)
#' @param delimiter The delimiter for CSV files
#' @param locked Whether the data is locked
#' @param hash The hash of the data
#' @keywords internal
.set_data <- function(name, path = NULL, type = NULL, delimiter = NULL, locked = FALSE, hash = NULL) {
# Validate arguments
checkmate::assert_string(name, min.chars = 1)
checkmate::assert_string(path, null.ok = TRUE, na.ok = TRUE)
checkmate::assert_string(type, null.ok = TRUE, na.ok = TRUE)
checkmate::assert_string(delimiter, null.ok = TRUE, na.ok = TRUE)
checkmate::assert_flag(locked)
checkmate::assert_string(hash, null.ok = TRUE, na.ok = TRUE)
# Get database connection
con <- tryCatch(
.get_db_connection(),
error = function(e) {
stop(sprintf("Failed to connect to database: %s", e$message))
}
)
on.exit(DBI::dbDisconnect(con), add = TRUE)
now <- lubridate::now()
# Convert booleans to integers for SQLite
locked_int <- as.integer(locked)
# Convert NULL values to NA for SQLite
path_value <- if (is.null(path)) NA else path
type_value <- if (is.null(type)) NA else type
delimiter_value <- if (is.null(delimiter)) NA else delimiter
hash_value <- if (is.null(hash)) NA else hash
# Check if entry exists
entry_exists <- tryCatch(
DBI::dbGetQuery(
con,
"SELECT 1 FROM data WHERE name = ?",
list(name)
),
error = function(e) {
stop(sprintf("Failed to check for existing data record: %s", e$message))
}
)
if (nrow(entry_exists) > 0) {
# Update existing entry
tryCatch(
DBI::dbExecute(
con,
"UPDATE data SET path = ?, type = ?, delimiter = ?, locked = ?, hash = ?, last_read_at = ?, updated_at = ? WHERE name = ?",
list(path_value, type_value, delimiter_value, locked_int, hash_value, now, now, name)
),
error = function(e) {
stop(sprintf("Failed to update data record: %s", e$message))
}
)
} else {
# Insert new entry
tryCatch(
DBI::dbExecute(
con,
"INSERT INTO data (name, path, type, delimiter, locked, hash, last_read_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
list(name, path_value, type_value, delimiter_value, locked_int, hash_value, now, now, now)
),
error = function(e) {
stop(sprintf("Failed to insert data record: %s", e$message))
}
)
}
invisible(NULL)
}
#' Remove a data value
#' @param name The data name to remove
#' @keywords internal
.remove_data <- function(name) {
con <- .get_db_connection()
DBI::dbExecute(
con,
"DELETE FROM data WHERE name = ?",
params = list(name)
)
DBI::dbDisconnect(con)
}
#' Update data spec in the correct YAML file
#'
#' Traverses a dot-notated key like "final.public.test" and updates or inserts
#' the given spec in the corresponding YAML file (either embedded in settings.yml
#' or in an external settings/data.yml file). Automatically handles nested paths
#' and creates intermediate structures as needed.
#'
#' @param path Dot notation key (e.g., "final.public.test") indicating where
#' to place the spec in the configuration hierarchy
#' @param spec A named list containing the data spec fields (path, type,
#' delimiter, locked, encrypted, etc.)
#'
#' @return Invisibly returns NULL. Function is called for its side effect of
#' updating the YAML configuration file.
#'
#' @keywords internal
.data_spec_update <- function(path, spec) {
# Validate arguments
checkmate::assert_string(path, min.chars = 1)
checkmate::assert_list(spec)
parts <- strsplit(path, "\\.")[[1]]
# Discover settings file
config_path <- .get_settings_file()
if (is.null(config_path)) {
stop("Configuration file 'settings.yml' (or legacy config.yml) not found")
}
# Load raw config to determine where `data` is defined
raw_config <- tryCatch(
yaml::read_yaml(config_path, eval.expr = FALSE),
error = function(e) {
stop(sprintf("Failed to read %s: %s", config_path, e$message))
}
)
# Support both environment-scoped and flat configs
has_envs <- !is.null(raw_config$default) && is.list(raw_config$default)
env_key <- if (has_envs) "default" else NULL
env_config <- if (has_envs) raw_config$default else raw_config
data_source <- env_config$data
# Determine if `data` is a path or inline
is_external <- is.character(data_source) && length(data_source) == 1
if (is_external) {
data_path <- data_source
current <- if (file.exists(data_path)) {
tryCatch({
external_content <- yaml::read_yaml(data_path, eval.expr = FALSE)
# External data files have a 'data' wrapper, extract it
if (!is.null(external_content$data)) {
external_content$data
} else {
external_content
}
},
error = function(e) {
stop(sprintf("Failed to read data file '%s': %s", data_path, e$message))
})
} else {
list()
}
} else {
data_path <- config_path
current <- if (is.null(data_source)) list() else data_source
}
# Build nested path and insert the spec
# We need to use a recursive approach since R lists are not reference types
if (length(parts) == 1) {
current[[parts[1]]] <- spec
} else {
# Build the nested structure by evaluating from the deepest level up
path_str <- paste0("current", paste0("[[\"", parts, "\"]]", collapse = ""))
assign_str <- paste0(path_str, " <- spec")
# Ensure intermediate paths exist
for (i in seq_along(parts)[-length(parts)]) {
intermediate_path <- paste0("current", paste0("[[\"", parts[1:i], "\"]]", collapse = ""))
eval_str <- paste0("if (is.null(", intermediate_path, ") || !is.list(", intermediate_path, ")) { ",
intermediate_path, " <- list() }")
eval(parse(text = eval_str))
}
# Assign the spec
eval(parse(text = assign_str))
}
# Write back to the correct location
tryCatch({
if (is_external) {
dir.create(dirname(data_path), recursive = TRUE, showWarnings = FALSE)
# Wrap in 'data' key when writing to external file
yaml::write_yaml(list(data = current), data_path)
} else {
env_config$data <- current
if (!is.null(env_key)) {
raw_config[[env_key]] <- env_config
} else {
raw_config <- env_config
}
yaml::write_yaml(raw_config, config_path)
}
}, error = function(e) {
stop(sprintf("Failed to write configuration: %s", e$message))
})
invisible(NULL)
}
#' Update data with hash in the data table
#' @param name The data name
#' @param hash The hash to store
#' @keywords internal
.update_data_with_hash <- function(name, hash) {
con <- .get_db_connection()
now <- lubridate::now()
# Check if entry exists
entry_exists <- DBI::dbGetQuery(
con,
"SELECT 1 FROM data WHERE name = ?",
list(name)
)
if (nrow(entry_exists) > 0) {
# Update existing entry
DBI::dbExecute(
con,
"UPDATE data SET hash = ?, last_read_at = ?, updated_at = ? WHERE name = ?",
list(hash, now, now, name)
)
} else {
# Insert new entry
DBI::dbExecute(
con,
"INSERT INTO data (name, hash, last_read_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
list(name, hash, now, now, now)
)
}
DBI::dbDisconnect(con)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.