#' @title Create a template yaml file for supplying attribute metadata for a
#' tabular data object
#'
#' @description write_attributes creates a template as a yaml file for
#' supplying attribute metadata for a tabular data object that resides in the R
#' environment.
#'
#' @details The yaml template generated by write_attributes includes the field
#' names of the data entity. The number type, column class (e.g., factor,
#' numeric), minimum and maximum values (if numeric), and missing value code
#' and explanation (if provided) for each field. The template supports input of
#' format string, unit, definition, and attribute definition. The yaml file is
#' written with the name of the data object in R + "_attrs". The
#' create_dataTable function will search for this file will creating a EML
#' dataTable entity.
#'
#' @param dfname
#' (character) The quoted or unquoted name of the data object (data frame,
#' tibble, simple feature).
#' @param overwrite
#' (logical) Logical indicating if an existing attributes file in the target
#' directory should be overwritten.
#' @param return_type
#' (character) Quoted designator indicating the value returned as either a
#' attributes template yaml file (return_type = "yaml", the default) or a list
#' of entity attributes (return_type = "attributes") constructed from the data
#' entity, the latter primarily as a helper feature for updating an existing
#' attributes file.
#'
#' @import dplyr
#' @import yaml
#' @importFrom purrr map_chr map2
#' @importFrom sf st_drop_geometry
#' @importFrom stats na.omit
#' @importFrom lubridate is.POSIXt is.POSIXlt is.POSIXct
#'
#' @return The name of the file generated is returned, and a template for
#' providing attribute metadata as a yaml file with the file name of the R data
#' object + "_attrs.yaml" is created in the working directory.
#'
#' @examples
#' \dontrun{
#'
#' # create attributes file for mycars data object, overwriting an existing
#' # file if one exists
#'
#' mycars <- head(mtcars)
#'
#' capeml::write_attributes(
#' dfname = mycars,
#' overwrite = TRUE
#' )
#'
#' }
#'
#' @export
#'
write_attributes <- function(
dfname,
overwrite = FALSE,
return_type = "yaml"
) {
# get text reference of dataframe name for use throughout -------------------
if (rlang::is_expression(dfname)) {
namestr <- rlang::get_expr(dfname)
} else {
namestr <- deparse(substitute(dfname))
}
# load object from environment ----------------------------------------------
data_object <- get(namestr)
# do not write geometry column(s) if simple features
if (class(data_object)[[1]] == "sf") {
data_object <- data_object |>
sf::st_drop_geometry()
}
attribute_list <- purrr::map2(
.x = data_object,
.y = colnames(data_object),
.f = attributes_to_yaml
)
if (grepl("yaml", return_type, ignore.case = TRUE)) {
# establish yaml object name for checking if exists and writing to file
file_name <- paste0(namestr, "_attrs.yaml")
# check if attributes already exists for given data entity
if (file.exists(file_name) && overwrite == FALSE) {
stop(
paste0("file ", file_name, " already exists, use write_attributes(overwrite = TRUE) to overwrite")
)
}
attribute_yaml <- yaml::as.yaml(attribute_list)
yaml::write_yaml(
x = attribute_yaml,
file = file_name
)
message(paste0("constructed attribute yaml: ", file_name))
} else if (grepl("attr", return_type, ignore.case = TRUE)) {
return(attribute_list)
} else {
stop("ambiguous return_type, should be 'yaml' or 'attributes'")
}
}
#' @description a helper function used by write_attributes to determine the
#' type of a numeric variable
#'
#' @note internal to write_attributes and not exported
#'
#' @param numeric_object
#' (numeric) numeric object (e.g., 2, 3.2)
get_number_type <- function(numeric_object) {
raw <- na.omit(numeric_object)
raw <- raw[is.finite(raw)] # remove infs (just in case)
rounded <- floor(raw)
if (length(raw) - sum(raw == rounded, na.rm = TRUE) > 0) {
number_type <- "real" # all
} else if (min(raw, na.rm = TRUE) > 0) {
number_type <- "natural" # 1, 2, 3, ... (sans 0)
} else if (min(raw, na.rm = TRUE) < 0) {
number_type <- "integer" # whole + negative values
} else {
number_type <- "whole" # natural + 0
}
return(number_type)
}
#' @description a helper function used by write_attributes to construct a
#' type-specific yaml entry for each variable of a data object
#'
#' @note internal to write_attributes and not exported
#'
#' @param variable
#' (column) Column or variable of a data entity (e.g., mtcars$mpg)
#'
#' @param varName
#' (character) Name of @variable (e.g., "mpg" from above example)
#' @examples
#'
#' \dontrun{
#'
#' attributes_to_yaml(
#' variable = mtcars$mpg, "mpg",
#' varName = "mpg"
#' )
#'
#' attribute_list <- purrr::map2(
#' .x = mtcars,
#' .y = colnames(mtcars),
#' .f = attributes_to_yaml
#' )
#'
#'
#' }
#'
attributes_to_yaml <- function(variable, varName) {
# helper function to check the class of a variable; the column class of a
# spatial file can be a vector so pull the first entity only
check_class <- function(x) { class(x)[[1]] }
variableAttributes <- list(
attributeName = varName,
attributeDefinition = "",
propertyURI = "",
propertyLabel = "",
valueURI = "",
valueLabel = ""
)
if (is.numeric(variable)) {
variableAttributes <- c(
variableAttributes,
unit = "",
numberType = get_number_type(variable),
minimum = min(variable, na.rm = TRUE),
maximum = max(variable, na.rm = TRUE)
)
if (is.integer(variable)) {
variableAttributes <- c(
variableAttributes,
columnClasses = "numeric"
)
} else {
variableAttributes <- c(
variableAttributes,
columnClasses = check_class(variable)
)
}
} else if (is.character(variable)) {
variableAttributes <- c(
variableAttributes,
columnClasses = check_class(variable),
definition = ""
)
} else if (is.factor(variable)) {
variableAttributes <- c(
variableAttributes,
columnClasses = check_class(variable)
)
} else if (
lubridate::is.Date(variable) ||
lubridate::is.POSIXt(variable) ||
lubridate::is.POSIXlt(variable) ||
lubridate::is.POSIXct(variable)
) {
variableAttributes <- c(
variableAttributes,
columnClasses = "Date",
formatString = "YYYY-MM-DD"
)
} else {
stop("dataframe has a variable for which the class could not be determined")
}
return(variableAttributes)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.