R/write_attributes.R

Defines functions attributes_to_yaml get_number_type write_attributes

Documented in write_attributes

#' @title Create a template yaml file for supplying attribute metadata for a
#' tabular data object
#'
#' @description write_attributes creates a template as a yaml file for
#' supplying attribute metadata for a tabular data object that resides in the R
#' environment.
#'
#' @details The yaml template generated by write_attributes includes the field
#' names of the data entity. The number type, column class (e.g., factor,
#' numeric), minimum and maximum values (if numeric), and missing value code
#' and explanation (if provided) for each field. The template supports input of
#' format string, unit, definition, and attribute definition. The yaml file is
#' written with the name of the data object in R + "_attrs". The
#' create_dataTable function will search for this file will creating a EML
#' dataTable entity.
#'
#' @param dfname
#' (character) The quoted or unquoted name of the data object (data frame,
#' tibble, simple feature).
#' @param overwrite
#' (logical) Logical indicating if an existing attributes file in the target
#' directory should be overwritten.
#' @param return_type
#' (character) Quoted designator indicating the value returned as either a
#' attributes template yaml file (return_type = "yaml", the default) or a list
#' of entity attributes (return_type = "attributes") constructed from the data
#' entity, the latter primarily as a helper feature for updating an existing
#' attributes file.
#'
#' @import dplyr
#' @import yaml
#' @importFrom purrr map_chr map2
#' @importFrom sf st_drop_geometry
#' @importFrom stats na.omit
#' @importFrom lubridate is.POSIXt is.POSIXlt is.POSIXct
#'
#' @return The name of the file generated is returned, and a template for
#' providing attribute metadata as a yaml file with the file name of the R data
#' object + "_attrs.yaml" is created in the working directory.
#'
#' @examples
#' \dontrun{
#'
#'  # create attributes file for mycars data object, overwriting an existing
#'  # file if one exists
#'
#'  mycars <- head(mtcars)
#'
#'  capeml::write_attributes(
#'    dfname    = mycars,
#'    overwrite = TRUE
#'  )
#'
#' }
#'
#' @export
#'
write_attributes <- function(
  dfname,
  overwrite   = FALSE,
  return_type = "yaml"
  ) {

  # get text reference of dataframe name for use throughout -------------------

  if (rlang::is_expression(dfname)) {

    namestr <- rlang::get_expr(dfname)

  } else {

    namestr <- deparse(substitute(dfname))

  }


  # load object from environment ----------------------------------------------

  data_object <- get(namestr)


  # do not write geometry column(s) if simple features

  if (class(data_object)[[1]] == "sf") {

    data_object <- data_object |>
    sf::st_drop_geometry()

  }


  attribute_list <- purrr::map2(
    .x = data_object,
    .y = colnames(data_object),
    .f = attributes_to_yaml
  )


  if (grepl("yaml", return_type, ignore.case = TRUE)) {

    # establish yaml object name for checking if exists and writing to file
    file_name   <- paste0(namestr, "_attrs.yaml")

    # check if attributes already exists for given data entity
    if (file.exists(file_name) && overwrite == FALSE) {

      stop(
        paste0("file ", file_name, " already exists, use write_attributes(overwrite = TRUE) to overwrite")
      )

    }

    attribute_yaml <- yaml::as.yaml(attribute_list)

    yaml::write_yaml(
      x    = attribute_yaml,
      file = file_name
    )

    message(paste0("constructed attribute yaml: ", file_name))

  } else if (grepl("attr", return_type, ignore.case = TRUE)) {

    return(attribute_list)

  } else {

    stop("ambiguous return_type, should be 'yaml' or 'attributes'")

  }

}


#' @description a helper function used by write_attributes to determine the
#' type of a numeric variable
#'
#' @note internal to write_attributes and not exported
#'
#' @param numeric_object
#' (numeric) numeric object (e.g., 2, 3.2)

get_number_type <- function(numeric_object) {

  raw <- na.omit(numeric_object)
  raw <- raw[is.finite(raw)] # remove infs (just in case)

  rounded <- floor(raw)

  if (length(raw) - sum(raw == rounded, na.rm = TRUE) > 0) {

    number_type <- "real" # all

  } else if (min(raw, na.rm = TRUE) > 0) {

    number_type <- "natural" # 1, 2, 3, ... (sans 0)

  } else if (min(raw, na.rm = TRUE) < 0) {

    number_type <- "integer" # whole + negative values

  } else {

    number_type  <- "whole" # natural + 0

  }

  return(number_type)

}


#' @description a helper function used by write_attributes to construct a
#' type-specific yaml entry for each variable of a data object
#'
#' @note internal to write_attributes and not exported
#'
#' @param variable
#' (column) Column or variable of a data entity (e.g., mtcars$mpg)
#'
#' @param varName
#' (character) Name of @variable (e.g., "mpg" from above example)
#' @examples
#'
#' \dontrun{
#'
#'  attributes_to_yaml(
#'    variable = mtcars$mpg, "mpg",
#'    varName  = "mpg"
#'  )
#'
#'  attribute_list <- purrr::map2(
#'    .x = mtcars,
#'    .y = colnames(mtcars),
#'    .f = attributes_to_yaml
#'  )
#'
#'
#' }
#'
attributes_to_yaml <- function(variable, varName) {

  # helper function to check the class of a variable; the column class of a
  # spatial file can be a vector so pull the first entity only
  check_class <- function(x) { class(x)[[1]] }

  variableAttributes <- list(
    attributeName       = varName,
    attributeDefinition = "",
    propertyURI         = "",
    propertyLabel       = "",
    valueURI            = "",
    valueLabel          = ""
  )

  if (is.numeric(variable)) {

    variableAttributes <- c(
      variableAttributes,
      unit       = "",
      numberType = get_number_type(variable),
      minimum    = min(variable, na.rm = TRUE),
      maximum    = max(variable, na.rm = TRUE)
    )

    if (is.integer(variable)) {

      variableAttributes <- c(
        variableAttributes,
        columnClasses = "numeric"
      )

    } else {

      variableAttributes <- c(
        variableAttributes,
        columnClasses = check_class(variable)
      )

    }

  } else if (is.character(variable)) {

    variableAttributes <- c(
      variableAttributes,
      columnClasses = check_class(variable),
      definition    = ""
    )

  } else if (is.factor(variable)) {

    variableAttributes <- c(
      variableAttributes,
      columnClasses = check_class(variable)
    )

  } else if (
      lubridate::is.Date(variable) ||
      lubridate::is.POSIXt(variable) ||
      lubridate::is.POSIXlt(variable) ||
      lubridate::is.POSIXct(variable)
    ) {

    variableAttributes <- c(
      variableAttributes,
      columnClasses = "Date",
      formatString  = "YYYY-MM-DD"
    )

  } else {

    stop("dataframe has a variable for which the class could not be determined")

  }

  return(variableAttributes)

}
CAPLTER/capeml documentation built on April 3, 2024, 11:17 p.m.