R/write_factors.R

Defines functions write_factors

Documented in write_factors

#' @title Generate a template yaml file to detail metadata for attributes that
#' are factors
#'
#' @description \code{write_factors} creates a template as a yaml file for
#' supplying code definition metadata for factors in a tabular data object that
#' resides in the R environment.
#'
#' @details The yaml template generated by write_factors includes the field
#' names of all fields defined as factors, and each level of the factor. The
#' template supports the ability to provide a definition for each level. The
#' yaml file is written with the name of the data object in R + "_factors". The
#' create_dataTable function will search for this file when creating a EML
#' dataTable entity.
#'
#' @note The user must follow yaml convention to ensure that input is formatted
#' appropriately (e.g., quoting when needed; see
#' https://www.yaml.info/learn/quote.html).
#'
#' @param dfname
#'  (character) Unquoted name of the R data frame or tibble.
#' @param overwrite
#'  (logical) Logical indicating if an existing factors file in the target
#'  directory should be overwritten.
#'
#' @import dplyr
#' @import yaml
#' @importFrom purrr map
#' @importFrom sf st_drop_geometry
#'
#' @return The name of the file generated is returned, and a template for
#' providing code definition metadata as a yaml file with the file name of the
#' R data object + "_factors.yaml" is created in the working directory.
#'
#' @examples
#' \dontrun{
#'
#'  # overwrite existing factors file
#'
#'  capeml::write_factors(
#'    dfname    = data_object,
#'    overwrite = TRUE
#'  )
#'
#' }
#'
#' @export

write_factors <- function(dfname, overwrite = FALSE) {

  # establish object name for checking if exists and, ultimately, writing to file

  objectName <- paste0(deparse(substitute(dfname)), "_factors")
  fileName   <- paste0(objectName, ".yaml")

  # check if factors already exist for given data entity

  if (file.exists(fileName) & overwrite == FALSE) {

    stop(
      paste0("file ", fileName, " already exists, use write_factors(overwrite = TRUE) to overwrite")
    )

  }


  # helper function to map levels

  map_factor_levels <- function(level) {

    var_levs <- list(
      code       = level,
      definition = "metadata_not_provided"
    )

  }


  # helper function to map factors

  factors_to_yaml <- function(varName, df) {

    factors_yaml <- list(
      attribute = list(
        attributeName = varName,
        levels = map(
          .x = levels(df[[varName]]),
          .f = map_factor_levels
        )
      )
    )

    return(factors_yaml)

  }


  # drop geometry for sf objects

  if (class(dfname)[[1]] == "sf") {

    dfname <- dfname |>
      sf::st_drop_geometry()

  }


  # list of factors in target data entity (drop geometry for sf objects)

  list_of_factors <- dfname |>
    dplyr::select_if(is.factor) |>
    names()

  if (length(list_of_factors) == 0) {

    stop("data entity does not contain any variables of type factor")

  } else {

    # construct yaml entry for each factor
    factors_as_yaml <- yaml::as.yaml(
      purrr::map(
        .x = list_of_factors,
        .f = factors_to_yaml,
        df = dfname
      )
    )


    # write factors yaml to file
    yaml::write_yaml(
      x    = factors_as_yaml,
      file = fileName
    )

    message(paste0("constructed factors yaml: ", fileName))

    return(objectName)

  }

} # close function
CAPLTER/capeml documentation built on April 3, 2024, 11:17 p.m.