#' @title create EML entity of type otherEntity
#'
#' @description \code{create_otherEntity} generates a EML entity of type
#' otherEntity.
#'
#' @details A otherEntity entity is created from a single file (e.g.,
#' desert_fertilization_sampling_sites.kml) or a directory. The resulting
#' entity is renamed with the package number + base file name + md5sum + file
#' extension. File extension is always .zip if the otherEntity is being created
#' by zipping a directory.
#'
#' @note \code{create_otherEntity} will look for a package identifier and
#' baseURL in config.yaml. `baseURL` must be provided. `package identifier`
#' must be provided if project naming is enabled.
#'
#' @note The target data file or directory can be located anywhere on a local
#' computer but a renamed file or directory will be written to the current
#' working directory.
#'
#' @param target_file_or_directory
#' (charater) The quoted name and path of the data file or directory.
#' @param description
#' (character) A description of the data entity.
#' @param overwrite
#' (logial) If creating otherEntity by zipping a directory, this is a logical
#' indicating whether to overwrite an existing zip file that has the same name
#' in the location as the temporary zip object to be created.
#' @param project_naming
#' (logical) Indicating if the file or directory should be renamed per the
#' style used by the CAP LTER (default) with the package number + base file
#' name + file extension. The passed file or directory name will be used if
#' this parameter is set to FALSE.
#' @param additional_information
#' (character) Additional information about the data object. Should be quoted,
#' and accepts (but not required) markdown formatting.
#' @param entity_value_description
#' (character) For raster data, a desciption of the raster value.
#'
#' @import EML
#' @importFrom dplyr select pull mutate select_if
#' @importFrom readr read_csv
#' @importFrom tools md5sum file_ext
#' @importFrom stringr str_extract
#' @importFrom tools file_ext
#' @importFrom utils file_test
#' @importFrom yaml yaml.load_file
#' @importFrom purrr map_chr
#'
#' @return EML entity of type otherEntity is returned. Additionally, the data
#' file or directory is renamed with the package number + base file name + file
#' extension if project naming is enabled.
#'
#' @examples
#' \dontrun{
#'
#' # using default parameters
#'
#' desert_fertilization_sites <- capeml::create_otherEntity(
#' target_file_or_directory = "path-to-file/sampling_sites.Rmd",
#' description = "long-term study site documentation"
#' )
#'
#' # set overwrite = TRUE - has the effect of overwriting an existing zipped
#' # directory of the same name if the otherEntity is being generated by
#' # zipping a directory.
#'
#' pass_codebook_2011 <- capeml::create_otherEntity(
#' target_file_or_directory = "PASS-2011-Codebook-Feb2016rev.pdf",
#' description = "PASS 2011 survey codebook",
#' overwrite = TRUE
#' )
#'
#' # example without project naming:
#'
#' pass_codebook_2011 <- capeml::create_otherEntity(
#' target_file_or_directory = "path-to-directory/Desktop/max_temperature",
#' description = "rasters of max temperature years 2000-2016",
#' project_naming = FALSE
#' )
#'
#' }
#'
#' @export
#'
create_otherEntity <- function(
target_file_or_directory,
description,
overwrite = FALSE,
project_naming = TRUE,
additional_information = NULL,
entity_value_description
) {
# required parameters --------------------------------------------------------
# description
if (missing("description")) {
stop("please provide a description for this object")
}
# target file or directly
if (missing("target_file_or_directory")) {
stop("specify the name of the file or directory")
}
# defaults -------------------------------------------------------------------
is_directory <- FALSE
is_shape <- FALSE
# zip if targetfile is a directory -------------------------------------------
# create zip of directory if target_file_or_directory is in fact a directory
if (file_test(op = "-d", x = target_file_or_directory)) {
# flag that target_file_or_directory is a directory
is_directory <- TRUE
# new object name: base name + zip extension
zippedObject <- paste0(basename(target_file_or_directory), ".zip")
# need full path to zip a directory
targetFileFullPath <- path.expand(target_file_or_directory)
# check if we are zipping a shapefile
if ("shp" %in% purrr::map_chr(.x = list.files(targetFileFullPath), ~ tools::file_ext(.x))) {
is_shape <- TRUE
}
# stop if zipping the directory will overwrite an existing object without
# explicit overwrite - note that this is checking the existence of the
# temporary object (e.g., dirname.zip), not the ultimate object (e.g.,
# packagenumber_dirname_md5hash.zip)
if (file.exists(zippedObject) && overwrite == FALSE) {
stop("zip with that name at that location already exists, set 'overwrite = TRUE'")
}
# zip the target directory
system(
paste0(
"zip -jXr ",
shQuote(zippedObject, type = "sh"),
" ",
shQuote(targetFileFullPath, type = "sh")
)
)
target_file_or_directory <- zippedObject
}
# object size, type, and hash ------------------------------------------------
# determine the file extension
fileExtension <- tools::file_ext(target_file_or_directory)
# set authentication (md5)
fileAuthentication <- EML::eml$authentication(method = "MD5")
fileAuthentication$authentication <- tools::md5sum(target_file_or_directory)
# set file size
fileSize <- EML::eml$size(unit = "byte")
fileSize$size <- deparse(file.size(target_file_or_directory))
# set file format
if (grepl("pdf", tools::file_ext(target_file_or_directory), ignore.case = TRUE)) {
fileDataFormat <- EML::eml$dataFormat(
externallyDefinedFormat = EML::eml$externallyDefinedFormat(formatName = "Portable Document Format")
)
} else if (is_shape == TRUE) {
fileDataFormat <- EML::eml$dataFormat(
externallyDefinedFormat = EML::eml$externallyDefinedFormat(formatName = "Esri Shapefile (zipped)")
)
} else {
fileDataFormat <- EML::eml$dataFormat(
externallyDefinedFormat = EML::eml$externallyDefinedFormat(formatName = fileExtension)
)
}
targetFileBaseName <- basename(target_file_or_directory)
directoryName <- dirname(target_file_or_directory)
directoryNameFull <- sub("/$", "", path.expand(directoryName))
pathToFile <- path.expand(target_file_or_directory)
# retrieve dataset details from config.yaml
configurations <- read_package_configuration()
# project naming -------------------------------------------------------------
if (project_naming == TRUE) {
identifier <- configurations$identifier
targetFileBaseName <- paste0(
identifier, "_",
stringr::str_extract(targetFileBaseName, "^[^\\.]*"),
".",
fileExtension
)
file.copy(
from = target_file_or_directory,
to = paste0(directoryNameFull, "/", targetFileBaseName)
)
}
# construct physical ---------------------------------------------------------
fileURL <- configurations$fileURL
fileDistribution <- EML::eml$distribution(
EML::eml$online(url = paste0(fileURL, targetFileBaseName))
)
filePhysical <- EML::eml$physical(
objectName = targetFileBaseName,
authentication = fileAuthentication,
size = fileSize,
dataFormat = fileDataFormat,
distribution = fileDistribution
)
# create otherEntity ---------------------------------------------------------
newOE <- EML::eml$otherEntity(
entityName = targetFileBaseName,
entityDescription = description,
physical = filePhysical,
entityType = fileExtension,
id = targetFileBaseName
)
# attributes -----------------------------------------------------------------
# helper function to remove missing columns
not_all_na <- function(x) {
!all(is.na(x))
}
if (file.exists(paste0(tools::file_path_sans_ext(target_file_or_directory), "_attrs.yaml"))) {
attrs <- yaml::yaml.load_file(paste0(tools::file_path_sans_ext(target_file_or_directory), "_attrs.yaml"))
attrs <- yaml::yaml.load(attrs)
attrs <- tibble::enframe(attrs) |>
tidyr::unnest_wider(value) |>
dplyr::select(-one_of("name"))
# column classes to vector (req'd by set_attributes)
classes <- attrs |>
dplyr::pull(columnClasses)
# - copy attributeDefinition to defintion as appropriate;
# - remove col classes from attrs (req'd by set_attributes);
# - remove empty columns (especially max & min, which
# can throw an error for data without any numeric cols)
attrs <- attrs |>
dplyr::mutate(
definition = case_when(
grepl("character", columnClasses) & ((is.na(definition) | definition == "")) ~ attributeDefinition,
TRUE ~ definition
)
) |>
dplyr::select(-columnClasses) |>
dplyr::select_if(not_all_na)
has_attributes <- TRUE
} else {
has_attributes <- FALSE
}
# factors --------------------------------------------------------------------
# use factors if exist
if (file.exists(paste0(tools::file_path_sans_ext(target_file_or_directory), "_factors.yaml"))) {
entity_factors <- yaml.load_file(paste0(tools::file_path_sans_ext(target_file_or_directory), "_factors.yaml")) |>
yaml::yaml.load() |>
tibble::enframe() |>
tidyr::unnest_wider(value) |>
tidyr::unnest_wider(attribute) |>
tidyr::unnest_longer(levels) |>
tidyr::unnest_wider(levels) |>
dplyr::select(-one_of("name"))
has_factors <- TRUE
} else {
has_factors <- FALSE
}
# compile components for attributeList ---------------------------------------
# condition: factors only
if (has_attributes == FALSE && has_factors == TRUE) {
if (missing(entity_value_description)) {
entity_value_description <- "METADATA_NOT_PROVIDED"
}
attrs_factor_only <- data.frame(
attributeName = entity_factors[["attributeName"]][[1]],
attributeDefinition = entity_value_description
)
attr_list <- EML::set_attributes(
attributes = attrs_factor_only,
factors = entity_factors,
col_classes = "factor"
)
has_factor_only <- TRUE
message("added: ", targetFileBaseName, " factors")
} else {
has_factor_only <- FALSE
}
# condition: attributes and factors
if (has_attributes == TRUE && has_factors == TRUE) {
attr_list <- EML::set_attributes(
attributes = attrs,
factors = entity_factors,
col_classes = classes
)
message("added: ", targetFileBaseName, " attributes")
message("added: ", targetFileBaseName, " factors")
}
# condition: attributes only
if (has_attributes == TRUE && has_factors == FALSE) {
attr_list <- EML::set_attributes(
attributes = attrs,
col_classes = classes
)
message("added: ", targetFileBaseName, " attributes")
}
# add attributeList
if (has_attributes == TRUE | has_factor_only == TRUE) {
newOE$attributeList <- attr_list
}
# additional information -----------------------------------------------------
if (!is.null(additional_information)) {
newOE$additionalInfo <- additional_information
}
# remove temporary objects ---------------------------------------------------
# remove temporary zip if otherEntity is created by zipping a directory
if (is_directory == TRUE && file.exists(zippedObject)) {
file.remove(zippedObject)
}
# return other entity object -------------------------------------------------
message("created otherEntity: ", targetFileBaseName, " sans attributes")
return(newOE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.