R/get_eml_attributes.R

Defines functions get_eml_attributes_url download_eml_attributes get_eml_attributes list_depth

Documented in download_eml_attributes get_eml_attributes get_eml_attributes_url

# Return the maximum depth (or levels) of a list
# From: https://stackoverflow.com/questions/13432863/determine-level-of-nesting-in-r
list_depth <- function(input_list) {
    ifelse(is.list(input_list),
           1L + max(sapply(input_list, list_depth)),
           0L)
}


#' Return attribute (column) metadata from a DataONE metadata object
#'
#' Return attribute metadata from an EML object. This is largely a
#' wrapper for the function [EML::get_attributes()].
#'
#' @param doc (emld) EML object.
#'
#' @return (list) A list of all attribute metadata from the EML in data.frame objects
#'
#' @export
#'
#' @author Dominic Mullen, \email{dmullen17@@gmail.com}
#'
#' @examples
#' \dontrun{
#' cn <- dataone::CNode('PROD')
#' mn <- dataone::getMNode(cn, 'urn:node:ARCTIC')
#' doc <- EML::read_eml(rawToChar(dataone::getObject(mn, "doi:10.18739/A23W02")))
#' attributes <- get_eml_attributes(doc)
#'
#' # switch nodes
#' cn <- dataone::CNode('PROD')
#' knb <- dataone::getMNode(cn,"urn:node:KNB")
#' doc <- EML::read_eml(rawToChar(dataone::getObject(knb, "doi:10.5063/F1639MWV")))
#' attributes <- get_eml_attributes(doc)
#' }
get_eml_attributes <- function(doc) {
    # TODO - make sure it works for otherEntities
    stopifnot(methods::is(doc, "emld"))

    if (!is.null(names(doc$dataset$dataTable))){
        doc$dataset$dataTable <- list(doc$dataset$dataTable)
    }
    # add this in when otherEntity functionality is added
    # indices <- vector("numeric")
    # indices <- arcticdatautils::which_in_eml(doc$dataset$dataTable,
    #                         "attributeList",
    #                         function(x) {length(x) > 0})

    indices <- 1:length(doc$dataset$dataTable) # all dataTable have attributeLists so this is fine for now

    names <- vector("character", length = length(indices))
    results <- vector("list", length = length(indices))

    for (i in seq_along(indices)) {
        results[[i]] <- EML::get_attributes(doc$dataset$dataTable[[i]]$attributeList)
        names[i] <- doc$dataset$dataTable[[i]]$entityName
    }

    names(results) <- names

    # Unlist results if depth (levels) > 2
    if (list_depth(results) > 2) {
        results <- unlist(results, recursive = FALSE)
    }

    return(results)
}


#' Download attribute (column) metadata from a DataONE metadata object to csv files
#'
#' Download attribute metadata from an EML object as csvs. The name
#' of each csv corresponds to the file name of the Data Object it describes.
#' This can be prepended with the package identifier by setting \code{prefix_file_names = TRUE} (recommended).
#'
#' @param doc (emld) EML object.
#' @param download_directory (character) Directory to download attribute metadata csvs to.
#' @param prefix_file_names (logical) Optional. Whether to prefix file names with the package metadata identifier.
#'   This is useful when downloading files from multiple packages to one directory.
#'
#' @export
#'
#' @author Dominic Mullen, \email{dmullen17@@gmail.com}
#'
#' @examples
#' \dontrun{
#  cn <- dataone::CNode('PROD')
#' mn <- dataone::getMNode(cn, 'urn:node:ARCTIC')
#' doc <- EML::read_eml(rawToChar(dataone::getObject(mn, "doi:10.18739/A23W02")))
#' attributes <- datamgmt::download_eml_attributes(doc, download_directory = tempdir(),
#' prefix_file_names = TRUE)
#'}
download_eml_attributes <- function(doc,
                                    download_directory,
                                    prefix_file_names = FALSE) {
    stopifnot(methods::is(doc, "emld"))
    stopifnot(file.exists(download_directory))
    stopifnot(is.logical(prefix_file_names))


    attributes <- get_eml_attributes(doc)

    prefix <- character(0)
    if (prefix_file_names == TRUE) {
        prefix <- EML::eml_get(doc, "packageId") %>%
            as.character() %>%
            remove_special_characters() %>%
            paste0("_")
    }

    file_names <- paste0(prefix, names(attributes)) %>%
        gsub(pattern = "\\..*\\.", replacement = "_") %>%
        paste0(".csv")

    file_paths <- file.path(download_directory, file_names)

    for (i in seq_along(attributes)) {
        if (!is.null(attributes[[i]])) {
            write.csv(data.frame(attributes[[i]]), file = file_paths[i], row.names = FALSE)
        }
    }

    return(invisible())
}


#' Return attribute (column) metadata from a DataONE package URL
#'
#' Return attribute metadata from an EML object or DataONE package URL.
#' This is largely a wrapper for the function [EML::get_attributes()].
#'
#' @param mn (MNode/CNode) The DataONE Node that stores the Metadata object, from \url{https://cn.dataone.org/cn/v2/node}
#' @param url_path (character) The URL of the DataONE Package.
#' @param write_to_csv (logical) Optional. Option whether to download the attribute metadata to csv files. Defaults to \code{FALSE}
#' @param prefix_file_names (logical) Optional. Whether to prefix file names with the package metadata identifier.
#'   This is useful when downloading files from multiple packages to one directory.
#' @param download_directory (character) Optional. Directory to download attribute metadata csv files to.
#'   Required if \code{write_to_csv} is \code{TRUE}.
#'
#' @return (list) A list of all attribute metadata from the EML in data.frame objects.
#'
#' @export
#'
#' @author Dominic Mullen, \email{dmullen17@@gmail.com}
#'
#' @examples
#' \dontrun{
#' attributes <- get_eml_attributes(mn,
#' "https://arcticdata.io/catalog/#view/doi:10.18739/A23W02")
#'
#' # Download attribute metadata in csv format:
#' attributes <- get_eml_attributes(mn,
#' "https://arcticdata.io/catalog/#view/doi:10.18739/A23W02",
#' write_to_csv = TRUE,
#' download_directory = tempdir())

#' # switch nodes
#' cn <- dataone::CNode('PROD')
#' knb <- dataone::getMNode(cn,"urn:node:KNB")
#' attributes <- get_eml_attributes(knb,
#' "https://knb.ecoinformatics.org/#view/doi:10.5063/F1639MWV")
#' }
get_eml_attributes_url <- function(mn,
                                   url_path,
                                   write_to_csv = FALSE,
                                   prefix_file_names = FALSE,
                                   download_directory = NULL) {
    stopifnot(methods::is(mn, "MNode"))
    stopifnot(is.character(url_path))
    stopifnot(is.logical(write_to_csv))
    if (!is.null(download_directory)){
        stopifnot(is.character(download_directory))
        stopifnot(file.exists(download_directory))
    }

    pid <- unlist(strsplit(url_path, "view/"))[[2]]
    doc <- EML::read_eml(rawToChar(dataone::getObject(mn, pid)))

    if (write_to_csv == TRUE) {
        download_eml_attributes(doc, download_directory, prefix_file_names)
    }

    results <- get_eml_attributes(doc)

    return(results)
}
NCEAS/datamgmt documentation built on June 5, 2023, 6:14 a.m.