# Chunk defaults for the whole document: show the code (echo = TRUE) but do
# not run it (eval = FALSE).
knitr::opts_chunk$set(
  eval = FALSE,
  echo = TRUE
)

Download a data object

# Connect to the DataONE Coordinating Node.
cn <- dataone::CNode()

# Look up the download locations recorded for this object.
data_info <- dataone::resolve(
  cn,
  "urn:uuid:45dd7166-f4d3-4d32-abb9-f402af6fb237"
)
print(data_info)

# `data$url` can hold more than one URL (one per location that serves the
# object), but read.csv() accepts a single file/URL, so take the first one.
data_url <- data_info$data$url[1]
if (is.null(data_url) || is.na(data_url)) {
  stop("The object could not be resolved to a download URL", call. = FALSE)
}

data <- read.csv(data_url, header = FALSE)
print(head(data))

Read EML

Generally, you can read EML into R in the following way:

# Reading from the inside out: fetch the metadata object as raw bytes, convert
# the bytes to a character string, then parse that string as EML.
# `cn` and `pkg` are the objects created under "Package relationships".
eml <- EML::read_eml(
  rawToChar(
    dataone::getObject(cn, pkg$metadata)
  )
)

In some cases, you need to strip a trailing newline from the metadata text before it can be read:

# Fetch the metadata object as raw bytes.
metadata_raw <- dataone::getObject(cn, pkg$metadata)

# Convert to text and drop a newline at the very end of the document, if any.
metadata_text <- stringr::str_replace(rawToChar(metadata_raw), "\n$", "")

# Parse the cleaned text as EML.
eml1 <- EML::read_eml(metadata_text)

Package relationships

To get the PIDs for the metadata, data, and resource map objects in a data package, you can run:

# Node used for the lookup
cn <- dataone::CNode()

# Identifier of the package to inspect
pid <- "doi:10.6073/pasta/c89e6f25b1ac3b0bf8f13ea538fc490c"

# Collect the PIDs that make up the package and display them
pkg <- arcticdatautils::get_package(
  cn,
  pid
)

print(pkg)

This also works when you supply the PID of a specific data object, but it produces a warning message.

# This time the identifier points at a single data object.
pid <- "https://pasta.lternet.edu/package/data/eml/knb-lter-cap/651/1/c36ce118a59ed1508f963ea3ab5453b2"

# Same lookup as before, reusing the existing `cn`.
pkg <- arcticdatautils::get_package(
  cn,
  pid
)

print(pkg)

Download a package

You can download a package to a directory on your computer using this code:

# Use a query to identify the member node and resource map PID.
# The Coordinating Node is created once here and reused below, so this snippet
# does not depend on a `cn` defined in another section.
cn <- dataone::CNode()
result <- dataone::query(
  cn,
  list(q = 'id:"doi:10.18739/A2MQ00"'),
  as = "data.frame"
)

# Fail early with a clear message instead of passing empty values along.
if (is.null(result) || nrow(result) == 0) {
  stop("The query did not return any results", call. = FALSE)
}

# Keep only the text after the last space, so a field holding several
# space-separated PIDs yields a single PID (same handling as the
# attributes example later in this file).
rm_pid <- stringr::str_replace(result$resourceMap[1], ".* ", "")
mn <- dataone::getMNode(cn, result$datasource[1])

# Download the package archive, then extract it.
package_path <- dataone::getPackage(mn, rm_pid)
unzip(package_path, exdir = ".") # exdir = directory to extract package to

The downloaded package will be stored in a folder named after the resource map, which contains a "data" folder with the metadata, resource map, and data objects.

Dom's method

See this vignette for more details:

Switching between DataONE/LTER Member Nodes

Suppose we want to download this package from LTER: https://search.dataone.org/#view/https://pasta.lternet.edu/package/metadata/eml/knb-lter-ntl/276/13

We first need to tell R that we're changing member nodes. The fastest way to do this is to reset your `mn` with the `guess_member_node` function. Note: this function accepts metadata PIDs, resource map PIDs, or DataONE package URLs as inputs.

# Three different identifiers for the same package.
metadata_pid <- "https://pasta.lternet.edu/package/metadata/eml/knb-lter-ntl/276/13"
resource_map_pid <- "doi:10.6073/pasta/97c6d83a5f6a0a065ef3209fcb491b6e"
package_url <- "https://search.dataone.org/#view/https://pasta.lternet.edu/package/metadata/eml/knb-lter-ntl/276/13"

# The following three calls return the same member node (mn)
mn <- guess_member_node(metadata_pid) # metadata pid input
mn <- guess_member_node(resource_map_pid) # resource map pid input
mn <- guess_member_node(package_url) # package URL input


# We can now use any of the above functions:
get_eml_attributes_url(mn, package_url)

Getting attributes for a specific data object

(work in progress)

# attributes for a specific data object ==================
# Given the PID of one data object, find the metadata record that documents
# it, locate the matching entity in that EML, and pull out its attribute table.
library(tidyverse)

# Example PIDs to try; only the last assignment takes effect.
data_pid <- "urn:uuid:84276deb-95c1-4f99-a1ae-e795cd6fbee3"
data_pid <- "https://pasta.lternet.edu/package/data/eml/knb-lter-arc/20036/8/58ea279a915035de63dd49efe5199b91"
data_pid <- "https://pasta.lternet.edu/package/data/eml/knb-lter-arc/20036/8/3d01ac18a00f43002baab41784029ca7" # no attr

# Create the Coordinating Node once and reuse it for the query and the
# member node lookup.
cn <- dataone::CNode()
result <- dataone::query(
  cn,
  list(q = paste0('id:"', data_pid, '"')),
  as = "data.frame"
)

# Fail early with a clear message when the PID is unknown.
if (is.null(result) || nrow(result) == 0) {
  stop("The query did not return any results for this PID", call. = FALSE)
}

# Keep only the text after the last space, so a field holding several
# space-separated PIDs yields a single PID.
rm_pid <- result$resourceMap[1] %>% stringr::str_replace(".* ", "")
meta_pid <- result$isDocumentedBy[1] %>% stringr::str_replace(".* ", "")
mn <- dataone::getMNode(cn, result$datasource[1])

eml <- EML::read_eml(rawToChar(dataone::getObject(mn, meta_pid)))

# Match the PID as a literal string. Used as a regular expression, characters
# such as "." in the PID would be treated as wildcards.
pid_pattern <- stringr::fixed(data_pid)
dT_url <- stringr::str_detect(
  EML::eml_get(eml@dataset@dataTable, "url"),
  pid_pattern
)
oE_url <- stringr::str_detect(
  EML::eml_get(eml@dataset@otherEntity, "url"),
  pid_pattern
)

# Accept the entity only when exactly one URL matches, checking dataTable
# entries first and otherEntity entries second.
if (sum(dT_url) == 1) {
  data_info <- eml@dataset@dataTable[which(dT_url)]
} else if (sum(oE_url) == 1) {
  data_info <- eml@dataset@otherEntity[which(oE_url)]
} else {
  stop("The data could not be found")
}

# Stop when the entity has an empty attribute list; otherwise extract it.
if (length(data_info[[1]]@attributeList@.Data) == 0) {
  stop("This data does not have any attributes")
} else {
  attr <- EML::get_attributes(data_info[[1]]@attributeList)
}

# Fetch the data object itself from the member node.
raw <- dataone::getObject(mn, data_pid)

Notes from Peter

Useful dataone functions to keep in mind:

# Outline of three calls chained together: the result of getDataPackage() is
# passed to getMember(), and that result is passed to getData().
# NOTE(review): getDataPackage() is shown with no arguments and getMember()
# with only the package -- presumably a sketch; confirm the required arguments
# in the dataone documentation before running.
pkg <- getDataPackage()
do <- getMember(pkg) # data object
getData(do)


isteves/dataimport documentation built on May 7, 2019, 8:39 a.m.