Nothing
#
# This work was created by participants in the DataONE project, and is
# jointly copyrighted by participating institutions in DataONE. For
# more information on DataONE, see our web site at http://dataone.org.
#
# Copyright 2011-2013
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#' @title Provides R API to DataONE Member Node services.
#' @description MNode provides functions that interact with a DataONE Member Node (MN). A MN
#' is a repository that provides access for reading and writing data and metadata using the
#' DataONE MN service API. The MN API includes functions for retrieving data and metadata based on its
#' unique persistent identifier (pid), as well as for creating, updating, and archiving these data and
#' metadata objects.
#' @details
#' Methods that perform write operations (such as createObject and updateObject) on the MN generally
#' require authentication. For MNs that have implemented the DataONE API version 2.0 and higher, these operations can utilize an
#' authentication token to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your account profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file
#' system can also be used. This certificate provides authentication credentials from
#' CILogon \url{https://cilogon.org/?skin=DataONE}.
#' @slot endpoint The url to access node services, which is the baseURL plus the version string
#' @rdname MNode-class
#' @aliases MNode-class
#' @include D1Node.R
#' @include auth_request.R
#' @import methods
#' @importFrom utils URLencode
#' @section Methods:
#' \itemize{
#' \item{\code{\link{MNode}}}{: Create a MNode object representing a DataONE Member Node repository.}
#' \item{\code{\link{createObject}}}{: Create an object on a Member Node.}
#' \item{\code{\link{getObject}}}{: Get the bytes associated with an object on the Member Node}
#' \item{\code{\link{getCapabilities}}}{: Get the node capabilities description, and store the information in the MNode.}
#' \item{\code{\link{generateIdentifier}}}{: Get a unique identifier that is generated by the Member Node repository and guaranteed to be unique.}
#' \item{\code{\link{getPackage}}}{: Download a data package from a member node.}
#' \item{\code{\link{updateObject}}}{: Update an object to a Member Node, by creating a new object that replaces an original.}
#' \item{\code{\link{updateSystemMetadata}}}{: Update the system metadata associated with an object.}
#' }
#' @seealso \code{\link{dataone}}{ package description.}
#' @export
#' @examples \dontrun{
#' library(dataone)
#' library(uuid)
#' library(digest)
#' cn <- CNode("STAGING")
#' mn <- getMNode(cn, "urn:node:mnStageUCSB2")
#' mnid <- mn@identifier
#' # Have Dataone create an identifier for you (requires authentication)
#' \dontrun{
#' newid <- generateIdentifier(mn, "UUID")
#' }
#' # Create an identifier manually
#' newid <- paste("urn:uuid:", UUIDgenerate(), sep="")
#' testdf <- data.frame(x=1:10,y=11:20)
#' csvfile <- paste(tempfile(), ".csv", sep="")
#' write.csv(testdf, csvfile, row.names=FALSE)
#' f <- "text/csv"
#' size <- file.info(csvfile)$size
#' sha256 <- digest(csvfile, algo="sha256", serialize=FALSE, file=TRUE)
#' sysmeta <- new("SystemMetadata", identifier=newid, formatId=f, size=size,
#' checksum=sha256, originMemberNode=mnid, authoritativeMemberNode=mnid)
#' # Upload data to DataONE (requires authentication)
#' \dontrun{
#' response <- createObject(mn, newid, csvfile, sysmeta)
#' }
#' }
setClass("MNode", slots = c(endpoint = "character"), contains="D1Node")
#########################
## MNode constructors
#########################
#' Create a MNode object representing a DataONE Member Node repository.
#' @description Construct an instance of MNode to provide mechanisms to access, create, and update data and
#' metadata objects on the associated Member Node.
#' @details If the \code{'x'} is a string, it is treated as a URI and an attempt to find an associated
#' Member Node at that base URL is attempted. If \code{'x'} is a Node reference, then it is cast to a MNode
#' instance. This typically is used from the getMNode() function from the CNode class, which is the preferred
#' way to retrieve an instance of an MNode.
#' @param x a URI representing a base URL (i.e. https://knb.ecoinformatics.org/knb/d1/mn/v2); or a reference to a dataone::Node instance
#' @rdname MNode
#' @return the MNode object-
#' @seealso \code{\link[=MNode-class]{MNode}}{ class description.}
#' @export
#' @examples \dontrun{
#' mn <- MNode("https://knb.ecoinformatics.org/knb/d1/mn/v2")
#' }
setGeneric("MNode", function(x) {
standardGeneric("MNode")
})
#' @rdname MNode
#' @export
setMethod("MNode", signature("character"), function(x) {
## create new MNode object and insert uri endpoint
mnode <- new("MNode")
mnode@endpoint <- x
## Lookup the rest of the node information
xml <- getCapabilities(mnode)
# getCapabilties returns NULL if an error was encoutered
if (is.null(xml)) return(NULL)
mnode <- parseCapabilities(mnode, xmlRoot(xml))
# Set the service URL fragment for the solr query engine
mnode@serviceUrls <- data.frame(service="query.solr", Url=paste(mnode@endpoint, "query", "solr/", sep="/"), row.names = NULL, stringsAsFactors = FALSE)
# Determine if this node is in the production or a test environment.
# If it is is a test domain, then mark as a test node.
if(length(mnode@env) == 0) {
if (grepl("test.dataone.org", mnode@endpoint)) {
mnode@env <- "test"
} else {
# Only call the cn if there is no other way to determine
# the network that this node is in.
# List nodes in the production environment, if the node
# is not in the production enviroment, then it must be in
# a test environment.
cn <- CNode("PROD")
nodelist <- listNodes(cn)
match <- sapply(nodelist, function(x) {
x@identifier == mnode@identifier && x@type == "mn"
})
output.list <- nodelist[match]
if (length(output.list) == 1) {
mnode@env <- "prod"
} else {
mnode@env <- "test"
}
}
}
return(mnode)
})
#' @rdname MNode
#' @export
setMethod("MNode", signature("D1Node"), function(x) {
if (x@type == "mn") {
## create new MNode object and insert uri endpoint
mnode <- new("MNode")
mnode@identifier = x@identifier
mnode@name = x@name
mnode@description = x@description
mnode@baseURL = x@baseURL
mnode@subject = x@subject
mnode@contactSubject = x@contactSubject
mnode@replicate = x@replicate
mnode@type = x@type
mnode@state = x@state
mnode@services <- x@services
mnode@endpoint <- paste(x@baseURL, x@APIversion, sep="/")
# Set the service URL fragment for the solr query engine
mnode@serviceUrls <- data.frame(service="query.solr", Url=paste(mnode@endpoint, "query", "solr/", sep="/"), row.names = NULL, stringsAsFactors = FALSE)
mnode@APIversion <- x@APIversion
mnode@env <- x@env
return(mnode)
} else {
stop("Error: Node is not of type 'mn'.")
}
})
##########################
## Methods
##########################
#' Get the node capabilities description, and store the information in the MNode.
#' @description Access the DataONE getCapabilities() service for the Member Node, which returns an XML
#' description of the repository and the services it offers.
#' @rdname getCapabilities
#' @aliases getCapabilities
#' @param x The node identifier with which this node is registered in DataONE
#' @param ... (Not yet used.)
#' @return an XMLInternalDocument object representing the DataONE environment
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MN_core.getCapabilities}
#' @import XML
#' @import httr
#' @export
#' @examples \dontrun{
#' library(dataone)
#' cn <- CNode()
#' mn <- getMNode(cn, "urn:node:KNB")
#' xml <- getCapabilities(mn)
#' }
setGeneric("getCapabilities", function(x, ...) {
standardGeneric("getCapabilities")
})
#' @rdname getCapabilities
setMethod("getCapabilities", signature("MNode"), function(x) {
url <- paste(x@endpoint, "node", sep="/")
# Don't need privileged access, so call GET directly vs auth_get
response <- GET(url, user_agent(get_user_agent()))
# Use charset 'utf-8' if not specified in response headers
charset <- "utf-8"
if(response$status_code != "200") {
stop(sprintf("Error accessing %s: %s\n", x@endpoint, getErrorDescription(response)))
} else {
if("content-type" %in% names(response$headers)) {
media <- parse_media(response$headers[['content-type']])
if("params" %in% names(media) && "charset" %in% names(media$params)) {
charset <- media$params$charset
}
}
}
xml <- xmlParse(content(response, as="text", encoding=charset))
return(xml)
})
#' @param check A logical value, if TRUE check if this object has been obsoleted by another object in DataONE.
#' @rdname getObject
setMethod("getObject", signature("MNode"), function(x, pid, check=as.logical(FALSE)) {
stopifnot(is.character(pid))
if(!inherits(check, "logical")) {
stop("Invalid argument: 'check' must be specified as a logical.")
}
# TODO: need to properly URL-escape the PID
url <- paste(x@endpoint, "object", URLencode(pid, reserved=T), sep="/")
# Check if the requested pid has been obsoleted by a newer version
# and print a warning
if (check) {
sysmeta <- getSystemMetadata(x, pid)
if (!is.na(sysmeta@obsoletedBy)) {
message(sprintf('Warning: pid "%s" is obsoleted by pid "%s"', pid, sysmeta@obsoletedBy))
}
}
response <- auth_get(url, node=x)
if (response$status_code != "200") {
stop(sprintf("get() error: %s\n", getErrorDescription(response)))
}
return(content(response, as = "raw"))
})
#' @import datapack
#' @export
#' @rdname getSystemMetadata
setMethod("getSystemMetadata", signature("MNode"), function(x, pid) {
stopifnot(is.character(pid))
url <- paste(x@endpoint, "meta", URLencode(pid, reserved=T), sep="/")
response <- auth_get(url, node=x)
# Use charset 'utf-8' if not specified in response headers
charset <- "utf-8"
if(response$status_code != "200") {
warning(sprintf("Error getting SystemMetadata: %s\n", getErrorDescription(response)))
return(NULL)
} else {
if("content-type" %in% names(response$headers)) {
media <- parse_media(response$headers[['content-type']])
if("params" %in% names(media) && "charset" %in% names(media$params)) {
charset <- media$params$charset
}
}
}
# Convert the response into a SystemMetadata object
sysmeta <- SystemMetadata(xmlRoot(xmlParse(content(response, as="text", encoding=charset))))
return(sysmeta)
})
#' @rdname getChecksum
#' @param checksumAlgorithm The algorithm used to calculate the checksum. Default="SHA-256"
#' @export
setMethod("getChecksum", signature("MNode"), function(x, pid, checksumAlgorithm="SHA-256") {
stopifnot(is.character(pid))
url <- paste(x@endpoint, "checksum", URLencode(pid, reserved=T), sep="/")
url <- paste0(url, '?checksumAlgorithm=', checksumAlgorithm)
response <- auth_get(url, node=x)
if(response$status_code != "200") {
warning(sprintf("Error getting checksum: %s\n", getErrorDescription(response)))
return(NULL)
}
if (is.raw(response$content)) {
tmpres <- content(response, as="raw")
resultText <- rawToChar(tmpres)
} else {
# Use charset 'utf-8' if not specified in response headers
charset <- "utf-8"
if("content-type" %in% names(response$headers)) {
media <- parse_media(response$headers[['content-type']])
if("params" %in% names(media) && "charset" %in% names(media$params)) {
charset <- media$params$charset
}
}
resultText <- content(response, as="text", encoding=charset)
}
checksum<-(xmlToList(xmlParse(resultText)))
returnVal <- checksum$text
# Set an attribute of the algorithm used to calculate the checksum
attr(returnVal, "algorithm") <- checksum$.attrs[['algorithm']]
return(returnVal)
})
#' Create an object on a Member Node.
#' @description This method provides the ability to upload a data or metadata object to the Member Node
#' provided in the \code{'mnode'} parameter.
#' @details In the version 2.0 library and higher, this operation can utilize an
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file
#' system can also be used. This certificate provides authentication credentials from
#' CILogon \url{https://cilogon.org/?skin=DataONE}. See \code{vignette("dataone-overview")} for details.
#' @rdname createObject
#' @aliases createObject
#' @param x The MNode instance on which the object will be created
#' @param pid The identifier of the object to be created
#' @param ... (Not yet used.)
#' @return a \code{character} containing the identifier that was created.
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.create}
#' @import datapack
#' @export
#' @examples \dontrun{
#' # Create an object in the DataONE "STAGING" environment
#' library(dataone)
#' library(uuid)
#' library(digest)
#' library(datapack)
#' cn <- CNode("STAGING")
#' mn <- getMNode(cn, "urn:node:mnStageUCSB2")
#' # Have Dataone create an identifier for you (requires authentication)
#' \dontrun{
#' newid <- generateIdentifier(mn, "UUID")
#' }
#' # Create an identifier manually
#' newid <- paste("urn:uuid:", UUIDgenerate(), sep="")
#' testdf <- data.frame(x=1:10,y=11:20)
#' csvfile <- paste(tempfile(), ".csv", sep="")
#' write.csv(testdf, csvfile, row.names=FALSE)
#' format <- "text/csv"
#' size <- file.info(csvfile)$size
#' sha256 <- digest(csvfile, algo="sha256", serialize=FALSE, file=TRUE)
#' sysmeta <- new("SystemMetadata", identifier=newid, formatId=format, size=size, checksum=sha256)
#' sysmeta <- addAccessRule(sysmeta, "public", "read")
#' # Upload the data to DataONE (requires authentication)
#' \dontrun{
#' createObject(mn, newid, csvfile, sysmeta)
#' }
#' }
setGeneric("createObject", function(x, ...) {
standardGeneric("createObject")
})
#' @rdname createObject
#' @param file the absolute file location of the object to be uploaded
#' @param sysmeta a SystemMetadata instance describing properties of the object
#' @param dataobj a \code{raw} object to use for the upload, instead of the contents of the \code{file} argument.
setMethod("createObject", signature("MNode"), function(x, pid, file=as.character(NA), sysmeta, dataobj=NULL, ...) {
stopifnot(is.character(pid))
# TODO: need to properly URL-escape the PID
url <- paste(x@endpoint, "object", sep="/")
# Check if the user has set the sysmeta submitter and rightsHolder,
# if not, then set them to the values contained in their authentication token
# or X.509 certificate.
am <- AuthenticationManager()
suppressMessages(isValid <- isAuthValid(am, x))
# If authentication isn't valid, then let this call fail in auth_post, so the
# appropriate messages are printed.
if (isValid) {
if(is.na(sysmeta@submitter)) {
sysmeta@submitter <- getAuthSubject(am, x)
}
if(is.na(sysmeta@rightsHolder)) {
sysmeta@rightsHolder <- sysmeta@submitter
}
if(is.na(sysmeta@authoritativeMemberNode)) {
sysmeta@authoritativeMemberNode <- x@identifier
}
}
sysmetaxml <- serializeSystemMetadata(sysmeta, version=x@APIversion)
sm_file <- tempfile()
writeLines(sysmetaxml, sm_file)
if(!is.null(dataobj)) {
if(!is.na(file)) {
stop("Both 'file' and 'dataobj' arguments have been specified")
}
if(!is.raw(dataobj)) {
stop(sprintf("Invalid type \"%s\" for \"dataobj\" argument, only values of \"raw\" type are accepted.", class(dataobj)))
} else {
file <- tempfile()
writeBin(dataobj, file)
}
}
response <- auth_post(url, encode="multipart",
body=list(pid=pid, object=upload_file(file),
sysmeta=upload_file(sm_file, type='text/xml')), node=x)
# Use charset 'utf-8' if not specified in response headers
charset <- "utf-8"
if(response$status_code != "200") {
#d1_errors(response)
stop(sprintf("Error creating %s: %s\n", pid, getErrorDescription(response)))
} else {
if("content-type" %in% names(response$headers)) {
media <- parse_media(response$headers[['content-type']])
if("params" %in% names(media) && "charset" %in% names(media$params)) {
charset <- media$params$charset
}
}
xml <- xmlParse(content(response, as="text", encoding=charset))
# Return the identifier, ignoring the namespace (in case it changes in the future).
# xml is similiar to:
# <d1:identifier xmlns:d1="http://ns.dataone.org/service/types/v1">urn:uuid:6b36a8eb-75c8-4651-b6b6-b954786933f2</d1:identifier>
returnPid <- ""
tryCatch({
returnPid <- xmlValue(getNodeSet(xml, "/*[local-name()='identifier']")[[1]])
}, error = function(err) {
returnPid <- pid
})
return(returnPid)
}
})
#' Update an object on a Member Node, by creating a new object that replaces an original.
#' @rdname updateObject
#' @aliases updateObject
#' @description This method provides the ability to update a data or metadata object to the Member Node
#' provided in the \code{'x'} parameter. In DataONE, both the original object and the new object are
#' maintained, each with its own persistent identifier, and the 'obsoletes' field in the SystemMetadata is
#' used to reflect the fact that the new object replaces the old. Both objects remain accessible.
#' @details In the version 2.0 library and higher, this operation can utilize an
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file
#' system can also be used. This certificate provides authentication credentials from
#' CILogon \url{https://cilogon.org/?skin=DataONE}. See \code{vignette("dataone-overview")} for details.
#' @param x The MNode instance on which the object will be created
#' @param pid The identifier of the object to be updated
#' @param ... (Not yet used.)
#' @return A \code{character} containing the identifier if successful.
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.update}
#' @import datapack
#' @export
#' @note Please see the vignette *upload-data* for an example: \code{vignette("upload-data")}
setGeneric("updateObject", function(x, ...) {
standardGeneric("updateObject")
})
#' @param file the absolute file location of the object to be uploaded
#' @param newpid The identifier of the new object to be created
#' @param sysmeta a SystemMetadata instance describing properties of the object
#' @param dataobj a \code{raw} object to use for the upload, instead of the contents of the \code{file} argument.
#' @rdname updateObject
setMethod("updateObject", signature("MNode"), function(x, pid, file=as.character(NA), newpid, sysmeta, dataobj=NULL) {
stopifnot(is.character(pid))
# TODO: need to properly URL-escape the PID
url <- paste(x@endpoint, "object", sep="/")
# Check if the user has set the sysmeta submitter and rightsHolder,
# if not, then set them to the values contained in their authentication token
# or X.509 certificate.
am <- AuthenticationManager()
suppressWarnings(isValid <- isAuthValid(am, x))
# If authentication isn't valid, then let this call fail in auth_put, so the
# appropriate messages are printed.
if (isValid) {
if(is.na(sysmeta@submitter)) {
sysmeta@submitter <- getAuthSubject(am, x)
}
if(is.na(sysmeta@rightsHolder)) {
sysmeta@rightsHolder <- sysmeta@submitter
}
if(is.na(sysmeta@authoritativeMemberNode)) {
sysmeta@authoritativeMemberNode <- x@identifier
}
}
sysmetaxml <- serializeSystemMetadata(sysmeta, version=x@APIversion)
sm_file <- tempfile()
writeLines(sysmetaxml, sm_file)
if(!is.null(dataobj)) {
if(!is.na(file)) {
stop("Both 'file' and 'dataobj' arguments have been specified")
}
file <- tempfile()
writeBin(dataobj, file)
}
response <- auth_put(url, encode="multipart",
body=list(pid=pid, object=upload_file(file),
newPid=newpid, sysmeta=upload_file(sm_file, type='text/xml')), node=x)
if(response$status_code != "200") {
#d1_errors(response)
stop(sprintf("Error updating %s: %s\n", pid, getErrorDescription(response)))
} else {
# Use charset 'utf-8' if not specified in response headers
charset <- "utf-8"
if("content-type" %in% names(response$headers)) {
media <- parse_media(response$headers[['content-type']])
if("params" %in% names(media) && "charset" %in% names(media$params)) {
charset <- media$params$charset
}
}
xml <- xmlParse(content(response, as="text", encoding=charset))
# Return the identifier, ignoring the namespace (in case it changes in the future).
# xml is similiar to:
# <d1:identifier xmlns:d1="http://ns.dataone.org/service/types/v1">urn:uuid:6b36a8eb-75c8-4651-b6b6-b954786933f2</d1:identifier>
returnPid <- ""
tryCatch({
returnPid <- xmlValue(getNodeSet(xml, "/*[local-name()='identifier']")[[1]])
}, error = function(err) {
returnPid <- pid
})
}
return(returnPid)
})
#' Update the system metadata associated with an object.
#' @description A modified SytemMetadata object can be sent to DataONE that contains
#' updated information. This function allow updating of the system metadata without
#' updating the object that it describes, so that mutable attributes such as accessPolicy
#' can be updated easily.
#' @details In the version 2.0 library and higher, this operation can utilize an
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file
#' system can also be used. This certificate provides authentication credentials from
#' CILogon \url{https://cilogon.org/?skin=DataONE}. See \code{vignette("dataone-overview")} for details.
#' @param x The MNode instance from which the SystemMetadata will be downloaded
#' @param ... (Not yet used.)
#' @return A logical value, TRUE if the operation was successful, FALSE if there was an error.
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.updateSystemMetadata}
#' @import datapack
#' @export
#' @note Please see the vignette *upload-data* for an example: \code{vignette("upload-data")}
setGeneric("updateSystemMetadata", function(x, ...) {
standardGeneric("updateSystemMetadata")
})
#' @rdname updateSystemMetadata
#' @param pid The identifier of the object
#' @param sysmeta a SystemMetadata instance with updated information.
#' @export
setMethod("updateSystemMetadata", signature("MNode"), function(x, pid, sysmeta) {
stopifnot(is.character(pid))
stopifnot(inherits(sysmeta, "SystemMetadata"))
url <- paste(x@endpoint, "meta", URLencode(pid, reserved=T), sep="/")
# Check if the user has set the sysmeta submitter and rightsHolder,
# if not, then set them to the values contained in their authentication token
# or X.509 certificate.
am <- AuthenticationManager()
suppressWarnings(isValid <- isAuthValid(am, x))
# If authentication isn't valid, then let this call fail in auth_put, so the
# appropriate messages are printed.
if (isValid) {
if(is.na(sysmeta@submitter)) {
sysmeta@submitter <- getAuthSubject(am)
}
if(is.na(sysmeta@rightsHolder)) {
sysmeta@rightsHolder <- sysmeta@submitter
}
}
sysmetaxml <- serializeSystemMetadata(sysmeta, version=x@APIversion)
sm_file <- tempfile()
writeLines(sysmetaxml, sm_file)
response <- auth_put(url, encode="multipart", body=list(pid=pid, sysmeta=upload_file(sm_file, type='text/xml')), node=x)
if(response$status_code != "200") {
warning(sprintf("Error updating %s: %s\n", pid, getErrorDescription(response)))
return(FALSE)
} else {
return(TRUE)
}
})
#' Get a unique identifier that is generated by the Member Node repository and guaranteed to be unique.
#' @description Creating objects requires use of a unique persistent identifier (pid) when calling the create
#' function. Member Nodes may optionally provide the generateIdentifier service to issue such identifiers,
#' ensuring that they are unique. Each identifier conforms to an identifier scheme, which determines the syntax and
#' rules for how the identifier that is generated is formatted. All Member Nodes that implement this method must
#' support the UUID scheme, but may also support other schemes such as DOI and others.
#' @details In the version 2.0 library and higher, this operation can utilize an
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file
#' system can also be used. This certificate provides authentication credentials from
#' CILogon \url{https://cilogon.org/?skin=DataONE}. See \code{vignette("dataone-overview")} for details.
#' @param x The MNode instance on which the object will be created
#' @param ... (Not yet used.)
#' @rdname generateIdentifier
#' @aliases enerateIdentifier
#' @return the character string of the generated unique identifier
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.generateIdentifier}
#' @export
#' @examples
#' \dontrun{
#' library(dataone)
#' cn <- CNode("STAGING")
#' mn <- getMNode(cn, "urn:node:mnStageUCSB2")
#' newid <- generateIdentifier(mn, "UUID")
#' }
setGeneric("generateIdentifier", function(x, ...) {
standardGeneric("generateIdentifier")
})
#' @rdname generateIdentifier
#' @param scheme The identifier scheme to be used, such as DOI, UUID, etc.
#' @param fragment An optional fragment to be prepended to the identifier for schemes that support it (not all do).
setMethod("generateIdentifier", signature("MNode"), function(x, scheme="UUID", fragment=NULL) {
# TODO: need to properly URL-escape the PID
url <- paste(x@endpoint, "generate", sep="/")
body = list(scheme = scheme, fragment = fragment)
if (is.null(fragment)) {
body = list(scheme = scheme)
}
response <- auth_post(url=url, encode="multipart", body=body, node=x)
charset <- "utf-8"
if(response$status_code != "200") {
stop(sprintf("Error generating ID of type %s: %s\n", scheme, getErrorDescription(response)))
} else {
if("content-type" %in% names(response$headers)) {
media <- parse_media(response$headers[['content-type']])
if("params" %in% names(media) && "charset" %in% names(media$params)) {
charset <- media$params$charset
}
}
}
# extract the identifier as a character string from the XML response
xml <- xmlParse(content(response, as="text", encoding=charset))
new_identifier <- xmlValue(xmlRoot(xml))
return(new_identifier)
})
#' Download a data package from a member node.
#' @description Given a valid identifier, download a file containing all of the package
#' members of the corresponding DataONE data package.
#' @details The default data package file format is a Bagit file (\url{https://tools.ietf.org/html/draft-kunze-bagit-09}).
#' The downloaded package file is compressed using the ZIP format and will be located in an R session temporary
#' file. Other packaging formats can be requested if they have been implemented by the requested member node.
#' @param x A MNode instance representing a DataONE Member Node repository.
#' @param ... (not yet used)
#' @return The location of the package file downloaded from the member node.
#' @seealso \code{\link[=MNode-class]{MNode}}{ class description.}
#' @import uuid
#' @export
#' @examples \dontrun{
#' cn <- CNode()
#' mn <- getMNode(cn, "urn:node:KNB")
#' packageFileName <- getPackage(mn, id="resourceMap_Blandy.76.2")
#' }
setGeneric("getPackage", function(x, ...) {
standardGeneric("getPackage")
})
#' @rdname getPackage
#' @param identifier The identifier of the package to retrieve. The identifier can be for the
#' resource map, metadata file, data file, or any other package member.
#' @param format The format to send the package in.
#' @param dirPath The directory path to save the package to.
#' @param unzip (logical) If the dirPath is specified, the package can also be unzipped automatically (unzip=TRUE).
setMethod("getPackage", signature("MNode"), function(x, identifier, format="application/bagit-097", dirPath=NULL, unzip=FALSE) {
# The identifier provided could be the package id (resource map), the metadata id or a package member (data, etc)
# The solr queries attempt to determine which id was specified and may issue additional queries to get all the
# data, for example, the metadata solr record must be retrieved to obtain all the package members.
resmapId <- as.character(NA)
metadataPid <- as.character(NA)
# First find the metadata object for a package. Try to get all required info, but not all record types have all
# these fields filled out.
queryParamList <- list(q=sprintf('id:\"%s\"', identifier), fl='isDocumentedBy,resourceMap,documents,formatType')
result <- query(x, queryParamList, as="list")
# Didn't get a result from the CN, query the MN directly. This may happen for several reasons including
# a new package hasn't been synced to the CN, the package is in a dev environment where CN sync is off, etc.
if(is.null(result) || length(result) == 0) {
stop(sprintf("Identifier %s not found on node %s", identifier, x@identifier))
}
formatType <- result[[1]]$formatType[[1]]
# Check if we have the metadata object, and if not, then get it. If a data object pid was specified, then it is possible that
# it can be contained in mulitple packages. For now, just use the first package returned.
# TODO: follow the obsolesence chain up to the most current version.
if(formatType == "METADATA") {
# We have the metadata object, which contains the list of package members in the 'documents' field
resmapId <- result[[1]]$resourceMap
metadataPid <- identifier
packageMembers <- as.list(result[[1]]$documents)
} else if(formatType == "RESOURCE") {
resmapId <- identifier
# Get the metadata object for this resource map
queryParamList <- list(q=sprintf('resourceMap:\"%s\"', identifier), fq='formatType:METADATA', fl='id,documents,formatType')
result <- query(x, queryParamList, as="list")
if (length(result) == 0) {
stop(sprintf("Unable to find metadata object for identifier: %s on node %s", identifier, x@identifier))
}
metadataPid <- result[[1]]$id
packageMembers <- as.list(result[[1]]$documents)
} else {
# This must be a package member, so get the metadata pid for the package
metadataPid <- result[[1]]$isDocumentedBy
queryParamList <- list(q=sprintf('id:\"%s\"', metadataPid), fl='documents,formatType,resourceMap')
result <- query(x, queryParamList, as="list")
if (length(result) == 0) {
stop(sprintf("Unable to find metadata object with identifier: %s on node %", identifier, x@identifier))
}
resmapId <- result[[1]]$resourceMap
packageMembers <- as.list(result[[1]]$documents)
}
# The Solr index can contain multiple resource maps that refer to our metadata object. There should be only
# one current resource map that refers to this metadata, the others may be previous versions of the resmap
# that are now obsolete. If multple resource map pids were returned, filter out the obsolete ones.
if(length(resmapId) > 1) {
quoteSetting <- getOption("useFancyQuotes")
options(useFancyQuotes = FALSE)
newIds <- dQuote(unlist(resmapId))
options(useFancyQuotes = quoteSetting)
qStr <- sprintf("id:(%s)", paste(newIds, collapse=" OR "))
queryParamList <- list(q=qStr, fq="NOT obsoletedBy:* AND archived:false", fl="id")
result <- query(x, queryParamList, as="list")
resmapId <- unlist(result)
if(length(resmapId) == 0) {
stop("It appears that all resource maps that reference this package are obsolete or archived.")
}
if(length(resmapId) > 1) {
resmapStr <- paste(resmapId, collapse=", ")
stop(sprintf("The metadata identifier %s is referenced by more than one current resource map: %s", metadataPid, resmapStr))
}
}
# getPackage was implemented in API v1.2
url <- sprintf("%s/packages/%s/%s", x@endpoint, URLencode(format, reserved=T), resmapId)
response <- auth_get(url, node=x)
if (response$status_code == "200") {
if(!is.null(dirPath)){
stopifnot(is.character(dirPath))
stopifnot(dir.exists(dirPath))
fileName <- paste0(gsub("[[:punct:]]", "_", identifier), ".zip")
packageFile <- file.path(dirPath, fileName)
if(!file.exists(packageFile)){
file.create(packageFile)
}
packageBin <- content(response, as="raw")
writeBin(packageBin, packageFile)
if(unzip == TRUE){
unzip(packageFile, exdir = dirPath)
file.remove(packageFile) #remove zip
return(gsub(".zip$", "", packageFile)) #unzipped directory path
} else {
return(packageFile)
}
} else {
packageFile <- tempfile(pattern=sprintf("%s-", UUIDgenerate()), fileext=".zip")
packageBin <- content(response, as="raw")
writeBin(packageBin, packageFile)
return(packageFile)
}
} else {
warning(sprintf("Error calling getPackage: %s\n", getErrorDescription(response)))
return(NULL)
}
})
############# Private functions, internal to this class, not for external callers #################
show_auth_message <- function() {
message(sprintf('Exception name: %s', "NotAuthenticated"), "\n")
message(sprintf('Exception description: %s', "You must be logged in with a valid certificate file."), "\n")
message("You can log in and download a certificate at https://cilogon.org/?skin=DataONE")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.