R/MNode.R

Defines functions show_auth_message

#
#   This work was created by participants in the DataONE project, and is
#   jointly copyrighted by participating institutions in DataONE. For
#   more information on DataONE, see our web site at http://dataone.org.
#
#     Copyright 2011-2013
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#

#' @title Provides R API to DataONE Member Node services.
#' @description MNode provides functions that interact with a DataONE Member Node (MN). A MN 
#' is a repository that provides access for reading and writing data and metadata using the
#' DataONE MN service API.  The MN API includes functions for retrieving data and metadata based on its
#' unique persistent identifier (pid), as well as for creating, updating, and archiving these data and
#' metadata objects.  
#' @details   
#' Methods that perform write operations (such as createObject and updateObject) on the MN generally 
#' require authentication. For MNs that have implemented the DataONE API version 2.0 and higher, these operations can utilize an 
#' authentication token to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your account profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file 
#' system can also be used. This certificate provides authentication credentials from 
#' CILogon \url{https://cilogon.org/?skin=DataONE}.
#' @slot endpoint The url to access node services, which is the baseURL plus the version string
#' @rdname MNode-class
#' @aliases MNode-class
#' @include D1Node.R
#' @include auth_request.R
#' @import methods
#' @importFrom utils URLencode
#' @section Methods:
#' \itemize{
#'  \item{\code{\link{MNode}}}{: Create a MNode object representing a DataONE Member Node repository.}
#'  \item{\code{\link{createObject}}}{: Create an object on a Member Node.}
#'  \item{\code{\link{getObject}}}{: Get the bytes associated with an object on the Member Node}
#'  \item{\code{\link{getCapabilities}}}{: Get the node capabilities description, and store the information in the MNode.}
#'  \item{\code{\link{generateIdentifier}}}{: Get a unique identifier that is generated by the Member Node repository and guaranteed to be unique.}
#'  \item{\code{\link{getPackage}}}{: Download a data package from a member node.}
#'  \item{\code{\link{updateObject}}}{: Update an object to a Member Node, by creating a new object that replaces an original.}
#'  \item{\code{\link{updateSystemMetadata}}}{: Update the system metadata associated with an object.}
#' }
#' @seealso \code{\link{dataone}}{ package description.}
#' @export
#' @examples \dontrun{
#' library(dataone)
#' library(uuid)
#' library(digest)
#' cn <- CNode("STAGING")
#' mn <- getMNode(cn, "urn:node:mnStageUCSB2")
#' mnid <- mn@identifier
#' # Have Dataone create an identifier for you (requires authentication)
#' \dontrun{
#' newid <- generateIdentifier(mn, "UUID")
#' }
#' # Create an identifier manually
#' newid <- paste("urn:uuid:", UUIDgenerate(), sep="")
#' testdf <- data.frame(x=1:10,y=11:20)
#' csvfile <- paste(tempfile(), ".csv", sep="")
#' write.csv(testdf, csvfile, row.names=FALSE)
#' f <- "text/csv"
#' size <- file.info(csvfile)$size
#' sha256 <- digest(csvfile, algo="sha256", serialize=FALSE, file=TRUE)
#' sysmeta <- new("SystemMetadata", identifier=newid, formatId=f, size=size,
#'     checksum=sha256, originMemberNode=mnid, authoritativeMemberNode=mnid)
#' # Upload data to DataONE (requires authentication)
#' \dontrun{
#' response <- createObject(mn, newid, csvfile, sysmeta)
#' }
#' }
setClass("MNode", slots = c(endpoint = "character"), contains="D1Node")

#########################
## MNode constructors
#########################

#' Create a MNode object representing a DataONE Member Node repository.
#' @description Construct an instance of MNode to provide mechanisms to access, create, and update data and 
#' metadata objects on the associated Member Node.
#' @details If the \code{'x'} is a string, it is treated as a URI and an attempt to find an associated
#' Member Node at that base URL is attempted.  If \code{'x'} is a Node reference, then it is cast to a MNode
#' instance.  This typically is used from the getMNode() function from the CNode class, which is the preferred
#' way to retrieve an instance of an MNode.
#' @param x a URI representing a  base URL (i.e. https://knb.ecoinformatics.org/knb/d1/mn/v2); or a reference to a dataone::Node instance
#' @rdname MNode
#' @return the MNode object-
#' @seealso \code{\link[=MNode-class]{MNode}}{ class description.}
#' @export
#' @examples \dontrun{
#' mn <- MNode("https://knb.ecoinformatics.org/knb/d1/mn/v2")
#' }
setGeneric("MNode", function(x) {
    standardGeneric("MNode")
})

#' @rdname MNode
#' @export 
setMethod("MNode", signature("character"), function(x) {

    ## create new MNode object and insert uri endpoint
    mnode <- new("MNode")
    mnode@endpoint <- x
    
    ## Lookup the rest of the node information
    xml <- getCapabilities(mnode)
    # getCapabilties returns NULL if an error was encoutered
    if (is.null(xml)) return(NULL)
    mnode <- parseCapabilities(mnode, xmlRoot(xml))
    # Set the service URL fragment for the solr query engine
    mnode@serviceUrls <- data.frame(service="query.solr", Url=paste(mnode@endpoint, "query", "solr/", sep="/"), row.names = NULL, stringsAsFactors = FALSE)
    # Determine if this node is in the production or a test environment.
    # If it is is a test domain, then mark as a test node.
    if(length(mnode@env) == 0) {
      if (grepl("test.dataone.org", mnode@endpoint)) {
        mnode@env <- "test"
      } else {
        # Only call the cn if there is no other way to determine
        # the network that this node is in.
        # List nodes in the production environment, if the node
        # is not in the production enviroment, then it must be in
        # a test environment.
        cn <- CNode("PROD")
        nodelist <- listNodes(cn)
        match <- sapply(nodelist, function(x) { 
          x@identifier == mnode@identifier && x@type == "mn"
        })
        output.list <- nodelist[match]
        if (length(output.list) == 1) {
          mnode@env <- "prod"
        } else {
          mnode@env <- "test"
        }
      }
    }
    return(mnode)
})

#' @rdname MNode
#' @export
setMethod("MNode", signature("D1Node"), function(x) {
  
    if (x@type == "mn") {
        ## create new MNode object and insert uri endpoint
        mnode <- new("MNode")
        mnode@identifier = x@identifier
        mnode@name = x@name    
        mnode@description = x@description
        mnode@baseURL = x@baseURL
        mnode@subject = x@subject
        mnode@contactSubject = x@contactSubject
        mnode@replicate = x@replicate
        mnode@type = x@type
        mnode@state = x@state
        mnode@services <- x@services
        mnode@endpoint <- paste(x@baseURL, x@APIversion, sep="/")
        # Set the service URL fragment for the solr query engine
        mnode@serviceUrls <- data.frame(service="query.solr", Url=paste(mnode@endpoint, "query", "solr/", sep="/"), row.names = NULL, stringsAsFactors = FALSE)
        mnode@APIversion <- x@APIversion
        mnode@env <- x@env
        return(mnode)
    } else {
        stop("Error: Node is not of type 'mn'.")
    }
})

##########################
## Methods
##########################

#' Get the node capabilities description, and store the information in the MNode.
#' @description Access the DataONE getCapabilities() service for the Member Node, which returns an XML
#' description of the repository and the services it offers.
#' @rdname getCapabilities
#' @aliases getCapabilities
#' @param x The node identifier with which this node is registered in DataONE
#' @param ... (Not yet used.)
#' @return an XMLInternalDocument object representing the DataONE environment
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MN_core.getCapabilities}
#' @import XML
#' @import httr
#' @export
#' @examples \dontrun{
#' library(dataone)
#' cn <- CNode()
#' mn <- getMNode(cn, "urn:node:KNB")
#' xml <- getCapabilities(mn)
#' }
setGeneric("getCapabilities", function(x, ...) {
    standardGeneric("getCapabilities")
})

#' @rdname getCapabilities
setMethod("getCapabilities", signature("MNode"), function(x) {
  url <- paste(x@endpoint, "node", sep="/")
  # Don't need privileged access, so call GET directly vs auth_get
  
  response <- GET(url, user_agent(get_user_agent()))
  # Use charset 'utf-8' if not specified in response headers
  charset <- "utf-8"
  if(response$status_code != "200") {
    stop(sprintf("Error accessing %s: %s\n", x@endpoint, getErrorDescription(response)))
  } else {
    if("content-type" %in% names(response$headers)) {
      media <- parse_media(response$headers[['content-type']])
      if("params" %in% names(media) && "charset" %in% names(media$params)) {
        charset <- media$params$charset
      }
    } 
  }
  
  xml <- xmlParse(content(response, as="text", encoding=charset))
  return(xml)
})

#' @param check A logical value, if TRUE check if this object has been obsoleted by another object in DataONE.
#' @rdname getObject
setMethod("getObject", signature("MNode"), function(x, pid, check=as.logical(FALSE)) {
  
  stopifnot(is.character(pid))
    if(!inherits(check, "logical")) {
      stop("Invalid argument: 'check' must be specified as a logical.")
    }

    # TODO: need to properly URL-escape the PID
    url <- paste(x@endpoint, "object", URLencode(pid, reserved=T), sep="/")
    
    # Check if the requested pid has been obsoleted by a newer version
    # and print a warning
    if (check) {
        sysmeta <- getSystemMetadata(x, pid)
        if (!is.na(sysmeta@obsoletedBy)) {
            message(sprintf('Warning: pid "%s" is obsoleted by pid "%s"', pid, sysmeta@obsoletedBy))
        }
    }
    
    response <- auth_get(url, node=x)
    
    if (response$status_code != "200") {
        stop(sprintf("get() error: %s\n", getErrorDescription(response)))
    }
    return(content(response, as = "raw"))
})

#' @import datapack
#' @export
#' @rdname getSystemMetadata
setMethod("getSystemMetadata", signature("MNode"), function(x, pid) {
  stopifnot(is.character(pid))
    url <- paste(x@endpoint, "meta", URLencode(pid, reserved=T), sep="/")
    response <- auth_get(url, node=x)
    
    # Use charset 'utf-8' if not specified in response headers
    charset <- "utf-8"
    if(response$status_code != "200") {
      warning(sprintf("Error getting SystemMetadata: %s\n", getErrorDescription(response)))
        return(NULL)
    } else {
      if("content-type" %in% names(response$headers)) {
        media <- parse_media(response$headers[['content-type']])
        if("params" %in% names(media) && "charset" %in% names(media$params)) {
          charset <- media$params$charset
        }
      } 
    }
    
    # Convert the response into a SystemMetadata object
    sysmeta <- SystemMetadata(xmlRoot(xmlParse(content(response, as="text", encoding=charset))))
    return(sysmeta)
})

#' @rdname getChecksum
#' @param checksumAlgorithm The algorithm used to calculate the checksum. Default="SHA-256"
#' @export
setMethod("getChecksum", signature("MNode"), function(x, pid, checksumAlgorithm="SHA-256") {
  stopifnot(is.character(pid))
  url <- paste(x@endpoint, "checksum", URLencode(pid, reserved=T), sep="/")
  url <- paste0(url, '?checksumAlgorithm=', checksumAlgorithm)
  response <- auth_get(url, node=x)
  
  if(response$status_code != "200") {
    warning(sprintf("Error getting checksum: %s\n", getErrorDescription(response)))
    return(NULL)
  }
  
  if (is.raw(response$content)) {
    tmpres <- content(response, as="raw")
    resultText <- rawToChar(tmpres)
  } else {
    # Use charset 'utf-8' if not specified in response headers
    charset <- "utf-8"
    if("content-type" %in% names(response$headers)) {
      media <- parse_media(response$headers[['content-type']])
      if("params" %in% names(media) && "charset" %in% names(media$params)) {
        charset <- media$params$charset
      }
    } 
    resultText <- content(response, as="text", encoding=charset)
  }
  
  checksum<-(xmlToList(xmlParse(resultText)))
  returnVal <- checksum$text
  # Set an attribute of the algorithm used to calculate the checksum
  attr(returnVal, "algorithm") <- checksum$.attrs[['algorithm']]
  return(returnVal)
})

#' Create an object on a Member Node.
#' @description This method provides the ability to upload a data or metadata object to the Member Node
#' provided in the \code{'mnode'} parameter.  
#' @details In the version 2.0 library and higher, this operation can utilize an 
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file 
#' system can also be used. This certificate provides authentication credentials from 
#' CILogon \url{https://cilogon.org/?skin=DataONE}.  See \code{vignette("dataone-overview")} for details.
#' @rdname createObject
#' @aliases createObject
#' @param x The MNode instance on which the object will be created
#' @param pid The identifier of the object to be created
#' @param ... (Not yet used.)
#' @return a \code{character} containing the identifier that was created.
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.create}
#' @import datapack
#' @export
#' @examples \dontrun{
#' # Create an object in the DataONE "STAGING" environment
#' library(dataone)
#' library(uuid)
#' library(digest)
#' library(datapack)
#' cn <- CNode("STAGING")
#' mn <- getMNode(cn, "urn:node:mnStageUCSB2")
#' # Have Dataone create an identifier for you (requires authentication)
#' \dontrun{
#' newid <- generateIdentifier(mn, "UUID")
#' }
#' # Create an identifier manually
#' newid <- paste("urn:uuid:", UUIDgenerate(), sep="") 
#' testdf <- data.frame(x=1:10,y=11:20)
#' csvfile <- paste(tempfile(), ".csv", sep="")
#' write.csv(testdf, csvfile, row.names=FALSE)
#' format <- "text/csv"
#' size <- file.info(csvfile)$size
#' sha256 <- digest(csvfile, algo="sha256", serialize=FALSE, file=TRUE)
#' sysmeta <- new("SystemMetadata", identifier=newid, formatId=format, size=size, checksum=sha256)
#' sysmeta <- addAccessRule(sysmeta, "public", "read")
#' # Upload the data to DataONE (requires authentication)
#' \dontrun{
#' createObject(mn, newid, csvfile, sysmeta)
#' }
#' }
setGeneric("createObject", function(x, ...) {
    standardGeneric("createObject")
})

#' @rdname createObject
#' @param file the absolute file location of the object to be uploaded
#' @param sysmeta a SystemMetadata instance describing properties of the object
#' @param dataobj a \code{raw} object to use for the upload, instead of the contents of the \code{file} argument.
setMethod("createObject", signature("MNode"), function(x, pid, file=as.character(NA), sysmeta, dataobj=NULL, ...) {
  stopifnot(is.character(pid))
    # TODO: need to properly URL-escape the PID
    url <- paste(x@endpoint, "object", sep="/")
    
    # Check if the user has set the sysmeta submitter and rightsHolder, 
    # if not, then set them to the values contained in their authentication token 
    # or X.509 certificate.
    am <- AuthenticationManager()
    suppressMessages(isValid <- isAuthValid(am, x))
    # If authentication isn't valid, then let this call fail in auth_post, so the
    # appropriate messages are printed.
    if (isValid) {
      if(is.na(sysmeta@submitter)) {
        sysmeta@submitter <- getAuthSubject(am, x)
      }
      if(is.na(sysmeta@rightsHolder)) {
        sysmeta@rightsHolder <- sysmeta@submitter
      }
      if(is.na(sysmeta@authoritativeMemberNode)) {
        sysmeta@authoritativeMemberNode <- x@identifier
      }
    }
    
    sysmetaxml <- serializeSystemMetadata(sysmeta, version=x@APIversion)
    sm_file <- tempfile()
    writeLines(sysmetaxml, sm_file)
    if(!is.null(dataobj)) {
      if(!is.na(file)) {
        stop("Both 'file' and 'dataobj' arguments have been specified")
      }
      if(!is.raw(dataobj)) {
        stop(sprintf("Invalid type \"%s\" for \"dataobj\" argument, only values of \"raw\" type are accepted.", class(dataobj)))
      } else {
        file <- tempfile()
        writeBin(dataobj, file)
      }
    }
    response <- auth_post(url, encode="multipart", 
                body=list(pid=pid, object=upload_file(file),
                sysmeta=upload_file(sm_file, type='text/xml')), node=x)
    
    # Use charset 'utf-8' if not specified in response headers
    charset <- "utf-8"
    if(response$status_code != "200") {
        #d1_errors(response)
        stop(sprintf("Error creating %s: %s\n", pid, getErrorDescription(response)))
    } else {
      if("content-type" %in% names(response$headers)) {
        media <- parse_media(response$headers[['content-type']])
        if("params" %in% names(media) && "charset" %in% names(media$params)) {
          charset <- media$params$charset
        }
      } 
      xml <- xmlParse(content(response, as="text", encoding=charset))
      # Return the identifier, ignoring the namespace (in case it changes in the future).
      # xml is similiar to:
      #     <d1:identifier xmlns:d1="http://ns.dataone.org/service/types/v1">urn:uuid:6b36a8eb-75c8-4651-b6b6-b954786933f2</d1:identifier>
      returnPid <- ""
      tryCatch({
          returnPid <- xmlValue(getNodeSet(xml, "/*[local-name()='identifier']")[[1]])
      }, error = function(err) {
          returnPid <- pid
      })
      return(returnPid)
    }
})

#' Update an object on a Member Node, by creating a new object that replaces an original.
#' @rdname updateObject
#' @aliases updateObject
#' @description This method provides the ability to update a data or metadata object to the Member Node
#' provided in the \code{'x'} parameter.  In DataONE, both the original object and the new object are
#' maintained, each with its own persistent identifier, and the 'obsoletes' field in the SystemMetadata is
#' used to reflect the fact that the new object replaces the old.  Both objects remain accessible.
#' @details In the version 2.0 library and higher, this operation can utilize an 
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file 
#' system can also be used. This certificate provides authentication credentials from 
#' CILogon \url{https://cilogon.org/?skin=DataONE}.  See \code{vignette("dataone-overview")} for details.
#' @param x The MNode instance on which the object will be created
#' @param pid The identifier of the object to be updated
#' @param ... (Not yet used.)
#' @return A \code{character} containing the identifier if successful.
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.update}
#' @import datapack
#' @export
#' @note Please see the vignette *upload-data* for an example: \code{vignette("upload-data")}
setGeneric("updateObject", function(x, ...) {
    standardGeneric("updateObject")
})

#' @param file the absolute file location of the object to be uploaded
#' @param newpid The identifier of the new object to be created
#' @param sysmeta a SystemMetadata instance describing properties of the object
#' @param dataobj a \code{raw} object to use for the upload, instead of the contents of the \code{file} argument.
#' @rdname updateObject
setMethod("updateObject", signature("MNode"), function(x, pid, file=as.character(NA), newpid, sysmeta, dataobj=NULL) {
  stopifnot(is.character(pid))
    # TODO: need to properly URL-escape the PID
    url <- paste(x@endpoint, "object", sep="/")
    
    # Check if the user has set the sysmeta submitter and rightsHolder, 
    # if not, then set them to the values contained in their authentication token 
    # or X.509 certificate.
    am <- AuthenticationManager()
    suppressWarnings(isValid <- isAuthValid(am, x))
    # If authentication isn't valid, then let this call fail in auth_put, so the
    # appropriate messages are printed.
    if (isValid) {
      if(is.na(sysmeta@submitter)) {
        sysmeta@submitter <- getAuthSubject(am, x)
      }
      if(is.na(sysmeta@rightsHolder)) {
        sysmeta@rightsHolder <- sysmeta@submitter
      }
      if(is.na(sysmeta@authoritativeMemberNode)) {
        sysmeta@authoritativeMemberNode <- x@identifier
      }
    }
    
    sysmetaxml <- serializeSystemMetadata(sysmeta, version=x@APIversion)
    sm_file <- tempfile()
    writeLines(sysmetaxml, sm_file)
    if(!is.null(dataobj)) {
      if(!is.na(file)) {
        stop("Both 'file' and 'dataobj' arguments have been specified")
      }
      file <- tempfile()
      writeBin(dataobj, file)
    }
    response <- auth_put(url, encode="multipart", 
                body=list(pid=pid, object=upload_file(file), 
                newPid=newpid, sysmeta=upload_file(sm_file, type='text/xml')), node=x)
    
    if(response$status_code != "200") {
        #d1_errors(response)
        stop(sprintf("Error updating %s: %s\n", pid, getErrorDescription(response)))
    } else {
        # Use charset 'utf-8' if not specified in response headers
        charset <- "utf-8"
        if("content-type" %in% names(response$headers)) {
          media <- parse_media(response$headers[['content-type']])
          if("params" %in% names(media) && "charset" %in% names(media$params)) {
            charset <- media$params$charset
          }
        } 
        xml <- xmlParse(content(response, as="text", encoding=charset))
        # Return the identifier, ignoring the namespace (in case it changes in the future).
        # xml is similiar to:
        #     <d1:identifier xmlns:d1="http://ns.dataone.org/service/types/v1">urn:uuid:6b36a8eb-75c8-4651-b6b6-b954786933f2</d1:identifier>
        returnPid <- ""
        tryCatch({
          returnPid <- xmlValue(getNodeSet(xml, "/*[local-name()='identifier']")[[1]])
        }, error = function(err) {
          returnPid <- pid
        })
    }
    return(returnPid)
})

#' Update the system metadata associated with an object.
#' @description A modified SytemMetadata object can be sent to DataONE that contains
#' updated information. This function allow updating of the system metadata without
#' updating the object that it describes, so that mutable attributes such as accessPolicy
#' can be updated easily.
#' @details In the version 2.0 library and higher, this operation can utilize an 
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file 
#' system can also be used. This certificate provides authentication credentials from 
#' CILogon \url{https://cilogon.org/?skin=DataONE}.  See \code{vignette("dataone-overview")} for details.
#' @param x The MNode instance from which the SystemMetadata will be downloaded
#' @param ... (Not yet used.)
#' @return A logical value, TRUE if the operation was successful, FALSE if there was an error.
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.updateSystemMetadata}
#' @import datapack
#' @export
#' @note Please see the vignette *upload-data* for an example: \code{vignette("upload-data")}
setGeneric("updateSystemMetadata", function(x, ...) {
    standardGeneric("updateSystemMetadata")
})

#' @rdname updateSystemMetadata
#' @param pid The identifier of the object
#' @param sysmeta a SystemMetadata instance with updated information.
#' @export
setMethod("updateSystemMetadata", signature("MNode"), function(x, pid, sysmeta) {
  stopifnot(is.character(pid))
  stopifnot(inherits(sysmeta, "SystemMetadata"))
    url <- paste(x@endpoint, "meta", URLencode(pid, reserved=T), sep="/")
    
    # Check if the user has set the sysmeta submitter and rightsHolder, 
    # if not, then set them to the values contained in their authentication token 
    # or X.509 certificate.
    am <- AuthenticationManager()
    suppressWarnings(isValid <- isAuthValid(am, x))
    # If authentication isn't valid, then let this call fail in auth_put, so the
    # appropriate messages are printed.
    if (isValid) {
      if(is.na(sysmeta@submitter)) {
        sysmeta@submitter <- getAuthSubject(am)
      }
      if(is.na(sysmeta@rightsHolder)) {
        sysmeta@rightsHolder <- sysmeta@submitter
      }
    } 
    
    sysmetaxml <- serializeSystemMetadata(sysmeta, version=x@APIversion)
    sm_file <- tempfile()
    writeLines(sysmetaxml, sm_file)
    response <- auth_put(url, encode="multipart", body=list(pid=pid, sysmeta=upload_file(sm_file, type='text/xml')), node=x)
    if(response$status_code != "200") {
      warning(sprintf("Error updating %s: %s\n", pid, getErrorDescription(response)))
      return(FALSE)
    } else {
      return(TRUE)
    }
})

#' Get a unique identifier that is generated by the Member Node repository and guaranteed to be unique.
#' @description Creating objects requires use of a unique persistent identifier (pid) when calling the create
#' function.  Member Nodes may optionally provide the generateIdentifier service to issue such identifiers, 
#' ensuring that they are unique. Each identifier conforms to an identifier scheme, which determines the syntax and
#' rules for how the identifier that is generated is formatted.  All Member Nodes that implement this method must 
#' support the UUID scheme, but may also support other schemes such as DOI and others.
#' @details In the version 2.0 library and higher, this operation can utilize an 
#' 'dataone_token' option to provide credentials for write operations in DataONE.
#' The authentication token is obtained from DataONE (see your profile on https://search.dataone.org).
#' See the \code{vignette("dataone-overview")} for details.
#' Alternatively, the version 1.0 approach of using an X.509 certificate in a default location of the file 
#' system can also be used. This certificate provides authentication credentials from 
#' CILogon \url{https://cilogon.org/?skin=DataONE}.  See \code{vignette("dataone-overview")} for details.
#' @param x The MNode instance on which the object will be created
#' @param ... (Not yet used.)
#' @rdname generateIdentifier
#' @aliases enerateIdentifier
#' @return the character string of the generated unique identifier
#' @seealso \url{https://purl.dataone.org/architecture/apis/MN_APIs.html#MNStorage.generateIdentifier}
#' @export
#' @examples
#' \dontrun{
#' library(dataone)
#' cn <- CNode("STAGING")
#' mn <- getMNode(cn, "urn:node:mnStageUCSB2")
#' newid <- generateIdentifier(mn, "UUID")
#' }
setGeneric("generateIdentifier", function(x, ...) {
    standardGeneric("generateIdentifier")
})

#' @rdname generateIdentifier
#' @param scheme The identifier scheme to be used, such as DOI, UUID, etc.
#' @param fragment An optional fragment to be prepended to the identifier for schemes that support it (not all do).
setMethod("generateIdentifier", signature("MNode"), function(x, scheme="UUID", fragment=NULL) {
    # TODO: need to properly URL-escape the PID
    url <- paste(x@endpoint, "generate", sep="/")
    body = list(scheme = scheme, fragment = fragment)
    if (is.null(fragment)) {
        body = list(scheme = scheme)
    }
    
    response <- auth_post(url=url,  encode="multipart", body=body, node=x)
    charset <- "utf-8"
    if(response$status_code != "200") {
        stop(sprintf("Error generating ID of type %s: %s\n", scheme, getErrorDescription(response)))
    } else {
      if("content-type" %in% names(response$headers)) {
        media <- parse_media(response$headers[['content-type']])
        if("params" %in% names(media) && "charset" %in% names(media$params)) {
          charset <- media$params$charset
        }
      } 
    }
    # extract the identifier as a character string from the XML response
    xml <- xmlParse(content(response, as="text", encoding=charset))
    new_identifier <- xmlValue(xmlRoot(xml))
    return(new_identifier)
})

#' Download a data package from a member node.
#' @description Given a valid identifier, download a file containing all of the package 
#' members of the corresponding DataONE data package. 
#' @details The default data package file format is a Bagit file (\url{https://tools.ietf.org/html/draft-kunze-bagit-09}).
#' The downloaded package file is compressed using the ZIP format and will be located in an R session temporary
#' file. Other packaging formats can be requested if they have been implemented by the requested member node.
#' @param x A MNode instance representing a DataONE Member Node repository.
#' @param ... (not yet used)
#' @return The location of the package file downloaded from the member node.
#' @seealso \code{\link[=MNode-class]{MNode}}{ class description.}
#' @import uuid
#' @export
#' @examples \dontrun{
#' cn <- CNode()
#' mn <- getMNode(cn, "urn:node:KNB")
#' packageFileName <- getPackage(mn, id="resourceMap_Blandy.76.2")
#' }
setGeneric("getPackage", function(x, ...) { 
    standardGeneric("getPackage")
})

#' @rdname getPackage
#' @param identifier The identifier of the package to retrieve. The identifier can be for the
#' resource map, metadata file, data file, or any other package member.
#' @param format The format to send the package in.
#' @param dirPath The directory path to save the package to.
#' @param unzip (logical) If the dirPath is specified, the package can also be unzipped automatically (unzip=TRUE).
setMethod("getPackage", signature("MNode"), function(x, identifier, format="application/bagit-097", dirPath=NULL, unzip=FALSE) {
  
  # The identifier provided could be the package id (resource map), the metadata id or a package member (data, etc)
  # The solr queries attempt to determine which id was specified and may issue additional queries to get all the
  # data, for example, the metadata solr record must be retrieved to obtain all the package members.
  resmapId <- as.character(NA)
  metadataPid <- as.character(NA)
  # First find the metadata object for a package. Try to get all required info, but not all record types have all 
  # these fields filled out.
  queryParamList <- list(q=sprintf('id:\"%s\"', identifier), fl='isDocumentedBy,resourceMap,documents,formatType')
  result <- query(x, queryParamList, as="list")
  # Didn't get a result from the CN, query the MN directly. This may happen for several reasons including
  # a new package hasn't been synced to the CN, the package is in a dev environment where CN sync is off, etc.
  if(is.null(result) || length(result) == 0) {
    stop(sprintf("Identifier %s not found on node %s", identifier, x@identifier))
  } 
  
  formatType <- result[[1]]$formatType[[1]]
  # Check if we have the metadata object, and if not, then get it. If a data object pid was specified, then it is possible that
  # it can be contained in mulitple packages. For now, just use the first package returned. 
  # TODO: follow the obsolesence chain up to the most current version.
  if(formatType == "METADATA") {
    # We have the metadata object, which contains the list of package members in the 'documents' field
    resmapId <- result[[1]]$resourceMap
    metadataPid <- identifier
    packageMembers <- as.list(result[[1]]$documents)
  } else if(formatType == "RESOURCE") {
    resmapId <- identifier
    # Get the metadata object for this resource map
    queryParamList <- list(q=sprintf('resourceMap:\"%s\"', identifier), fq='formatType:METADATA', fl='id,documents,formatType')
    result <- query(x, queryParamList, as="list")
    if (length(result) == 0) {
      stop(sprintf("Unable to find metadata object for identifier: %s on node %s", identifier, x@identifier))
    }
    metadataPid <- result[[1]]$id
    packageMembers <- as.list(result[[1]]$documents)
  } else {
    # This must be a package member, so get the metadata pid for the package
    metadataPid <- result[[1]]$isDocumentedBy
    queryParamList <- list(q=sprintf('id:\"%s\"', metadataPid), fl='documents,formatType,resourceMap')
    result <- query(x, queryParamList, as="list")
    if (length(result) == 0) {
      stop(sprintf("Unable to find metadata object with identifier: %s on node %", identifier, x@identifier))
    }
    resmapId <- result[[1]]$resourceMap
    packageMembers <- as.list(result[[1]]$documents)
  }
  
  # The Solr index can contain multiple resource maps that refer to our metadata object. There should be only
  # one current resource map that refers to this metadata, the others may be previous versions of the resmap
  # that are now obsolete. If multple resource map pids were returned, filter out the obsolete ones.
  if(length(resmapId) > 1) {
    quoteSetting <- getOption("useFancyQuotes")
    options(useFancyQuotes = FALSE)
    newIds <- dQuote(unlist(resmapId))
    options(useFancyQuotes = quoteSetting)
    
    qStr <- sprintf("id:(%s)", paste(newIds, collapse=" OR "))
    queryParamList <- list(q=qStr, fq="NOT obsoletedBy:* AND archived:false", fl="id")
    result <- query(x, queryParamList, as="list")
    resmapId <- unlist(result)
    if(length(resmapId) == 0) {
      stop("It appears that all resource maps that reference this package are obsolete or archived.")
    }
    if(length(resmapId) > 1) {
      resmapStr <- paste(resmapId, collapse=", ")
      stop(sprintf("The metadata identifier %s is referenced by more than one current resource map: %s", metadataPid, resmapStr))
    }
  }
  
  # getPackage was implemented in API v1.2
  url <- sprintf("%s/packages/%s/%s", x@endpoint, URLencode(format, reserved=T), resmapId)
  response <- auth_get(url, node=x)
  
  if (response$status_code == "200") {
    if(!is.null(dirPath)){
      stopifnot(is.character(dirPath))
      stopifnot(dir.exists(dirPath))
      
      fileName <- paste0(gsub("[[:punct:]]", "_", identifier), ".zip")
      packageFile <- file.path(dirPath, fileName)
      
      if(!file.exists(packageFile)){
        file.create(packageFile)
      }
      
      packageBin <- content(response, as="raw")
      writeBin(packageBin, packageFile)
      
      if(unzip == TRUE){
        unzip(packageFile, exdir = dirPath)
        file.remove(packageFile) #remove zip
        return(gsub(".zip$", "", packageFile)) #unzipped directory path
      } else {
        return(packageFile)
      }
      
    } else {
      packageFile <- tempfile(pattern=sprintf("%s-", UUIDgenerate()), fileext=".zip")
      packageBin <- content(response, as="raw")
      writeBin(packageBin, packageFile)
      return(packageFile)
    }

  } else {
    warning(sprintf("Error calling getPackage: %s\n", getErrorDescription(response)))
    return(NULL)
  }
})

############# Private functions, internal to this class, not for external callers #################

show_auth_message <- function() {
    message(sprintf('Exception name: %s', "NotAuthenticated"), "\n")
    message(sprintf('Exception description: %s', "You must be logged in with a valid certificate file."), "\n")
    message("You can log in and download a certificate at https://cilogon.org/?skin=DataONE")
}

Try the dataone package in your browser

Any scripts or data that you put into this service are public.

dataone documentation built on June 11, 2022, 1:06 a.m.