R/RosvQuery1.R
In rosv: Client to Access and Operate on the 'Open Source Vulnerability' API

#' R6 Class for OSV Query Endpoint
#'
#' @description
#' An R6 class to provide a lower-level interface to the query endpoint of the OSV API.
#'
#' @details
#' Pagination is implemented via \code{httr2::req_perform_iterative()} and a private method for
#' extracting tokens automatically. When initialized, the page_token is set to \code{NULL};
#' if a token is generated for large results the process is handled internally. The response object
#' will contain a list of all returned responses before any formatting occurred. The content field will
#' contain the list of vulnerabilities which may be further parsed into a table format.
#'
#' @param commit Commit hash to query against (do not use when version set).
#' @param version Version of package.
#' @param name Name of package.
#' @param ecosystem Ecosystem package lives within (must be set if using \code{name}).
#' @param purl URL for package (do not use if \code{name} or \code{ecosystem} is set).
#'
#' @returns An R6 object to operate with OSV query endpoint.
#'
#' @seealso \url{https://google.github.io/osv.dev/post-v1-query/}
#'
#' @examples
#' query <- RosvQuery1$new(commit = '6879efc2c1596d11a6a6ad296f80063b558d5e0f')
#' query
#' @export
RosvQuery1 <- R6::R6Class(
  'RosvQuery1',

  public = list(

    #' @field request Request object made by \code{httr2}.
    request = NULL,

    #' @field content Body contents of response from OSV API.
    content = NULL,

    #' @field response Response object returned from OSV API.
    response = NULL,

    #' @description
    #' Set the core request details for subsequent use when called in \code{run()} method.
    #' No request is sent here; the prepared request is stored in the \code{request} field.
    initialize = function(commit = NULL,
                          version = NULL,
                          name = NULL,
                          ecosystem = NULL,
                          purl = NULL) {

      # Checks on input lengths (NULLs are 0), specific to osv_query_1.
      # `||` is used because every operand is a length-one logical.
      if (length(commit) > 1) stop('Only provide one commit.')
      if (length(version) > 1 || length(name) > 1) stop('Only one package and version can be provided.')
      if (length(ecosystem) > 1) stop('Length of ecosystem vector must be 1.')
      if (length(purl) > 1) stop('Length of package URL must be 1.')

      # Perform general rule checks for queries (single or batch).
      # Called for the errors it raises; its return value is not used.
      private$validate_query(commit, version, name, ecosystem, purl)

      # Parse inputs for use specific to this endpoint
      constructed_query <- list(
        commit = commit,
        version = version,
        package = list(name = name, ecosystem = ecosystem, purl = purl),
        page_token = NULL
      )

      # Build the request (endpoint + JSON body) and keep it for run()
      req <- private$core_query('query')
      req <- httr2::req_body_json(req, constructed_query)

      self$request <- req
    },

    #' @description
    #' Perform the request and return response for OSV API call.
    #' Fills the \code{content} field with the \code{vulns} entries of every
    #' page and the \code{response} field with the list of raw responses.
    run = function() {

      # Follow pagination tokens until iterate_osv_page() returns NULL
      resp <- httr2::req_perform_iterative(self$request, next_req = private$iterate_osv_page)

      # Assign to main variables (just vulns, not tokens)
      self$content <- list(
        vulns = httr2::resps_data(resp, function(x) httr2::resp_body_json(x)$vulns)
      )

      # Kept as the last expression so the method's (invisible) value is unchanged
      self$response <- resp
      # invisible(self) # If want to be able to chain content at confusion of reference semantics

    },

    #' @description
    #' Parse the contents returned into a tidier format. Can
    #' use \code{future} plans to help parallelize. Not all contents are parsed.
    #' The \code{content} field is replaced by a single data.frame. Calling
    #' the method again on already parsed content leaves it untouched.
    parse = function() {
      stopifnot(!is.null(self$content))

      # Content is already tabular (parse() was run before): nothing left to do.
      if (is.data.frame(self$content)) return(invisible(self$content))

      # Unwrap the 'vulns' container created by run(). identical() yields one
      # TRUE/FALSE regardless of how many names the content carries, so the
      # condition can never be a length > 1 logical (an error in if()/&&).
      if (identical(names(self$content), 'vulns')) {
        self$content <- purrr::pluck(self$content, 'vulns')
      }

      # Extract all nested details of interests for versions of packages under affected array
      affected_versions <- furrr::future_map(self$content,
                                             ~purrr::map(purrr::pluck(., 'affected'), private$extract_details))

      # Collapse within each pkg set (e.g. tensorflow can have several affected per vulns)
      affected_versions <- purrr::map(affected_versions, purrr::list_rbind)

      # Combine summary details per vulns with nested affected details and collapse into 1 dataframe
      self$content <- purrr::list_rbind(
        purrr::map2(purrr::map(self$content, private$extract_summary),
                    affected_versions,
                    function(x, y) cbind(data.frame(x), y)))
      # invisible(self) # If want to be able to chain content at confusion of reference semantics

    },

    #' @description
    #' Print basic details of query object to screen.
    #' @param ... Reserved for possible future use.
    print = function(...) {
      if (!is.null(self$response)) {
        successes <- httr2::resps_successes(self$response)

        # One Content-Length value per successful response. FUN.VALUE must be
        # character(1): each call yields a single string no matter how many
        # responses there are. A missing header becomes NA instead of an error.
        content_lengths <- vapply(
          successes,
          function(x) {
            len <- purrr::pluck(httr2::resp_headers(x), 'Content-Length')
            if (is.null(len)) NA_character_ else as.character(len)[1]
          },
          character(1)
        )

        cat('Request made to:', self$request$url , '\n')
        cat('Successful responses of total:', length(successes), '/', length(self$response), '\n')
        cat('Successful content size (bytes):', sum(as.double(content_lengths)), '\n')
      } else {
        # run() has not populated the response field yet
        cat('Request made to:', NA , '\n')
        cat('Successful responses of total:', NA, '\n')
        cat('Successful content size (bytes):', NA, '\n')
      }
      invisible(self)
    }
  ),

  private = list(

    # @description
    # Create the core query to use across specific endpoints.
    # @param endpoint Character value for name of endpoint in OSV API.
    core_query = function(endpoint) {

      req <- httr2::request('https://api.osv.dev/v1')
      req <- httr2::req_url_path_append(req, endpoint)
      req <- httr2::req_user_agent(req, '{{rosv}} (https://github.com/al-obrien/rosv)')
      req <- httr2::req_headers(req, Accept = "application/json")
      req <- httr2::req_retry(req, 3, backoff = ~10)
      req

    },

    # Helper function to check for NULLS and replace with NA in lists
    modify_helper = function(input) {
      purrr::modify_if(input, is.null,
                       function(x) NA_character_,
                       .else = function(x) x)
    },

    # Extract summary info into a list for subsequent combination with nested
    # details. Fields absent from the vulnerability entry come back as NA.
    extract_summary = function(x) {
      summary_list <- list(id = purrr::pluck(x, 'id'),
                           summary = purrr::pluck(x, 'summary'),
                           modified = purrr::pluck(x, 'modified'),
                           published = purrr::pluck(x, 'published'))
      private$modify_helper(summary_list)
    },

    # Extract package and version details and append into a data.frame all at once for lowest levels
    # Specific for query1 or vulns extractions
    extract_details = function(x) {

      # Pkg details
      pkg_details <- purrr::pluck(x, 'package')

      # Versions unlisted
      versions <- unlist(purrr::pluck(x, 'versions'))

      # Combine list with reformatted list and make a dataframe
      data.frame(
        private$modify_helper(append(pkg_details, list(versions = versions)))
      )
    },

    # Iterator for OSV pagination tokens for single queries.
    # Returns NULL to stop iterating, otherwise the request for the next page.
    iterate_osv_page = function(resp, req) {

      json_body <- httr2::resp_body_json(resp)
      tokens <- json_body$next_page_token

      # Break out if all tokens are NULL
      if (is.null(tokens)) return(NULL)

      # Modify new request with new tokens
      httr2::req_body_json_modify(req, page_token = tokens)

    },

    # General rule checks shared by query inputs; stops with an error on the
    # first violated rule.
    # NOTE(review): `ecosystem` and `name` are reassigned on local copies only
    # and nothing is returned, so the PyPI name normalisation below does not
    # reach the caller - confirm whether that is intended.
    validate_query = function(commit, version, name, ecosystem, purl) {

      # Valid ecosystem selection
      if (!is.null(ecosystem)) {
        ecosystem <- check_ecosystem(ecosystem)
        if (any(ecosystem == 'PyPI') && !is.null(name)) {
          name[ecosystem == 'PyPI'] <- normalize_pypi_pkg(name[ecosystem == 'PyPI'])
        }
      }

      # Invalid combinations (as defined in API)
      if (!is.null(commit) && !is.null(version)) stop('Cannot provide commit hash and version at the same time.')
      if (!is.null(purl) && (!is.null(name) || !is.null(ecosystem))) stop('Cannot provide purl with name or ecosystem also set.')
      if (!is.null(name) && is.null(ecosystem)) stop('If using package name, ecosystem must also be set')

      # Enforce stricter limits to simplify batch and mental model (vectors that can occur together should be same length or not provided)
      if (!is.null(commit) && (!is.null(name) || !is.null(purl))) stop('Separate commit hash queries from package based queries.')
      if (length(name) != length(ecosystem)) stop('Package name and ecosystem must be same length for vectorized operations.')
      if (!is.null(version) && !is.null(name)) {
        # name and ecosystem lengths are already known to match at this point
        if (length(version) != length(ecosystem)) stop('Package name and versions must be same length for vectorized operations.')
      }
    }
  )
)
Any scripts or data that you put into this service are public.
rosv documentation built on May 29, 2024, 2:58 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
rosv
Client to Access and Operate on the 'Open Source Vulnerability' API

R/RosvQuery1.R
In rosv: Client to Access and Operate on the 'Open Source Vulnerability' API

Try the rosv package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rosv Client to Access and Operate on the 'Open Source Vulnerability' API

R/RosvQuery1.R In rosv: Client to Access and Operate on the 'Open Source Vulnerability' API

Try the rosv package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rosv
Client to Access and Operate on the 'Open Source Vulnerability' API

R/RosvQuery1.R
In rosv: Client to Access and Operate on the 'Open Source Vulnerability' API