#' Main scrape function
#'
#' @name scrape
#' @include utils.R
#' @include statsnbaR-package.R
#' @keywords internal
NULL
#' Scrape stats.nba.com API and return statsnbaR data
#'
#' Perform a query on a recognised endpoint of stats.nba.com, and
#' format the output as a data.frame with inferred data types for each
#' attribute based on statsnbaR's internal YAML specifications.
#'
#' The endpoints, data types returned and filter specifications are given in the
#' internal YAML which is available on
#' \url{https://www.github.com/stephematician/statsnbaR/tree/master/data-raw/ADL.yaml}
#'
#' The YAML specifies four main things, the;
#' \itemize{
#' \item host (stats.nba.com);
#' \item names of accepted filters and their mappings to stats.nba.com;
#' \item result data conversion parameters (such as class or mapping); and
#' \item the endpoint APIs, what filters they accept and what data they
#' should return.
#' }
#'
#' How the last three items are specified is briefly described in the following
#' sections.
#'
#' @section \sQuote{statsnbaR filters} YAML:
#'
#' \preformatted{
#' filters :
#' league :
#' mapping : \{
#' NBA : "00", # NBA
#' D-league : "20" # D-League
#' \}
#' default : NBA
#' }
#'
#' This is hopefully fairly self-explanatory:
#' \itemize{
#' \item The mapping is from values that statsnbaR uses internally such
#' as \code{NBA}, to the value that stats.nba.com expects.
#' \item Default values are specified for each filter.
#' }
#'
#' @section \sQuote{statsnbaR datatype} YAML:
#'
#' The desired properties of the returned data types and/or a way to
#' map them to desired types is specified in the YAML in a fashion similar
#' to the following example:
#'
#' \preformatted{
#' data :
#' person_id :
#' class : numeric
#' &active roster_status :
#' class : logical
#' mapping : \{
#' 0 : FALSE, # not active
#' 1 : TRUE # active player
#' \}
#' }
#'
#' Things to note are:
#' \itemize{
#' \item The class of the data-type will refer to a specific type
#' converter from \code{\link{type_converters}}.
#' \item THe mapping for a data type may not guarantee the correct
#' type and so the type converter is employed after the mapping.
#' }
#'
#'
#' @section End-point YAML:
#'
#' The endpoint defined in the YAML might look like
#'
#' \preformatted{
#' endpoints :
#' PlayerCommon :
#' api.name : "commonallplayers"
#' api.path : "stats"
#' api.referrer : "http://stats.nba.com/players"
#' api.filters :
#' league : "LeagueID"
#' api.results :
#' CommonAllPlayers : \{
#' person_id : "person_id",
#' \}
#' }
#'
#' \itemize{
#' \item The first three items under PlayerCommon are passed to
#' \code{httr::GET}.
#' \item The \code{api.filters} item is a mapping from filter names that
#' statsnbaR uses internally to filter names that the stats.nba.com
#' endpoint uses.
#' \item The \code{api.results} item maps the attribute names of the data
#' returned by stats.nba.com to names that statsnbaR uses internally.
#' see the section \sQuote{Data specification YAML}.
#' }
#'
#' @param endpoint Character string which identifies the statsnbaR endpoint
#' @param filters Named list of statsnbaR key-value filters for the query
#' @param method Optional user-supplied function to retrieve JSON from
#' stats.nba.com
#' @return A list of data.frames of the resultSets returned by stats.nba.com
#' with inferred data types.
#'
#' @include utils.R
#' @include statsnbaR-package.R
#' @importFrom httr GET
#' @importFrom httr content
#' @importFrom httr add_headers
#' @importFrom httr accept
#' @importFrom httr modify_url
#' @export
api_scrape <- function(endpoint, filters, method=NULL, ...) {
if (!(is.character(endpoint) &&
(endpoint %in% names(statsnbaR.ADL.endpoints))))
stop(paste('[statsnbaR scrape]', endpoint, 'is not a recognised',
'reference to an endpoint of stats.nba.com'))
ADL.endpoint <- statsnbaR.ADL.endpoints[[endpoint]]
if (!valid_filters(filters))
stop(paste('[statsnbaR scrape] filters must be a valid key-pair list',
'recognised by statsnbaR'))
# Check that all filters are valid for this endpoint, and that all the
# filters required for the endpoint are specified.
if (!all(names(filters) %in% names(ADL.endpoint$api.filters)) ||
length(filters) != length(ADL.endpoint$api.filters)) {
invalid_filters <- !(names(filters) %in%
names(ADL.endpoint$api.filters))
missing_filters <- !(names(ADL.endpoint$api.filters) %in%
names(filters))
MFmsg <- ''
if (any(missing_filters))
MFmsg <- paste0(
'missing filters (',
paste(names(ADL.endpoint$api.filters)[missing_filters],
collapse=', '),
') for stats.nba.com endpoint ',
ADL.endpoint$api.name, '\n'
)
IFmsg <- ''
if (any(invalid_filters))
IFmsg <- paste0('invalid filters (',
paste(names(filters)[invalid_filters],
collapse=', '),
') specified for stats.nba.com endpoint ',
ADL.endpoint$api.name, '\n')
stop(paste('[statsnbaR scrape]', paste0(MFmsg, IFmsg)))
}
# Get the stats.nba.com form of the filters
ADL.filters <- map_filters(filters, ADL.endpoint)
url <- modify_url(statsnbaR.ADL.host,
path=c(ADL.endpoint$api.path,
ADL.endpoint$api.name),
query=ADL.filters)
headers <- c(add_headers(Referer=ADL.endpoint$api.referrer,
`User-Agent`=statsnbaR.ADL.user_agent,
`Accept-Language`=statsnbaR.ADL.accept_language),
accept('application/json'))$headers
if (is.null(method)) {
# Let this fail if we don't get JSON data.
api_response <- GET(url=url, add_headers(.headers=headers), ...)
json_data <- content(api_response)
} else
json_data <- method(url=url, headers=headers, ...)
if (!valid_results(json_data))
stop(paste('[stats_nba scrape] invalid result returned by',
'stats.nba.com endpoint'))
# flatten the data into a nice data.frame, unfortunately there is an
# edge case where the resultSets are not returned as a list of resultSets
# and so we need to account for this.
untyped_data <- NULL
if (is.null(names(json_data$resultSets))) {
untyped_data <- lapply(json_data$resultSets,
flatten_json_rowSet)
names(untyped_data) <- sapply(json_data$resultSets,
function(x) x$name)
} else {
untyped_data <- list(flatten_json_rowSet(json_data$resultSets))
names(untyped_data) <- json_data$resultSets$name
}
# Convert to 'useful' data types
lapply(names(ADL.endpoint$api.results),
function(result_name) {
map_results(untyped_data[[result_name]],
ADL.endpoint$api.results[[result_name]])
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.