R/resource.r

Defines functions as_dataframe_of_resources as_dataframe_of_labels levels.ldf_resource vec_restore.ldf_resource vec_cast.character.ldf_resource vec_cast.ldf_resource.character vec_cast.ldf_resource.ldf_resource vec_ptype2.character.ldf_resource vec_ptype2.ldf_resource.character vec_ptype2.ldf_resource.ldf_resource vec_ptype_abbr.ldf_resource obj_print_footer.ldf_resource fill_missing merge_description format.ldf_resource curie default_prefixes sort_priority label.ldf_resource label uri property `description<-` description is_resource validate_resource new_resource resource

Documented in as_dataframe_of_labels as_dataframe_of_resources curie description fill_missing is_resource label merge_description property resource sort_priority uri validate_resource

# resource has a URI and a label
# optionally has a  and a reference instead of a URI
# is merged on the basis of the URI
# is displayed on the bases of the label
# can have other attributes

#' @import vctrs
#' @importFrom rlang %||%
NULL

#' `resource` vector
#'
#' A vector of RDF resources with descriptions held in an orthogonal table.
#'
#' @param uri A character vector of URIs
#' @param description A data frame of descriptions (must have a `uri` column)
#' @param fill_missing A boolean specifying whether rows be added to the
#' description for missing URIs (defaults to `FALSE`)
#' @param x Any vector
#' @return An S3 vector of class `ldf_resource`.
#' @export
#' @examples
#' resource("http://example.net")
#'
#' uris <- c("http://example.net/id/apple",
#'           "http://example.net/id/banana",
#'           "http://example.net/id/carrot")
#' labels <- c("Apple","Banana","Carrot")
#' description <- data.frame(uri=uris, label=labels)
#' r <- resource(uris, description)
resource <- function(uri=character(), description=data.frame(uri=unique(uri)), fill_missing=FALSE) {
  uri <- vec_cast(uri, character())
  if(fill_missing) { description <- fill_missing(description, uri) }
  validate_resource(new_resource(uri, description))
}

new_resource <- function(uri=character(), description=data.frame(uri=unique(uri))) {
  vec_assert(uri, character())
  if(!is.null(description)) { stopifnot(inherits(description, "data.frame")) }
  new_vctr(uri, description=description, class = "ldf_resource", inherit_base_type=F)
}
# for compatibility with the S4 system
methods::setOldClass(c("ldf_resource", "vctrs_vctr"))

#' Validate a resource
#'
#' Checks whether a URI column is present in the description and that it includes a value
#' for every resource with no duplicates.
#'
#' Violations `stop` execution and report the error message.
#'
#' @param resource A vector of `ldf_resource`s
#' @return The resource (fluent interface for chaining)
#' @export
validate_resource <- function(resource) {
  if(!is.null(description(resource))) {
    if(!("uri" %in% colnames(description(resource)))) {
      stop("Description must include a uri column")
    }

    uris <- description(resource)$uri
    if(anyDuplicated(uris) != 0) {
      stop("Description must not include duplicate uris")
    }
    target_uri <- uri(resource) %>% purrr::discard(is.na)
    missing_uri <- target_uri[!target_uri %in% uris]
    if(length(missing_uri)>0) {
      stop("Description must include all uris. Missing e.g. ", missing_uri[1])
    }
  }

  resource
}

#' @export
#' @rdname resource
is_resource <- function(x) {
  inherits(x, "ldf_resource")
}

#' Extract the description from a resource
#'
#' @param resource A vector of `ldf_resource`s
#' @return The resource's description - a type inheriting from data frame
#' @export
description <- function(resource) {
  attr(resource, "description")
}

#' Assign a description to a resource
#'
#' Replaces the existing description. To add properties to a description use in combination with
#' [merge_description()].
#'
#' @param resource A vector of `ldf_resource`s
#' @param value A new resource description
#' @examples
#' r <- resource("a")
#' description(r) <- data.frame(uri="a",label="A")
#' @export
`description<-` <- function(resource, value) {
  attr(resource, "description") <- value
  resource
}

#' Extract a property value from a resource description
#'
#' @param resource A vector of `ldf_resource`s
#' @param p A property (column name from the description)
#' @return A vector with the values of the property that apply to each resource
#' @export
property <- function(resource, p) {
  description <- description(resource)
  if(!(p %in% colnames(description))) {
    warning("Column missing from description: ", p)
  }
  index <- match(resource, description$uri)

  if(inherits(description, "tbl")) {
    if((p %in% colnames(description))) {
      description[index,] %>% dplyr::pull(p)
    } else {
      NULL
    }
  } else {
    description[index, p]
  }
}

#' Extract the URI from a resource vector
#'
#' @param resource A vector of `ldf_resource`s
#' @export
uri <- function(resource) {
  vec_data(resource)
}

#' Extract resource labels
#'
#' @param x A vector of `ldf_resource`s
#' @return A character vector of labels
#' @export
label <- function(x) {
  UseMethod("label")
}

#' @export
label.ldf_resource <- function(x) {
  property(x, "label")
}

#' @export
label.default <- label.ldf_resource

#' Extract resource sort priority
#'
#' Sort priority can be used to determine how to order resources
#'
#' @param resource A vector of `ldf_resource`s
#' @return A vector of sort prorities (typically numeric)
#' @export
sort_priority <- function(resource) {
  property(resource, "sort_priority")
}

default_prefixes <- function() {
  getOption("ldf_prefixes") %||% c(
    rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs="http://www.w3.org/2000/01/rdf-schema#",
    xsd="http://www.w3.org/2001/XMLSchema#",
    owl="http://www.w3.org/2002/07/owl#",
    skos="http://www.w3.org/2004/02/skos/core#",
    void="http://rdfs.org/ns/void#",
    dcat="http://www.w3.org/ns/dcat#",
    dcterms="http://purl.org/dc/terms/",
    qb="http://purl.org/linked-data/cube#",
    sdmxd="http://purl.org/linked-data/sdmx/2009/dimension#"
  )
}

#' Extract Compact URI from resources
#'
#' The URI is compacted using the specified prefixes.
#' If no prefixes are specified the defaults are taken from `default_prefixes` which can be
#' overriden by setting the global option "ldf_prefixes".
#'
#' @param resource A vector of `ldf_resource`s
#' @param prefixes A named character vector mapping from prefix to namespace
#' @return A character vector of compact URIs.
#' @export
curie <- function(resource, prefixes=default_prefixes()) {
  if(length(prefixes)>0) { # ought to check for names
    prefix_to_ns <- stats::setNames(paste0(names(prefixes),":"), unname(prefixes))
    stringr::str_replace_all(uri(resource), prefix_to_ns)
  } else {
    uri(resource)
  }
}

#' @export
format.ldf_resource <- function(x, ...) {
  suppressWarnings(
    output <- label(x) %||% curie(x)
  )
  format(output)
}

#' Combine resource descriptions
#'
#' Merges descriptions on URI, and attempts to merge columns from either description (filling with
#' `NA` where the column exists only in one description).
#'
#' @param x A resource description (inherits from data frame)
#' @param y A resource description (inherits from data frame)
#' @return A resource description (inherits from data frame)
#' @examples
#' a <- resource("a", data.frame(uri="a", label="A"))
#' description(a) <- merge_description(description(a), data.frame(uri="a", vowel=TRUE))
#' @export
merge_description <- function(x, y) {
  if(!is.null(x)) {
    common_columns <- intersect(colnames(x), colnames(y))
    dplyr::full_join(x, y, by=common_columns)
  } else {
    y
  }
}

#' Add rows to description for missing URIs
#'
#' The missing URIs will only be described with their URI
#'
#' @param description A resource description (inherits from data frame)
#' @param uri A character vector of URIs
#' @return A resource description with all URIs included
fill_missing <- function(description, uri) {
  missing <- setdiff(uri, description$uri)

  dplyr::bind_rows(description, data.frame(uri=missing))
}

#' @export
obj_print_footer.ldf_resource <- function(x, ...) {
  cat("Description: ", paste0(colnames(description(x)), collapse=", "), "\n", sep = "")
}

#' @export
vec_ptype_abbr.ldf_resource <- function(x, ...) {
  "ldf_rsrc"
}

#' @export
vec_ptype2.ldf_resource.ldf_resource <- function(x, y, ...) {
  new_resource(description=merge_description(description(x), description(y)))
}

# TODO: should this actually allow char to be lifted to resource?
#' @export
vec_ptype2.ldf_resource.character <- function(x, y, ...) character()

#' @export
vec_ptype2.character.ldf_resource <- function(x, y, ...) character()


#' @export
vec_cast.ldf_resource.ldf_resource <- function(x, to, ...) {
  all_description <- merge_description(description(x), description(to))

  new_resource(vec_data(x), description=all_description)
}

# Cast from character -> resource
#' @export
vec_cast.ldf_resource.character <- function(x, to, ...) resource(x)

# Cast from resource -> character
# Extracts the URI
#' @export
vec_cast.character.ldf_resource <- function(x, to, ...) vec_data(x)

# Rebuild the description
#' @export
vec_restore.ldf_resource <- function(x, to, ...) {
  all_description <- merge_description(description(x), description(to))

  # this enables subsetting to also subset descriptions
  # it's commented out as it's also stripping descriptions when rbinding
  # if(nrow(all_description)>0 & length(na.omit(vec_data(x))) > 0) {
  #   all_description <- all_description %>% dplyr::filter(uri %in% vec_data(x))
  # }

  new_resource(vec_data(x), description=all_description)
}

# #' @export
# vec_proxy.ldf_resource <- function(x, ...) {
#   unclass(x)
# }


#' @export
levels.ldf_resource <- function(...) { return(NULL) }

#' Convert a linked data frame to labels
#'
#' This takes a data frame containing `ldf_resource` or `ldf_interval` vectors
#' and converts those vectors into labels.
#'
#' The labels will either be character vectors or factors depending on the value of
#' `default.stringsAsFactors()`. This can be ridden by passing the argument
#' `stringsAsFactors=F`.
#'
#' @param d A linked data frame
#' @param ... Additional arguments passed to `data.frame`
#' @return A data frame with labels in place of any ldf vectors
#' @export
#' @examples
#' uris <- c("http://example.net/id/apple",
#'           "http://example.net/id/banana",
#'           "http://example.net/id/carrot")
#' labels <- c("Apple","Banana","Carrot")
#' description <- data.frame(uri=uris, label=labels)
#'
#' linked_data_frame <- data.frame(fruit=resource(uris, description))
#' labelled_data_frame <- as_dataframe_of_labels(linked_data_frame, stringsAsFactors=FALSE)
as_dataframe_of_labels <- function(d, ...) {
  data.frame(lapply(d, function(x) {
    if(is_resource(x) | is_interval(x)) {
      label(x)
    } else {
      x
    }
  }), ...)
}

#' Enrich a data frame of URIs with resources descriptions
#'
#' This takes a data frame containing URIs and attempts to download descriptions for them.
#'
#' The basic version just uses [get_label()].
#'
#' @param d A data frame containing URIs
#' @param endpoint A SPARQL endpoint
#' @return A data frame of resources
#' @export
as_dataframe_of_resources <- function(d, endpoint=default_endpoint()) {
  data.frame(lapply(d, function(x) {
    if(is.character(x) & !is_resource(x)) {
      desc <- get_label(x, endpoint)
      if(nrow(desc)>0) {
        resource(x, desc)
      } else {
        x
      }
    } else {
      x
    }
  }))
}
Swirrl/linked-data-frames documentation built on Sept. 14, 2022, 6:15 p.m.