# Source file: R/OMLDataSetDescription_Class.R

# Defines functions: print.OMLDataSetDescription, makeOMLDataSetDescription

# Documented in: makeOMLDataSetDescription

#' @title Construct OMLDataSetDescription.
#'
#' @description
#' Builds the description object that accompanies an OMLDataSet.
#' Every argument is validated before the object is assembled.
#' A complete list of all description elements can be found in the
#' \href{https://docs.openml.org/}{documentation}.
#'
#'
#' @param id [\code{integer(1)}]\cr
#'   ID of the data set. Generated by the server; a manually supplied value is ignored.
#' @param name [\code{character(1)}]\cr
#'   Name of the data set.
#' @param version [\code{character(1)}]\cr
#'   Version of the data set. Generated by the server; a manually supplied value is ignored.
#' @param description [\code{character(1)}]\cr
#'   Description of the data set as written by the uploader.
#' @param format [\code{character(1)}]\cr
#'   Format of the data set. Currently this is always "ARFF".
#' @param creator [\code{character}]\cr
#'   The person(s) who created the data set. Optional.
#' @param contributor [\code{character}]\cr
#'   People who contributed to this version of the data set (e.g., by reformatting). Optional.
#' @param collection.date [\code{character(1)}]\cr
#'   Date on which the data was originally collected, as given by the uploader. Optional.
#'   A \code{\link[base]{POSIXt}} object is accepted as well and is converted
#'   with \code{as.character}.
#' @param upload.date [\code{\link[base]{POSIXt}}]\cr
#'   Date on which the data was uploaded. Set by the server; a manually supplied value is ignored.
#' @param language [\code{character(1)}]\cr
#'   Language in which the data is represented. First letter upper case, remaining
#'   letters lower case, e.g. 'English'.
#' @param licence [\code{character(1)}]\cr
#'   Licence of the data. \code{NA} stands for Public Domain or "don't know/care".
#' @param url [\code{character(1)}]\cr
#'   Valid URL pointing to the data file.
#' @param default.target.attribute [\code{character}]\cr
#'   Default target attribute, if there is one. Tasks may nevertheless be defined
#'   with a different attribute as target.
#' @param row.id.attribute [\code{character(1)}]\cr
#'   Attribute holding the row-id column, if the data set has one. Otherwise \code{NA}.
#' @param ignore.attribute [\code{character}]\cr
#'   Attributes to leave out when modelling, such as identifiers and indexes. Optional.
#' @param version.label [\code{character(1)}]\cr
#'   User-provided version label that is meaningful to the user. May also be a date,
#'   a hash, or some other kind of id.
#' @param citation [\code{character(1)}]\cr
#'   Reference(s) to cite when building on this data.
#' @param visibility [\code{character(1)}]\cr
#'   Who is allowed to see the data set. Typical values: 'Everyone', 'All my friends', 'Only me'.
#'   Any of the user's circles may be used as well.
#' @param original.data.url [\code{character(1)}]\cr
#'   For derived data, the url of the original data set.
#'   This may be an OpenML data set, e.g. 'http://openml.org/d/1'.
#' @param paper.url [\code{character(1)}]\cr
#'   Link to a paper that describes the data set.
#' @param update.comment [\code{character(1)}]\cr
#'   Explanation to add whenever the data set is updated.
#' @param md5.checksum [\code{character(1)}]\cr
#'   MD5 checksum used to verify that the data set was downloaded without corruption.
#'   Can be ignored by the user.
#' @param status [\code{character(1)}]\cr
#'   Status of the data set. Generated by the server; a manually supplied value is ignored.
#' @param tags [\code{character}]\cr
#'   Optional tags attached to the data set.
#'
#' @return [\code{OMLDataSetDescription}]\cr
#'   An S3 object of class \code{OMLDataSetDescription} holding all arguments
#'   as named elements.
#'
#' @name OMLDataSetDescription
#' @export
#' @family data set-related functions
#' @aliases OMLDataSetDescription
#' @example inst/examples/makeOMLDataSet.R
makeOMLDataSetDescription = function(
  id = 0L,
  name,
  version = "0",
  description,
  format = "ARFF",
  creator = NA_character_,
  contributor = NA_character_,
  collection.date = NA_character_,
  upload.date = as.POSIXct(Sys.time()),
  language = NA_character_,
  licence = NA_character_,
  url = NA_character_,
  default.target.attribute = NA_character_,
  row.id.attribute = NA_character_,
  ignore.attribute = NA_character_,
  version.label = NA_character_,
  citation = NA_character_,
  visibility = NA_character_,
  original.data.url = NA_character_,
  paper.url = NA_character_,
  update.comment = NA_character_,
  md5.checksum = NA_character_,
  status = NA_character_,
  tags = NA_character_
) {

  # --- Argument checks (performed in signature order) ---------------------

  # Mandatory scalar fields; none of them may be NA.
  assertInt(id)
  assertString(name)
  assertString(version)
  assertString(description)
  assertString(format)

  # Free-length character vectors.
  assertCharacter(creator)
  assertCharacter(contributor)

  # A date-time object is stored in its character form, so that the field
  # is always a single (possibly NA) string afterwards.
  if (inherits(collection.date, "POSIXt")) {
    collection.date = as.character(collection.date)
  }
  assertString(collection.date, na.ok = TRUE)
  assertClass(upload.date, "POSIXt")

  # Optional single strings (NA allowed), interleaved with the remaining
  # free-length character vectors.
  assertString(language, na.ok = TRUE)
  assertString(licence, na.ok = TRUE)
  assertString(url, na.ok = TRUE)
  assertCharacter(default.target.attribute)
  assertString(row.id.attribute, na.ok = TRUE)
  assertCharacter(ignore.attribute)
  assertString(version.label, na.ok = TRUE)
  assertString(citation, na.ok = TRUE)
  assertString(visibility, na.ok = TRUE)
  assertString(original.data.url, na.ok = TRUE)
  assertString(paper.url, na.ok = TRUE)
  assertString(update.comment, na.ok = TRUE)
  assertString(md5.checksum, na.ok = TRUE)
  assertString(status, na.ok = TRUE)
  assertCharacter(tags)

  # --- Object construction -------------------------------------------------

  # Every argument becomes an equally named element of the S3 object.
  makeS3Obj(
    "OMLDataSetDescription",
    id = id, name = name, version = version,
    description = description, format = format,
    creator = creator, contributor = contributor,
    collection.date = collection.date, upload.date = upload.date,
    language = language, licence = licence, url = url,
    default.target.attribute = default.target.attribute,
    row.id.attribute = row.id.attribute,
    ignore.attribute = ignore.attribute,
    version.label = version.label, citation = citation,
    visibility = visibility,
    original.data.url = original.data.url, paper.url = paper.url,
    update.comment = update.comment, md5.checksum = md5.checksum,
    status = status, tags = tags
  )
}

# Print method for OMLDataSetDescription objects.
# Writes a header line with name, version and OpenML ID, followed by a few
# selected fields, and returns the object invisibly.
#' @export
print.OMLDataSetDescription = function(x, ...) {
  # Header: data set name, version string and numeric OpenML ID.
  catf("\nData Set '%s' :: (Version = %s, OpenML ID = %i)", x$name, x$version, x$id)

  # The field labels are padded to equal width so the colons line up in the
  # output. catfNotNA is a package helper defined outside this file;
  # presumably it prints the line only when the value is not NA -- confirm
  # against its definition.
  catfNotNA("  Collection Date         : %s", x$collection.date)
  catfNotNA("  Creator(s)              : %s", x$creator)
  catfNotNA("  Ignore Attributes       : %s", x$ignore.attribute)
  catfNotNA("  Row ID Attribute        : %s", x$row.id.attribute)
  catfNotNA("  Default Target Attribute: %s", x$default.target.attribute)

  # Print methods are expected to return their argument invisibly, so that
  # `y = print(x)` and piped calls keep working with the printed object.
  invisible(x)
}

# Try the OpenML package in your browser

# Any scripts or data that you put into this service are public.

# OpenML documentation built on Oct. 20, 2022, 1:07 a.m.