Nothing
#' @title DataBackend
#'
#' @include mlr_reflections.R
#' @include warn_deprecated.R
#'
#' @description
#' This is the abstract base class for data backends.
#'
#' Data backends provide a layer of abstraction for various data storage systems.
#' It is not recommended to work directly with the DataBackend.
#' Instead, all data access is handled transparently via the [Task].
#'
#' This package comes with two implementations for backends:
#'
#' * [DataBackendDataTable] which stores the data as [data.table::data.table()].
#' * [DataBackendMatrix] which stores the data as sparse [Matrix::sparseMatrix()].
#'
#' To connect to out-of-memory database management systems such as SQL servers,
#' see the extension package \CRANpkg{mlr3db}.
#'
#' @details
#' The required set of fields and methods to implement a custom `DataBackend` is
#' listed in the respective sections (see [DataBackendDataTable] or
#' [DataBackendMatrix] for exemplary implementations of the interface).
#'
#' @template param_data_formats
#' @template seealso_databackend
#'
#' @export
#' @examples
#' data = data.table::data.table(id = 1:5, x = runif(5),
#' y = sample(letters[1:3], 5, replace = TRUE))
#'
#' b = DataBackendDataTable$new(data, primary_key = "id")
#' print(b)
#' b$head(2)
#' b$data(rows = 1:2, cols = "x")
#' b$distinct(rows = b$rownames, "y")
#' b$missings(rows = b$rownames, cols = names(data))
DataBackend = R6Class("DataBackend", cloneable = FALSE,
  public = list(
    #' @field primary_key (`character(1)`)\cr
    #' Column name of the primary key column of positive and unique integer row ids.
    primary_key = NULL,

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #'
    #' Note: This object is typically constructed via a derived class, e.g.
    #' [DataBackendDataTable] or [DataBackendMatrix], or via the S3 method
    #' [as_data_backend()].
    #'
    #' @param data (any)\cr
    #'   The format of the input data depends on the specialization. E.g.,
    #'   [DataBackendDataTable] expects a [data.table::data.table()] and
    #'   [DataBackendMatrix] expects a [Matrix::Matrix()] from \CRANpkg{Matrix}.
    #'
    #' @param primary_key (`character(1)`)\cr
    #'   Each DataBackend needs a way to address rows, which is done via a
    #'   column of unique integer values, referenced here by `primary_key`. The
    #'   use of this variable may differ between backends.
    initialize = function(data, primary_key, data_formats) {
      # The data is stored as-is; no validation of its format happens here.
      private$.data = data
      self$primary_key = assert_string(primary_key)

      # `data_formats` is not used for anything except emitting a deprecation
      # warning when a caller still supplies it.
      if (!missing(data_formats)) {
        warn_deprecated("DataBackend$initialize argument 'data_formats'")
      }
    },

    #' @description
    #' Helper for print outputs.
    #' Returns the name of the most specific class wrapped in angle brackets.
    #' @param ... (ignored).
    format = function(...) {
      sprintf("<%s>", class(self)[1L])
    },

    #' @description
    #' Printer.
    #' Prints a header with the class and the dimensions, followed by the first
    #' rows of the data and, if applicable, the number of omitted rows.
    #' @param ... (ignored).
    #' @return The backend itself, invisibly.
    print = function(...) {
      # `...` is accepted because the S3 method print.R6() forwards its extra
      # arguments to this method; without it, any additional argument passed
      # to print() would raise an "unused argument" error.
      n_show = 6L

      # nrow, ncol and head() are not defined in this base class;
      # presumably they are provided by the derived backend.
      nr = self$nrow
      catf("%s (%ix%i)", format(self), nr, self$ncol)
      print(self$head(n_show), row.names = FALSE, print.keys = FALSE)
      if (nr > n_show) {
        catf("[...] (%i rows omitted)", nr - n_show)
      }

      invisible(self)
    }
  ),

  active = list(
    #' @field data_formats (`character()`)\cr
    #' Supported data format. Always `"data.table"`.
    #' This is deprecated and will be removed in the future.
    data_formats = deprecated_binding("DataBackend$data_formats", "data.table"),

    #' @template field_hash
    hash = function(rhs) {
      if (missing(rhs)) {
        # Read access: compute the hash on first use and cache it afterwards.
        # .calculate_hash() is not defined in this base class; presumably it
        # is implemented by the derived backend.
        if (is.na(private$.hash)) {
          private$.hash = private$.calculate_hash()
        }
        return(private$.hash)
      }

      # Write access: overwrite the cached hash with the supplied string.
      private$.hash = assert_string(rhs)
    },

    #' @template field_col_hashes
    col_hashes = function() {
      # One entry per non-key column, built as "<backend hash>.<column name>"
      # and named by the column name.
      cn = setdiff(self$colnames, self$primary_key)
      set_names(sprintf("%s.%s", self$hash, cn), cn)
    }
  ),

  private = list(
    # Raw data object as passed to the constructor.
    .data = NULL,
    # Cached hash; NA until first requested or explicitly set.
    .hash = NA_character_
  )
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.