#' Repeats a dataset count times.
#'
#' @param dataset A dataset
#' @param count (Optional.) An integer value representing the number of times
#' the elements of this dataset should be repeated. The default behavior (if
#' `count` is `NULL` or `-1`) is for the elements to be repeated indefinitely.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
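#' @examples
#' \dontrun{
#' # A minimal sketch: cycle over a small dataset for two epochs
#' # (element values shown in the comment are illustrative).
#' range_dataset(0, 3) %>%
#'   dataset_repeat(2) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # 0 1 2 0 1 2 (one element per print)
#' }
#'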
#' @export
dataset_repeat <- function(dataset, count = NULL) {
as_tf_dataset(dataset$`repeat`(
count = as_integer_tensor(count)
))
}
#' Randomly shuffles the elements of this dataset.
#'
#' @param dataset A dataset
#'
#' @param buffer_size An integer, representing the number of elements from this
#' dataset from which the new dataset will sample.
#' @param seed (Optional) An integer, representing the random seed that will be
#' used to create the distribution.
#' @param reshuffle_each_iteration (Optional) A boolean, which if true indicates
#' that the dataset should be pseudorandomly reshuffled each time it is iterated
#' over. (Defaults to `TRUE`). Not used if TF version < 1.15
#' @return A dataset
#'
#' @family dataset methods
#'
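#' @examples
#' \dontrun{
#' # A hedged sketch: a buffer at least as large as the dataset gives a full
#' # uniform shuffle; passing `seed` makes the permutation reproducible.
#' range_dataset(0, 10) %>%
#'   dataset_shuffle(buffer_size = 10, seed = 42) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' }
#'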
#' @export
dataset_shuffle <- function(dataset, buffer_size, seed = NULL, reshuffle_each_iteration = NULL) {
if (!is.null(reshuffle_each_iteration) && tensorflow::tf_version() < "1.15")
warning("reshuffle_each_iteration is only used with TF >= 1.15", call. = FALSE)
args <- list(
buffer_size = as_integer_tensor(buffer_size),
seed = as_integer_tensor(seed)
)
if (tensorflow::tf_version() >= "1.15")
args[["reshuffle_each_iteration"]] <- reshuffle_each_iteration
as_tf_dataset(do.call(dataset$shuffle, args))
}
#' Shuffles and repeats a dataset returning a new permutation for each epoch.
#'
#' @inheritParams dataset_shuffle
#' @inheritParams dataset_repeat
#'
#' @family dataset methods
#'
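#' @examples
#' \dontrun{
#' # Sketch: shuffle with a full-size buffer and repeat for two epochs,
#' # drawing a fresh permutation each epoch.
#' dataset <- range_dataset(0, 5) %>%
#'   dataset_shuffle_and_repeat(buffer_size = 5, count = 2)
#' }
#'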
#' @export
dataset_shuffle_and_repeat <- function(dataset, buffer_size, count = NULL, seed = NULL) {
validate_tf_version("1.8", "dataset_shuffle_and_repeat")
as_tf_dataset(dataset$apply(
tfd_shuffle_and_repeat(
as_integer_tensor(buffer_size),
as_integer_tensor(count),
as_integer_tensor(seed)
)
))
}
#' Combines consecutive elements of this dataset into batches.
#'
#' The components of the resulting element will have an additional outer
#' dimension, which will be `batch_size` (or `N %% batch_size` for the last
#' element if `batch_size` does not divide the number of input elements `N`
#' evenly and `drop_remainder` is `FALSE`). If your program depends on the
#' batches having the same outer dimension, you should set the `drop_remainder`
#' argument to `TRUE` to prevent the smaller batch from being produced.
#'
#' @note If your program requires data to have a statically known shape (e.g.,
#' when using XLA), you should use `drop_remainder=TRUE`. Without
#' `drop_remainder=TRUE` the shape of the output dataset will have an unknown
#' leading dimension due to the possibility of a smaller final batch.
#'
#' @param dataset A dataset
#' @param batch_size An integer, representing the number of consecutive elements
#' of this dataset to combine in a single batch.
#' @param drop_remainder (Optional.) A boolean, representing whether the last
#' batch should be dropped in the case it has fewer than `batch_size`
#' elements; the default behavior is not to drop the smaller batch.
#' @param num_parallel_calls (Optional.) A scalar integer, representing the
#' number of batches to compute asynchronously in parallel. If not specified,
#' batches will be computed sequentially. If the value `tf$data$AUTOTUNE` is
#' used, then the number of parallel calls is set dynamically based on
#' available resources.
#'
#' @param deterministic (Optional.) When `num_parallel_calls` is specified, if
#' this boolean is specified (`TRUE` or `FALSE`), it controls the order in
#' which the transformation produces elements. If set to `FALSE`, the
#' transformation is allowed to yield elements out of order to trade
#' determinism for performance. If not specified, the
#' `tf.data.Options.experimental_deterministic` option (`TRUE` by default)
#' controls the behavior. See `dataset_options()` for how to set dataset
#' options.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
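#' @examples
#' \dontrun{
#' # Sketch: 10 elements batched in fours; the final, smaller batch of 2
#' # is kept unless `drop_remainder = TRUE`.
#' range_dataset(0, 10) %>%
#'   dataset_batch(4) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # [1] 0 1 2 3
#' # [1] 4 5 6 7
#' # [1] 8 9
#' }
#'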
#' @export
dataset_batch <-
function(dataset, batch_size, drop_remainder = FALSE, num_parallel_calls=NULL, deterministic=NULL) {
args <- capture_args(match.call(), list(
batch_size = as_integer_tensor
), ignore = "dataset")
as_tf_dataset(do.call(dataset$batch, args))
}
#' A transformation that buckets elements in a `Dataset` by length
#'
#' @details
#' Elements of the `Dataset` are grouped together by length and then are padded
#' and batched.
#'
#' This is useful for sequence tasks in which the elements have variable
#' length. Grouping together elements that have similar lengths reduces the
#' total fraction of padding in a batch which increases training step
#' efficiency.
#'
#' Below is an example that bucketizes the input data into the 3 buckets
#' `[0, 3), [3, 5), [5, Inf)` based on sequence length, with batch size 2.
#'
#' @param dataset A `tf_dataset`
#' @param element_length_func function from element in `Dataset` to `tf$int32`,
#' determines the length of the element, which will determine the bucket it
#' goes into.
#'
#' @param bucket_boundaries integers, upper length boundaries of the buckets.
#'
#' @param bucket_batch_sizes integers, batch size per bucket. Length should be
#' `length(bucket_boundaries) + 1`.
#'
#' @param padded_shapes Nested structure of `tf.TensorShape` (returned by [`tensorflow::shape()`])
#' to pass to `tf.data.Dataset.padded_batch`. If not provided, will use
#' `dataset.output_shapes`, which will result in variable length dimensions
#' being padded out to the maximum length in each batch.
#'
#' @param padding_values Values to pad with, passed to
#' `tf.data.Dataset.padded_batch`. Defaults to padding with 0.
#'
#' @param pad_to_bucket_boundary bool, if `FALSE`, will pad dimensions with unknown
#' size to maximum length in batch. If `TRUE`, will pad dimensions with
#' unknown size to bucket boundary minus 1 (i.e., the maximum length in
#' each bucket), and caller must ensure that the source `Dataset` does not
#' contain any elements with length longer than `max(bucket_boundaries)`.
#'
#' @param no_padding boolean, indicates whether to pad the batch features (features
#' need to be either of type `tf.sparse.SparseTensor` or of same shape).
#'
#' @param drop_remainder (Optional.) A logical scalar, representing
#' whether the last batch should be dropped in the case it has fewer than
#' `batch_size` elements; the default behavior is not to drop the smaller
#' batch.
#'
#' @param name (Optional.) A name for the tf.data operation.
#'
#' @seealso
#' + <https://www.tensorflow.org/api_docs/python/tf/data/Dataset#bucket_by_sequence_length>
#'
#' @export
#' @examples
#' \dontrun{
#' dataset <- list(c(0),
#' c(1, 2, 3, 4),
#' c(5, 6, 7),
#' c(7, 8, 9, 10, 11),
#' c(13, 14, 15, 16, 17, 18, 19, 20),
#' c(21, 22)) %>%
#' lapply(as.array) %>% lapply(as_tensor, "int32") %>%
#' lapply(tensors_dataset) %>%
#' Reduce(dataset_concatenate, .)
#'
#' dataset %>%
#' dataset_bucket_by_sequence_length(
#' element_length_func = function(elem) tf$shape(elem)[1],
#' bucket_boundaries = c(3, 5),
#' bucket_batch_sizes = c(2, 2, 2)
#' ) %>%
#' as_array_iterator() %>%
#' iterate(print)
#' # [,1] [,2] [,3] [,4]
#' # [1,] 1 2 3 4
#' # [2,] 5 6 7 0
#' # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
#' # [1,] 7 8 9 10 11 0 0 0
#' # [2,] 13 14 15 16 17 18 19 20
#' # [,1] [,2]
#' # [1,] 0 0
#' # [2,] 21 22
#' }
dataset_bucket_by_sequence_length <-
function(dataset,
element_length_func,
bucket_boundaries,
bucket_batch_sizes,
padded_shapes = NULL,
padding_values = NULL,
pad_to_bucket_boundary = FALSE,
no_padding = FALSE,
drop_remainder = FALSE,
name = NULL)
{
args <- capture_args(match.call(), list(
bucket_boundaries = as_integer_list,
bucket_batch_sizes = as_integer_list,
padded_shapes = as_tensor_shapes
), ignore = "dataset")
  as_tf_dataset(do.call(dataset$bucket_by_sequence_length, args))
}
#' Caches the elements in this dataset.
#'
#'
#' @param dataset A dataset
#' @param filename String with the name of a directory on the filesystem to use
#' for caching tensors in this Dataset. If a filename is not provided, the
#' dataset will be cached in memory.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
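#' @examples
#' \dontrun{
#' # Sketch: cache a preprocessed dataset so the map runs only on the first
#' # epoch (`expensive_preprocess` is a hypothetical function).
#' dataset <- range_dataset(0, 10) %>%
#'   dataset_map(expensive_preprocess) %>%
#'   dataset_cache()          # cache in memory
#' # or cache on disk by supplying a filename:
#' # dataset_cache(filename = "/tmp/cache-dir")
#' }
#'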
#' @export
dataset_cache <- function(dataset, filename = NULL) {
if (is.null(filename))
filename <- ""
if (!is.character(filename))
stop("filename must be a character vector")
as_tf_dataset(
dataset$cache(tf$constant(filename, dtype = tf$string))
)
}
#' Creates a dataset by concatenating given dataset with this dataset.
#'
#' @note The input dataset and the dataset to be concatenated should have the
#' same nested structure and output types.
#'
#' @param dataset,... `tf_dataset`s to be concatenated
#'
#' @return A dataset
#'
#' @family dataset methods
#'
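#' @examples
#' \dontrun{
#' # Sketch: concatenate two datasets with the same structure end to end.
#' ds1 <- range_dataset(0, 3)
#' ds2 <- range_dataset(10, 13)
#' ds1 %>%
#'   dataset_concatenate(ds2) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # 0 1 2 10 11 12 (one element per print)
#' }
#'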
#' @export
dataset_concatenate <- function(dataset, ...) {
for (other in list(...))
dataset <- dataset$concatenate(other)
as_tf_dataset(dataset)
}
#' Creates a dataset with at most count elements from this dataset
#'
#' @param dataset A dataset
#' @param count Integer representing the number of elements of this dataset that
#' should be taken to form the new dataset. If `count` is -1, or if `count` is
#' greater than the size of this dataset, the new dataset will contain all
#' elements of this dataset.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
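#' @examples
#' \dontrun{
#' # Sketch: keep only the first 3 elements.
#' range_dataset(0, 10) %>%
#'   dataset_take(3) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # 0 1 2 (one element per print)
#' }
#'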
#' @export
dataset_take <- function(dataset, count) {
as_tf_dataset(dataset$take(count = as_integer_tensor(count)))
}
#' A transformation that stops dataset iteration based on a predicate.
#'
#' @param dataset A TF dataset
#' @param predicate A function that maps a nested structure of tensors (having
#' shapes and types defined by `self$output_shapes` and `self$output_types`)
#' to a scalar `tf.bool` tensor.
#' @param name (Optional.) A name for the tf.data operation.
#'
#' @details
#' Example usage:
#' ```` r
#' range_dataset(from = 0, to = 10) %>%
#' dataset_take_while( ~ .x < 5) %>%
#' as_array_iterator() %>%
#' iterate(simplify = FALSE) %>% str()
#' #> List of 5
#' #> $ : num 0
#' #> $ : num 1
#' #> $ : num 2
#' #> $ : num 3
#' #> $ : num 4
#' ````
#'
#' @return A TF Dataset
#'
#' @family dataset methods
#'
#' @export
dataset_take_while <- function(dataset, predicate, name = NULL) {
as_tf_dataset(dataset$take_while(
as_py_function(predicate),
name = name))
}
#' Map a function across a dataset.
#'
#' @param dataset A dataset
#' @param map_func A function mapping a nested structure of tensors (having
#' shapes and types defined by [output_shapes()] and [output_types()]) to
#' another nested structure of tensors. It also supports `purrr` style
#' lambda functions powered by [rlang::as_function()].
#' @param num_parallel_calls (Optional) An integer, representing the
#' number of elements to process in parallel. If not specified, elements will
#' be processed sequentially.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
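#' @examples
#' \dontrun{
#' # Sketch: square every element; the purrr-style lambda is equivalent to
#' # function(x) tf$square(x).
#' range_dataset(0, 5) %>%
#'   dataset_map(~ tf$square(.x)) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # 0 1 4 9 16 (one element per print)
#' }
#'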
#' @export
dataset_map <- function(dataset, map_func, num_parallel_calls = NULL) {
dtype <- if (tensorflow::tf_version() >= "2.3") tf$int64 else tf$int32
as_tf_dataset(dataset$map(
map_func = as_py_function(map_func),
num_parallel_calls = as_integer_tensor(num_parallel_calls, dtype)
))
}
#' Fused implementation of dataset_map() and dataset_batch()
#'
#' Maps `map_func` across `batch_size` consecutive elements of this dataset and
#' then combines them into a batch. Functionally, it is equivalent to a map
#' followed by a batch. However, by fusing the two transformations together, the
#' implementation can be more efficient.
#'
#' @inheritParams dataset_map
#' @inheritParams dataset_batch
#' @param num_parallel_batches (Optional) An integer, representing the number of batches
#' to create in parallel. On one hand, higher values can help mitigate the effect of
#' stragglers. On the other hand, higher values can increase contention if CPU is
#' scarce.
#'
#' @family dataset methods
#'
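#' @examples
#' \dontrun{
#' # Sketch: fuse a squaring map with batching into a single pipeline stage.
#' range_dataset(0, 8) %>%
#'   dataset_map_and_batch(function(x) tf$square(x), batch_size = 4) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # [1]  0  1  4  9
#' # [1] 16 25 36 49
#' }
#'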
#' @export
dataset_map_and_batch <- function(dataset,
map_func,
batch_size,
num_parallel_batches = NULL,
drop_remainder = FALSE,
num_parallel_calls = NULL) {
validate_tf_version("1.8", "dataset_map_and_batch")
as_tf_dataset(dataset$apply(
tfd_map_and_batch(
as_py_function(map_func),
as.integer(batch_size),
as_integer_tensor(num_parallel_batches),
drop_remainder,
as_integer_tensor(num_parallel_calls)
)
))
}
#' Maps map_func across this dataset and flattens the result.
#'
#' @param dataset A dataset
#'
#' @param map_func A function mapping a nested structure of tensors (having
#' shapes and types defined by [output_shapes()] and [output_types()]) to a
#' dataset.
#'
#' @return A dataset
#'
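#' @family dataset methods
#'
#' @examples
#' \dontrun{
#' # Sketch: map each scalar to a two-element dataset, then flatten the
#' # results into a single stream.
#' range_dataset(1, 4) %>%
#'   dataset_flat_map(function(x) tensors_dataset(x) %>% dataset_repeat(2)) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # 1 1 2 2 3 3 (one element per print)
#' }
#'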
#' @export
dataset_flat_map <- function(dataset, map_func) {
as_tf_dataset(
dataset$flat_map(map_func)
)
}
#' Creates a Dataset that prefetches elements from this dataset.
#'
#'
#' @param dataset A dataset
#' @param buffer_size An integer, representing the maximum number of elements
#' that will be buffered when prefetching.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
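#' @examples
#' \dontrun{
#' # Sketch: overlap preprocessing with training by prefetching at the end of
#' # the pipeline; the default `tf$data$AUTOTUNE` buffer size lets tf.data
#' # choose dynamically (assumes an existing `dataset`).
#' dataset <- dataset %>%
#'   dataset_batch(32) %>%
#'   dataset_prefetch()
#' }
#'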
#' @export
dataset_prefetch <- function(dataset, buffer_size = tf$data$AUTOTUNE) {
as_tf_dataset(dataset$prefetch(as_integer_tensor(buffer_size)))
}
#' A transformation that prefetches dataset values to the given `device`
#'
#' @param dataset A dataset
#' @param device A string. The name of a device to which elements will be prefetched
#' (e.g. "/gpu:0").
#' @param buffer_size (Optional.) The number of elements to buffer on device.
#' Defaults to an automatically chosen value.
#'
#' @return A dataset
#'
#' @note Although the transformation creates a dataset, it must be the final
#' transformation in the input pipeline.
#'
#' @family dataset methods
#'
#' @export
dataset_prefetch_to_device <- function(dataset, device, buffer_size = NULL) {
validate_tf_version("1.8", "dataset_prefetch_to_device")
as_tf_dataset(dataset$apply(
tfd_prefetch_to_device(
device = device,
buffer_size = as_integer_tensor(buffer_size)
)
))
}
#' Filter a dataset by a predicate
#'
#' @param dataset A dataset
#'
#' @param predicate A function mapping a nested structure of tensors (having
#' shapes and types defined by [output_shapes()] and [output_types()]) to a
#' scalar `tf$bool` tensor.
#'
#' @return A dataset composed of records that matched the predicate.
#'
#' @details Note that the functions used inside the predicate must be
#' tensor operations (e.g. `tf$not_equal`, `tf$less`, etc.). R
#' generic methods for relational operators (e.g. `<`, `>`, `<=`,
#' etc.) and logical operators (e.g. `!`, `&`, `|`, etc.) are
#' provided so you can use shorthand syntax for most common
#' comparisons (this is illustrated by the example below).
#'
#' @family dataset methods
#'
#' @examples \dontrun{
#'
#' dataset <- text_line_dataset("mtcars.csv", record_spec = mtcars_spec) %>%
#' dataset_filter(function(record) {
#' record$mpg >= 20
#' })
#'
#' dataset <- text_line_dataset("mtcars.csv", record_spec = mtcars_spec) %>%
#' dataset_filter(function(record) {
#' record$mpg >= 20 & record$cyl >= 6L
#' })
#'
#' }
#'
#' @export
dataset_filter <- function(dataset, predicate) {
as_tf_dataset(dataset$filter(as_py_function(predicate)))
}
#' Creates a dataset that skips count elements from this dataset
#'
#' @param dataset A dataset
#' @param count An integer, representing the number of elements of this dataset
#' that should be skipped to form the new dataset. If count is greater than
#' the size of this dataset, the new dataset will contain no elements. If
#' count is -1, skips the entire dataset.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
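#' @examples
#' \dontrun{
#' # Sketch: a simple train/validation split via take/skip (sizes are
#' # illustrative; shuffle beforehand if element order matters).
#' full <- range_dataset(0, 100)
#' train <- full %>% dataset_take(80)
#' validation <- full %>% dataset_skip(80)
#' }
#'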
#' @export
dataset_skip <- function(dataset, count) {
as_tf_dataset(dataset$skip(count = as_integer_tensor(count)))
}
#' Maps map_func across this dataset, and interleaves the results
#'
#' @param dataset A dataset
#' @param map_func A function mapping a nested structure of tensors (having
#' shapes and types defined by [output_shapes()] and [output_types()]) to a
#' dataset.
#' @param cycle_length The number of elements from this dataset that will be
#' processed concurrently.
#' @param block_length The number of consecutive elements to produce from each
#' input element before cycling to another input element.
#'
#' @details
#'
#' The `cycle_length` and `block_length` arguments control the order in which
#' elements are produced. `cycle_length` controls the number of input elements
#' that are processed concurrently. In general, this transformation will apply
#' `map_func` to `cycle_length` input elements, open iterators on the returned
#' dataset objects, and cycle through them producing `block_length` consecutive
#' elements from each iterator, and consuming the next input element each time
#' it reaches the end of an iterator.
#'
#' @examples \dontrun{
#'
#' dataset <- tensor_slices_dataset(c(1,2,3,4,5)) %>%
#' dataset_interleave(cycle_length = 2, block_length = 4, function(x) {
#' tensors_dataset(x) %>%
#' dataset_repeat(6)
#' })
#'
#' # resulting dataset (newlines indicate "block" boundaries):
#' c(1, 1, 1, 1,
#' 2, 2, 2, 2,
#' 1, 1,
#' 2, 2,
#' 3, 3, 3, 3,
#' 4, 4, 4, 4,
#' 3, 3,
#' 4, 4,
#' 5, 5, 5, 5,
#'   5, 5
#' )
#'
#' }
#'
#' @family dataset methods
#'
#' @export
dataset_interleave <- function(dataset, map_func, cycle_length, block_length = 1) {
as_tf_dataset(dataset$interleave(
map_func = as_py_function(map_func),
cycle_length = as_integer_tensor(cycle_length),
block_length = as_integer_tensor(block_length)
))
}
#' Creates a dataset that includes only 1 / num_shards of this dataset.
#'
#' This dataset operator is very useful when running distributed training, as it
#' allows each worker to read a unique subset.
#'
#' @param dataset A dataset
#' @param num_shards An integer representing the number of shards operating in
#' parallel.
#' @param index An integer, representing the worker index.
#'
#' @return A dataset
#'
#' @family dataset methods
#'
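#' @examples
#' \dontrun{
#' # Sketch: with two workers, each worker keeps every other element.
#' worker_0 <- range_dataset(0, 6) %>% dataset_shard(num_shards = 2, index = 0)
#' # yields 0, 2, 4
#' worker_1 <- range_dataset(0, 6) %>% dataset_shard(num_shards = 2, index = 1)
#' # yields 1, 3, 5
#' }
#'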
#' @export
dataset_shard <- function(dataset, num_shards, index) {
as_tf_dataset(dataset$shard(
num_shards = as_integer_tensor(num_shards),
index = as_integer_tensor(index)
))
}
#' Combines consecutive elements of this dataset into padded batches.
#'
#' @details
#' This transformation combines multiple consecutive elements of the input
#' dataset into a single element.
#'
#' Like [`dataset_batch()`], the components of the resulting element will
#' have an additional outer dimension, which will be `batch_size` (or
#' `N %% batch_size` for the last element if `batch_size` does not divide the
#' number of input elements `N` evenly and `drop_remainder` is `FALSE`). If
#' your program depends on the batches having the same outer dimension, you
#' should set the `drop_remainder` argument to `TRUE` to prevent the smaller
#' batch from being produced.
#'
#' Unlike [`dataset_batch()`], the input elements to be batched may have
#' different shapes, and this transformation will pad each component to the
#' respective shape in `padded_shapes`. The `padded_shapes` argument
#' determines the resulting shape for each dimension of each component in an
#' output element:
#'
#' * If the dimension is a constant, the component will be padded out to that
#' length in that dimension.
#' * If the dimension is unknown, the component will be padded out to the
#' maximum length of all elements in that dimension.
#'
#' See also `tf$data$experimental$dense_to_sparse_batch`, which combines
#' elements that may have different shapes into a `tf$sparse$SparseTensor`.
#'
#' @inheritParams dataset_batch
#' @param batch_size An integer, representing the number of
#' consecutive elements of this dataset to combine in a single batch.
#' @param padded_shapes (Optional.) A (nested) structure of
#' `tf.TensorShape` (returned by [`tensorflow::shape()`]) or
#' `tf$int64` vector tensor-like objects representing the shape to which
#' the respective component of each input element should be padded prior
#' to batching. Any unknown dimensions will be padded to the maximum size
#' of that dimension in each batch. If unset, all dimensions of all
#' components are padded to the maximum size in the batch. `padded_shapes`
#' must be set if any component has an unknown rank.
#' @param padding_values (Optional.) A (nested) structure of scalar-shaped
#' `tf.Tensor`, representing the padding values to use for the respective
#' components. `NULL` represents that the (nested) structure should be padded
#' with default values. Defaults are `0` for numeric types and the empty
#' string `""` for string types. The `padding_values` should have the same
#' (nested) structure as the input dataset. If `padding_values` is a single
#' element and the input dataset has multiple components, then the same
#' `padding_values` will be used to pad every component of the dataset.
#' If `padding_values` is a scalar, then its value will be broadcasted
#' to match the shape of each component.
#' @param drop_remainder (Optional.) A boolean scalar, representing
#' whether the last batch should be dropped in the case it has fewer than
#' `batch_size` elements; the default behavior is not to drop the smaller
#' batch.
#'
#' @param name (Optional.) A name for the tf.data operation. Requires tensorflow version >= 2.7.
#'
#' @returns A tf_dataset
#' @export
#' @family dataset methods
#' @seealso
#' - <https://www.tensorflow.org/api_docs/python/tf/data/Dataset#padded_batch>
#'
#' @examples
#' \dontrun{
#' A <- range_dataset(1, 5, dtype = tf$int32) %>%
#' dataset_map(function(x) tf$fill(list(x), x))
#'
#' # Pad to the smallest per-batch size that fits all elements.
#' B <- A %>% dataset_padded_batch(2)
#' B %>% as_array_iterator() %>% iterate(print)
#'
#' # Pad to a fixed size.
#' C <- A %>% dataset_padded_batch(2, padded_shapes=5)
#' C %>% as_array_iterator() %>% iterate(print)
#'
#' # Pad with a custom value.
#' D <- A %>% dataset_padded_batch(2, padded_shapes=5, padding_values = -1L)
#' D %>% as_array_iterator() %>% iterate(print)
#'
#' # Pad with a single value and multiple components.
#' E <- zip_datasets(A, A) %>% dataset_padded_batch(2, padding_values = -1L)
#' E %>% as_array_iterator() %>% iterate(print)
#' }
dataset_padded_batch <-
function(dataset,
batch_size,
padded_shapes = NULL,
padding_values = NULL,
drop_remainder = FALSE,
name = NULL) {
args <- capture_args(match.call(), list(
batch_size = as_integer_tensor,
padded_shapes = as_tensor_shapes
), ignore = "dataset")
as_tf_dataset(do.call(dataset$padded_batch, args))
}
#' A transformation that resamples a dataset to a target distribution.
#'
#' @param dataset A `tf.Dataset`
#' @param class_func A function mapping an element of the input dataset to a
#' scalar `tf.int32` tensor. Values should be in `[0, num_classes)`.
#' @param target_dist A floating point type tensor, shaped `[num_classes]`.
#' @param initial_dist (Optional.) A floating point type tensor, shaped
#' `[num_classes]`. If not provided, the true class distribution is estimated
#' live in a streaming fashion.
#' @param seed (Optional.) Integer seed for the resampler.
#' @param name (Optional.) A name for the tf.data operation.
#'
#' @return A `tf.Dataset`
#' @export
#'
#' @examples
#' \dontrun{
#' initial_dist <- c(.5, .5)
#' target_dist <- c(.6, .4)
#' num_classes <- length(initial_dist)
#' num_samples <- 100000
#' data <- sample.int(num_classes, num_samples, prob = initial_dist, replace = TRUE)
#' dataset <- tensor_slices_dataset(data)
#' tally <- c(0, 0)
#' `add<-` <- function (x, value) x + value
#' # tfautograph::autograph({
#' # for(i in dataset)
#' # add(tally[as.numeric(i)]) <- 1
#' # })
#' dataset %>%
#' as_array_iterator() %>%
#' iterate(function(i) {
#' add(tally[i]) <<- 1
#' }, simplify = FALSE)
#' # The value of `tally` will be close to c(50000, 50000) as
#' # per the `initial_dist` distribution.
#' tally # c(50287, 49713)
#'
#' tally <- c(0, 0)
#' dataset %>%
#' dataset_rejection_resample(
#' class_func = function(x) (x-1) %% 2,
#' target_dist = target_dist,
#' initial_dist = initial_dist
#' ) %>%
#' as_array_iterator() %>%
#' iterate(function(element) {
#' names(element) <- c("class_id", "i")
#' add(tally[element$i]) <<- 1
#' }, simplify = FALSE)
#' # The value of tally will now be close to c(75000, 50000)
#' # thus satisfying the target_dist distribution.
#' tally # c(74822, 49921)
#' }
dataset_rejection_resample <-
function(dataset,
class_func,
target_dist,
initial_dist = NULL,
seed = NULL,
name = NULL)
{
require_tf_version("2.7", "dataset_rejection_resample")
args <- capture_args(match.call(),
list(class_func = as_py_function,
seed = as_integer_tensor),
ignore = "dataset")
as_tf_dataset(do.call(dataset$rejection_resample, args))
}
#' A transformation that discards duplicate elements of a Dataset.
#'
#' Use this transformation to produce a dataset that contains one instance of
#' each unique element in the input (See example).
#'
#' @note This transformation only supports datasets which fit into memory and
#' have elements of either tf.int32, tf.int64 or tf.string type.
#'
#' @param dataset A tf.Dataset.
#' @param name (Optional.) A name for the tf.data operation.
#'
#' @return A tf.Dataset
#' @export
#'
#' @examples
#' \dontrun{
#' c(0, 37, 2, 37, 2, 1) %>% as_tensor("int32") %>%
#' tensor_slices_dataset() %>%
#' dataset_unique() %>%
#' as_array_iterator() %>% iterate() %>% sort()
#' # [1] 0 1 2 37
#' }
dataset_unique <- function(dataset, name=NULL) {
require_tf_version("2.6", "dataset_unique")
args <- list()
args$name <- name
as_tf_dataset(do.call(dataset$unique, args))
}
#' Prepare a dataset for analysis
#'
#' Transform a dataset with named columns into a list with features (`x`) and
#' response (`y`) elements.
#'
#' @inheritParams dataset_decode_delim
#'
#' @param dataset A dataset
#'
#' @param x Features to include. When `named_features` is `FALSE` all features
#' will be stacked into a single tensor, so they must have an identical data type.
#'
#' @param y (Optional). Response variable.
#'
#' @param named `TRUE` to name the dataset elements "x" and "y", `FALSE` to
#' not name the dataset elements.
#'
#' @param named_features `TRUE` to yield features as a named list; `FALSE` to
#' stack features into a single array. Note that in the case of `FALSE` (the
#' default) all features will be stacked into a single 2D tensor so need to
#' have the same underlying data type.
#'
#' @param batch_size (Optional). Batch size if you would like to fuse the
#' `dataset_prepare()` operation together with a `dataset_batch()` (fusing
#' generally improves overall training performance).
#'
#' @inheritParams dataset_map_and_batch
#'
#' @return A dataset. The dataset will have a structure of either:
#'
#' - When `named_features` is `TRUE`: `list(x = list(feature_name = feature_values, ...), y = response_values)`
#'
#' - When `named_features` is `FALSE`: `list(x = features_array, y = response_values)`,
#' where `features_array` is a Rank 2 array of `(batch_size, num_features)`.
#'
#' Note that the `y` element will be omitted when `y` is `NULL`.
#'
#' @seealso [input_fn()][input_fn.tf_dataset()] for use with \pkg{tfestimators}.
#'
#' @export
dataset_prepare <- function(dataset, x, y = NULL, named = TRUE, named_features = FALSE,
parallel_records = NULL,
batch_size = NULL,
num_parallel_batches = NULL,
drop_remainder = FALSE) {
# validate dataset
if (!is_dataset(dataset))
stop("Provided dataset is not a TensorFlow Dataset")
# default to null response_col
response_col <- NULL
# get features
col_names <- column_names(dataset)
eq_features <- rlang::enquo(x)
# attempt use of tidyselect. if there is an error it could be because 'x'
# is a formula. in that case attempt to parse the formula
feature_col_names <- tryCatch({
tidyselect::vars_select(col_names, !! eq_features)
},
error = function(e) {
x <- get_expr(eq_features)
if (is_formula(x)) {
data <- lapply(column_names(dataset), function(x) "")
names(data) <- column_names(dataset)
data <- as.data.frame(data)
parsed <- parse_formula(x, data)
if (!is.null(parsed$response))
response_col <<- match(parsed$response, col_names)
parsed$features
} else {
stop(e$message, call. = FALSE)
}
})
# get column indexes
feature_cols <- match(feature_col_names, col_names)
# get response if specified
if (!missing(y) && is.null(response_col)) {
eq_response <- rlang::enquo(y)
response_name <- tidyselect::vars_select(col_names, !! eq_response)
if (length(response_name) > 0) {
if (length(response_name) != 1)
stop("Invalid response column: ", paste(response_name))
response_col <- match(response_name, col_names)
}
}
# mapping function
map_func <- function(record) {
# `make_csv_dataset` returns an ordered dict instead of a `dict`
# which in turn doesn't get automatically converted by reticulate.
if (inherits(record, "python.builtin.dict"))
record <- reticulate::py_to_r(record)
# select features
record_features <- record[feature_cols]
# apply names to features if named
if (named_features) {
names(record_features) <- feature_col_names
# otherwise stack features into a single tensor
} else {
record_features <- unname(record_features)
# determine the axis based on the shape of the tensor
# (unbatched tensors will be scalar with no shape,
# so will stack on axis 0)
shape <- record_features[[1]]$get_shape()$as_list()
axis <- length(shape)
record_features <- tf$stack(record_features, axis = axis)
}
  # massage the record into the appropriate structure
if (!is.null(response_col)) {
record <- list(record_features, record[[response_col]])
if (named)
names(record) <- c("x", "y")
}
else {
record <- list(record_features)
if (named)
names(record) <- c("x")
}
# return the record
record
}
# call appropriate mapping function
if (is.null(batch_size)) {
dataset <- dataset %>%
dataset_map(map_func = map_func,
num_parallel_calls = parallel_records)
} else {
dataset <- dataset %>%
dataset_map_and_batch(map_func = map_func,
batch_size = batch_size,
num_parallel_batches = num_parallel_batches,
drop_remainder = drop_remainder,
num_parallel_calls = parallel_records)
}
# return dataset
as_tf_dataset(dataset)
}
#' Unbatch a dataset
#'
#' Splits elements of a dataset into multiple elements.
#'
#' @param dataset A dataset
#' @param name (Optional.) A name for the tf.data operation.
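#'
#' @return A dataset
#'
#' @examples
#' \dontrun{
#' # Sketch: undo a batching step, recovering a stream of single elements.
#' range_dataset(0, 6) %>%
#'   dataset_batch(3) %>%
#'   dataset_unbatch() %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # 0 1 2 3 4 5 (one element per print)
#' }
#'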
#' @export
dataset_unbatch <- function(dataset, name = NULL) {
  as_tf_dataset(dataset$unbatch(name = name))
}
#' Add the tf_dataset class to a dataset
#'
#' Calling this function on a dataset adds the "tf_dataset" class to the dataset
#' object. All datasets returned by functions in the \pkg{tfdatasets} package
#' call this function on the dataset before returning it.
#'
#' @param dataset A dataset
#'
#' @return A dataset with class "tf_dataset"
#'
#' @keywords internal
#'
#' @export
as_tf_dataset <- function(dataset) {
# validate dataset
if (!is_dataset(dataset))
stop("Provided dataset is not a TensorFlow Dataset")
# add class if needed
if (!inherits(dataset, "tf_dataset"))
class(dataset) <- c("tf_dataset", class(dataset))
# return
dataset
}
#' Combines input elements into a dataset of windows.
#'
#' @param dataset A dataset
#' @param size An integer, representing the number of elements of the input
#' dataset to combine into a window.
#' @param shift An integer, representing the forward shift of the sliding
#' window in each iteration. Defaults to `size`.
#' @param stride An integer, representing the stride of the input elements in
#' the sliding window.
#' @param drop_remainder A boolean, representing whether a window should be
#' dropped in case its size is smaller than `size`.
#'
#' @family dataset methods
#'
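#' @examples
#' \dontrun{
#' # Sketch: size-3 windows with shift 2. Each window is itself a dataset;
#' # flatten with dataset_flat_map() + dataset_batch() to inspect the values.
#' range_dataset(0, 7) %>%
#'   dataset_window(size = 3, shift = 2, drop_remainder = TRUE) %>%
#'   dataset_flat_map(function(w) dataset_batch(w, 3)) %>%
#'   as_array_iterator() %>%
#'   iterate(print)
#' # [1] 0 1 2
#' # [1] 2 3 4
#' # [1] 4 5 6
#' }
#'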
#' @export
dataset_window <- function(dataset, size, shift = NULL, stride = 1,
drop_remainder = FALSE) {
as_tf_dataset(
dataset$window(
size = as_integer_tensor(size),
shift = as_integer_tensor(shift),
stride = as_integer_tensor(stride),
drop_remainder = drop_remainder
)
)
}
#' Collects a dataset
#'
#' Iterates through the dataset, collecting every element into a list.
#' It's useful for looking at the full result of the dataset.
#' Note: You may run out of memory if your dataset is too big.
#'
#' @param dataset A dataset
#' @param iter_max Maximum number of iterations. `Inf` until the end of the
#' dataset
#'
#' @family dataset methods
#'
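#' @examples
#' \dontrun{
#' # Sketch: materialize a small dataset as an R list of tensors.
#' range_dataset(0, 3) %>% dataset_collect()
#'
#' # stop after the first 2 elements of a larger dataset
#' range_dataset(0, 100) %>% dataset_collect(iter_max = 2)
#' }
#'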
#' @export
dataset_collect <- function(dataset, iter_max = Inf) {
if (tensorflow::tf_version() < "2.0")
stop("dataset_collect requires TF 2.0", call.=FALSE)
it <- reticulate::as_iterator(dataset)
out <- list()
i <- 0
while(!is.null(x <- reticulate::iter_next(it))) {
i <- i + 1
out[[i]] <- x
if (i >= iter_max) break
}
out
}
#' Reduces the input dataset to a single element.
#'
#' The transformation calls `reduce_func` successively on every element of the
#' input dataset until the dataset is exhausted, aggregating information in its
#' internal state. The `initial_state` argument is used for the initial state
#' and the final state is returned as the result.
#'
#' @param dataset A dataset
#' @param initial_state An element representing the initial state of the transformation.
#' @param reduce_func A function that maps `(old_state, input_element)` to new_state.
#' It must take two arguments and return a new element.
#' The structure of new_state must match the structure of initial_state.
#'
#' @return A dataset element.
#'
#' @family dataset methods
#'
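#' @examples
#' \dontrun{
#' # Sketch: sum the elements of a dataset. The initial state fixes the
#' # result dtype (int64 here, matching range_dataset()).
#' range_dataset(0, 10) %>%
#'   dataset_reduce(as_tensor(0, dtype = "int64"), function(state, x) state + x)
#' # tf.Tensor(45, shape=(), dtype=int64)
#' }
#'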
#' @export
dataset_reduce <- function(dataset, initial_state, reduce_func) {
dataset$reduce(initial_state, reduce_func)
}
#' Get or Set Dataset Options
#'
#' @param dataset a tensorflow dataset
#' @param ... Valid values include:
#'
#' + A set of named arguments setting options. Names of nested attributes can
#' be separated with a `"."` (see examples). The set of named arguments can be
#' supplied individually to `...`, or as a single named list.
#'
#' + a `tf$data$Options()` instance.
#'
#'
#' @return If values are supplied to `...`, returns a `tf.data.Dataset` with the
#' given options set/updated. Otherwise, returns the currently set options for
#' the dataset.
#'
#' @details The options are "global" in the sense they apply to the entire
#' dataset. If options are set multiple times, they are merged as long as
#' different options do not use different non-default values.
#'
#'
#' @export
#' @examples
#' \dontrun{
#' # pass options directly:
#' range_dataset(0, 10) %>%
#' dataset_options(
#' experimental_deterministic = FALSE,
#' threading.private_threadpool_size = 10
#' )
#'
#' # pass options as a named list:
#' opts <- list(
#' experimental_deterministic = FALSE,
#' threading.private_threadpool_size = 10
#' )
#' range_dataset(0, 10) %>%
#' dataset_options(opts)
#'
#' # pass a tf.data.Options() instance
#' opts <- tf$data$Options()
#' opts$experimental_deterministic <- FALSE
#' opts$threading$private_threadpool_size <- 10L
#' range_dataset(0, 10) %>%
#' dataset_options(opts)
#'
#' # get currently set options
#' range_dataset(0, 10) %>% dataset_options()
#' }
dataset_options <- function(dataset, ...) {
user_opts <- list(...)
if(!length(user_opts))
return(dataset$options())
options <- tf$data$Options()
  # accept a packed list of arguments, so do.call() isn't required for programmatic use
if(is.null(names(user_opts)) &&
length(user_opts) == 1 &&
is.list(user_opts[[1]]))
user_opts <- user_opts[[1]]
for (i in seq_along(user_opts)) {
name <- names(user_opts)[i]
val <- user_opts[[i]]
if (inherits(val, c("tensorflow.python.data.ops.dataset_ops.Options",
"tensorflow.python.data.ops.options.Options"))) {
options <- options$merge(val)
next
}
# special convenience hooks for some known options, with a no-op fallback
transform <- switch(name,
"threading.private_threadpool_size" = as.integer,
"threading.max_intra_op_parallelism" = as.integer,
"experimental_distribute.num_devices" = as.integer,
identity
)
val <- transform(val)
# change names like "foo.bar.baz" to an R expression like
# `options$foo$bar$baz`, but with some semblance of safety by avoiding
# parse(), using as.symbol() on user supplied names, and constructing the
# call we want directly. We do this to avoid hand-coding a recursive impl
# using py_set_attr(), and let the R's `$<-` method do the recursion.
target <- Reduce(
function(x, y) substitute(x$y, list(x = x, y = as.symbol(y))),
strsplit(name, ".", fixed = TRUE)[[1]],
init = quote(options))
expr <- substitute(target <- val, list(target = target))
eval(expr)
}
as_tf_dataset(dataset$with_options(options))
}
#' Get Dataset length
#'
#' Returns the length of the dataset.
#'
#' @param x a `tf.data.Dataset` object.
#'
#' @return Either `Inf` if the dataset is infinite, `NA` if the dataset length
#' is unknown, or an R numeric if it is known.
#' @export
#' @importFrom tensorflow tf_version
#' @examples
#' \dontrun{
#' range_dataset(0, 42) %>% length()
#' # 42
#'
#' range_dataset(0, 42) %>% dataset_repeat() %>% length()
#' # Inf
#'
#' range_dataset(0, 42) %>% dataset_repeat() %>%
#' dataset_filter(function(x) TRUE) %>% length()
#' # NA
#' }
length.tf_dataset <- function(x) {
if (tf_version() >= "2.3") {
l <- x$cardinality()$numpy()
car_inf <- tf$data$INFINITE_CARDINALITY
car_unk <- tf$data$UNKNOWN_CARDINALITY
} else {
l <- tf$data$experimental$cardinality(x)$numpy()
car_inf <- tf$data$experimental$INFINITE_CARDINALITY
car_unk <- tf$data$experimental$UNKNOWN_CARDINALITY
}
if (l == car_inf)
Inf
else if (l == car_unk)
NA
else
l
}
#' @export
#' @rdname length.tf_dataset
length.tensorflow.python.data.ops.dataset_ops.DatasetV2 <- length.tf_dataset
#' Enumerates the elements of this dataset
#'
#' @details Similar to Python's `enumerate`, this transforms a sequence of
#' elements into a sequence of `list(index, element)`, where index is an
#' integer that indicates the position of the element in the sequence.
#'
#' @param dataset A tensorflow dataset
#' @param start An integer (coerced to a `tf$int64` scalar `tf.Tensor`),
#' representing the start value for enumeration.
#'
#' @export
#' @examples
#' \dontrun{
#' dataset <- tensor_slices_dataset(100:103) %>%
#' dataset_enumerate()
#'
#' iterator <- reticulate::as_iterator(dataset)
#' reticulate::iter_next(iterator) # list(0, 100)
#' reticulate::iter_next(iterator) # list(1, 101)
#' reticulate::iter_next(iterator) # list(2, 102)
#' reticulate::iter_next(iterator) # list(3, 103)
#' reticulate::iter_next(iterator) # NULL (iterator exhausted)
#' reticulate::iter_next(iterator) # NULL (iterator exhausted)
#' }
dataset_enumerate <- function(dataset, start=0L) {
as_tf_dataset(dataset$enumerate(as_integer_tensor(start)))
}
#' Creates a `Dataset` of pseudorandom values
#'
#' @details
#' The dataset generates a sequence of uniformly distributed integer values (dtype int64).
#'
#' @param seed (Optional) If specified, the dataset produces a deterministic
#' sequence of values.
#'
#' @export
random_integer_dataset <- function(seed = NULL) {
if (tf_version() >= "2.6")
as_tf_dataset(tf$data$Dataset$random(as_integer_tensor(seed)))
else
as_tf_dataset(tf$data$experimental$RandomDataset(as_integer_tensor(seed)))
}
#' A transformation that scans a function across an input dataset
#'
#' @details
#' This transformation is a stateful relative of `dataset_map()`.
#' In addition to mapping `scan_func` across the elements of the input dataset,
#' `scan()` accumulates one or more state tensors, whose initial values are
#' `initial_state`.
#'
#' @param dataset A tensorflow dataset
#'
#' @param initial_state A nested structure of tensors, representing the initial
#' state of the accumulator.
#'
#' @param scan_func A function that maps `(old_state, input_element)` to
#' `(new_state, output_element)`. It must take two arguments and return a
#' pair of nested structures of tensors. The `new_state` must match the
#' structure of `initial_state`.
#'
#' @export
#' @examples
#' \dontrun{
#' initial_state <- as_tensor(0, dtype="int64")
#' scan_func <- function(state, i) list(state + i, state + i)
#' dataset <- range_dataset(0, 10) %>%
#' dataset_scan(initial_state, scan_func)
#'
#' reticulate::iterate(dataset, as.array) %>%
#' unlist()
#' # 0 1 3 6 10 15 21 28 36 45
#' }
dataset_scan <- function(dataset, initial_state, scan_func) {
if(tf_version() >= "2.6")
as_tf_dataset(dataset$scan(initial_state, as_py_function(scan_func)))
  else {
    as_tf_dataset(dataset$apply(
      tf$data$experimental$scan(initial_state, as_py_function(scan_func))
    ))
  }
}
#' Persist the output of a dataset
#'
#' @details
#' The snapshot API allows users to transparently persist the output of their
#' preprocessing pipeline to disk, and materialize the pre-processed data on a
#' different training run.
#'
#' This API enables repeated preprocessing steps to be consolidated, and allows
#' re-use of already processed data, trading off disk storage and network
#' bandwidth for freeing up more valuable CPU resources and accelerator compute
#' time.
#'
#' <https://github.com/tensorflow/community/blob/master/rfcs/20200107-tf-data-snapshot.md>
#' has detailed design documentation of this feature.
#'
#' Users can specify various options to control the behavior of snapshot,
#' including how snapshots are read from and written to by passing in
#' user-defined functions to the `reader_func` and `shard_func` parameters.
#'
#' `shard_func` is a user specified function that maps input elements to
#' snapshot shards.
#'
#' If `shard_func` is not supplied, the equivalent of the following is performed:
#' ```R
#' NUM_SHARDS <- parallel::detectCores()
#' dataset %>%
#' dataset_enumerate() %>%
#' dataset_snapshot(
#' "/path/to/snapshot/dir",
#'     shard_func = function(index, ds_elem) index %% NUM_SHARDS) %>%
#' dataset_map(function(index, ds_elem) ds_elem)
#' ```
#'
#' `reader_func` is a user specified function that accepts a single argument:
#' a Dataset of Datasets, each representing a "split" of elements of the
#' original dataset. The cardinality of the input dataset matches the
#' number of the shards specified in the `shard_func`. The function
#' should return a Dataset of elements of the original dataset.
#'
#' Users may want to specify this function to control how snapshot files should be
#' read from disk, including the amount of shuffling and parallelism.
#'
#' Here is an example of a standard reader function a user can define. This
#' function enables both dataset shuffling and parallel reading of datasets:
#'
#' ````R
#' user_reader_func <- function(datasets) {
#' num_cores <- parallel::detectCores()
#' datasets %>%
#' dataset_shuffle(num_cores) %>%
#'     dataset_interleave(function(x) x, cycle_length = num_cores)
#' }
#'
#' dataset <- dataset %>%
#' dataset_snapshot("/path/to/snapshot/dir",
#' reader_func = user_reader_func)
#' ````
#'
#' By default, snapshot parallelizes reads by the number of cores available on
#' the system, but will not attempt to shuffle the data.
#'
#' @param dataset A tensorflow dataset
#'
#' @param path Required. A directory to use for storing/loading the snapshot to/from.
#'
#' @param compression Optional. The type of compression to apply to the snapshot
#' written to disk. Supported options are `"GZIP"`, `"SNAPPY"`, `"AUTO"` or
#' `NULL` (values of `""`, `NA`, and `"None"` are synonymous with `NULL`)
#' Defaults to `AUTO`, which attempts to pick an appropriate compression
#' algorithm for the dataset.
#'
#' @param reader_func Optional. A function to control how to read data from
#' snapshot shards.
#'
#' @param shard_func Optional. A function to control how to shard data when writing
#' a snapshot.
#'
#' @export
dataset_snapshot <- function(dataset, path, compression=c("AUTO", "GZIP", "SNAPPY", "None"),
reader_func=NULL, shard_func=NULL) {
if(identical(compression, ""))
compression <- NULL
else if(!is.null(compression)) {
compression <- match.arg(compression)
if(compression == "None")
compression <- NULL
}
if (!is.null(reader_func))
reader_func <- as_py_function(reader_func)
if (!is.null(shard_func))
shard_func <- as_py_function(shard_func)
args <- list(path, compression=compression,
reader_func = reader_func,
shard_func = shard_func)
  if (tf_version() >= "2.6")
    as_tf_dataset(do.call(dataset$snapshot, args))
  else
    as_tf_dataset(dataset$apply(do.call(tf$data$experimental$snapshot, args)))
}
#' Convert tf_dataset to an iterator that yields R arrays.
#'
#' @param dataset A tensorflow dataset
#'
#' @return An iterable. Use [`iterate()`] or [`iter_next()`] to access values from the iterator.
#' @export
as_array_iterator <- function(dataset) {
dataset$as_numpy_iterator()
}
#' Group windows of elements by key and reduce them
#'
#' @details This transformation maps each consecutive element in a dataset to a
#' key using `key_func()` and groups the elements by key. It then applies
#' `reduce_func()` to at most `window_size_func(key)` elements matching the same
#' key. All except the final window for each key will contain
#' `window_size_func(key)` elements; the final window may be smaller.
#'
#' You may provide either a constant `window_size` or a window size determined
#' by the key through `window_size_func`.
#'
#' ````r
#' window_size <- 5
#' dataset <- range_dataset(to = 10) %>%
#' dataset_group_by_window(
#' key_func = function(x) x %% 2,
#' reduce_func = function(key, ds) dataset_batch(ds, window_size),
#' window_size = window_size
#' )
#'
#' it <- as_array_iterator(dataset)
#' while (!is.null(elem <- iter_next(it)))
#' print(elem)
#' #> tf.Tensor([0 2 4 6 8], shape=(5,), dtype=int64)
#' #> tf.Tensor([1 3 5 7 9], shape=(5,), dtype=int64)
#' ````
#'
#' @param dataset a TF Dataset
#'
#' @param key_func A function mapping a nested structure of tensors (having
#' shapes and types defined by `self$output_shapes` and `self$output_types`)
#' to a scalar `tf.int64` tensor.
#'
#' @param reduce_func A function mapping a key and a dataset of up to
#' `window_size` consecutive elements matching that key to another dataset.
#'
#' @param window_size A `tf.int64` scalar `tf.Tensor`, representing the number
#' of consecutive elements matching the same key to combine in a single batch,
#' which will be passed to `reduce_func`. Mutually exclusive with
#' `window_size_func`.
#'
#' @param window_size_func A function mapping a key to a `tf.int64` scalar
#' `tf.Tensor`, representing the number of consecutive elements matching the
#' same key to combine in a single batch, which will be passed to
#' `reduce_func`. Mutually exclusive with `window_size`.
#'
#' @param name (Optional.) A name for the Tensorflow operation.
#'
#' @seealso
#' + <https://www.tensorflow.org/api_docs/python/tf/data/Dataset#group_by_window>
#' @export
dataset_group_by_window <-
function(dataset, key_func, reduce_func,
window_size = NULL,
window_size_func = NULL,
name = NULL) {
if(!is.null(window_size_func))
window_size_func <- as_py_function(window_size_func)
as_tf_dataset(dataset$group_by_window(
key_func = as_py_function(key_func),
reduce_func = as_py_function(reduce_func),
window_size = as_integer_tensor(window_size),
window_size_func = window_size_func,
name = name
))
}
#' Get the single element of the dataset.
#'
#' The function enables you to use a TF Dataset in a stateless "tensor-in
#' tensor-out" expression, without creating an iterator. This facilitates the
#' ease of data transformation on tensors using the optimized TF Dataset
#' abstraction on top of them.
#'
#' For example, consider a `preprocess_batch()` function that takes as input a
#' batch of raw features and returns the processed features.
#'
#' ```r
#' preprocess_one_case <- function(x) x + 100
#'
#' preprocess_batch <- function(raw_features) {
#' batch_size <- dim(raw_features)[1]
#' ds <- raw_features %>%
#' tensor_slices_dataset() %>%
#' dataset_map(preprocess_one_case, num_parallel_calls = batch_size) %>%
#' dataset_batch(batch_size)
#' as_tensor(ds)
#' }
#'
#' raw_features <- array(seq(prod(4, 5)), c(4, 5))
#' preprocess_batch(raw_features)
#' ```
#'
#' In the above example, the batch of `raw_features` was converted to a TF
#' Dataset. Next, each of the raw feature cases in the batch was mapped using
#' `preprocess_one_case()` and the processed features were grouped into a
#' single batch. The final dataset contains only one element, which is a batch
#' of all the processed features.
#'
#' Note: The dataset should contain only one element. Now, instead of creating
#' an iterator for the dataset and retrieving the batch of features, the
#' `as_tensor()` function is used to skip the iterator creation process and
#' directly output the batch of features.
#'
#' This can be particularly useful when your tensor transformations are
#' expressed as TF Dataset operations, and you want to use those transformations
#' while serving your model.
#'
#' @param x A TF Dataset
#' @param name (Optional.) A name for the TensorFlow operation.
#' @param ... passed on to `tensorflow::as_tensor()`
#' @seealso
#' + <https://www.tensorflow.org/api_docs/python/tf/data/Dataset#get_single_element>
#'
#' @export
#' @rdname as_tensor.tf_dataset
#' @aliases get_single_element
#' @importFrom tensorflow as_tensor
as_tensor.tensorflow.python.data.ops.dataset_ops.DatasetV2 <- function(x, ..., name = NULL) {
tensor <- x$get_single_element(name = name)
if(length(list(...)))
tensorflow::as_tensor(tensor, ..., name = name)
else
tensor
}
#' @rdname as_tensor.tf_dataset
#' @export
as.array.tensorflow.python.data.ops.dataset_ops.DatasetV2 <- function(x, ...)
as.array(as_tensor.tensorflow.python.data.ops.dataset_ops.DatasetV2(x, ...))
#' @export
tensorflow::as_tensor