Nothing
#' Generate individual vectors of the types supported by vroom
#'
#' @param n The size of the vector to generate
#' @param min The minimum range for the vector
#' @param max The maximum range for the vector
#' @param values The explicit values to use.
#' @param f The random function to use.
#' @inheritParams base::sample.int
#' @param ... Additional arguments passed to internal generation functions
#' @name generators
#' @examples
#' # characters
#' gen_character(4)
#'
#' # factors
#' gen_factor(4)
#'
#' # logical
#' gen_logical(4)
#'
#' # numbers
#' gen_double(4)
#' gen_integer(4)
#'
#' # temporal data
#' gen_time(4)
#' gen_date(4)
#' gen_datetime(4)
#' @export
gen_character <- function(
n,
min = 5,
max = 25,
values = c(letters, LETTERS, 0:9),
...
) {
if (min > max) {
max <- min
}
# The seed for the C++ RNG used is an unsigned 32 bit integer, which is why I
# multiply int max by 2. Possibly an off by one error here though...
seeds <- sample.int(2 * .Machine$integer.max, 2)
gen_character_(
n,
min,
max,
paste(values, collapse = ""),
seeds[[1]],
seeds[[2]]
)
}
#' @rdname generators
#' @export
gen_double <- function(n, f = stats::rnorm, ...) {
f(n, ...)
}
#' @rdname generators
#' @export
gen_number <- gen_double
#' @rdname generators
#' @export
gen_integer <- function(
n,
min = 1L,
max = .Machine$integer.max,
prob = NULL,
...
) {
max <- max - min + 1L
sample.int(max, size = n, replace = TRUE, prob = prob) + min - 1L
}
#' @rdname generators
#' @param num_levels The number of factor levels to generate
#' @param ordered Should the factors be ordered factors?
#' @param levels The explicit levels to use, if `NULL` random levels are generated using [gen_name()].
#' @export
gen_factor <- function(
n,
levels = NULL,
ordered = FALSE,
num_levels = gen_integer(1L, 1L, 25L),
...
) {
if (is.null(levels)) {
levels <- gen_name(num_levels)
}
res <- gen_integer(n, max = length(levels), ...)
attr(res, "levels") <- levels
if (ordered) {
class(res) <- c("ordered", "factor")
} else {
class(res) <- "factor"
}
res
}
#' @rdname generators
#' @param fractional Whether to generate times with fractional seconds
#' @export
gen_time <- function(
n,
min = 0,
max = hms::hms(days = 1),
fractional = FALSE,
...
) {
res <- hms::hms(seconds = stats::runif(n, min = min, max = max))
if (!fractional) {
res <- hms::as_hms(floor(res))
}
res
}
#' @rdname generators
#' @export
gen_date <- function(
n,
min = as.Date("2001-01-01"),
max = as.Date("2021-01-01"),
...
) {
structure(as.numeric(gen_integer(n, min = min, max = max)), class = "Date")
}
#' @rdname generators
#' @param tz The timezone to use for dates
#' @export
gen_datetime <- function(
n,
min = as.POSIXct("2001-01-01"),
max = as.POSIXct("2021-01-01"),
tz = "UTC",
...
) {
structure(
stats::runif(n, min = min, max = max),
class = c("POSIXct", "POSIXt"),
tzone = tz
)
}
#' @rdname generators
#' @export
gen_logical <- function(n, ...) {
c(TRUE, FALSE)[sample.int(n = 2, n, replace = TRUE)]
}
all_col_types <- tibble::tribble(
~type , ~class ,
"character" , "character" ,
"factor" , "character" ,
"double" , "numeric" ,
"integer" , "numeric" ,
"number" , "numeric" ,
"date" , "temporal" ,
"datetime" , "temporal" ,
"time" , "temporal" ,
)
#' Generate a random tibble
#'
#' This is useful for benchmarking, but also for bug reports when you cannot
#' share the real dataset.
#'
#' There is also a family of functions to generate individual vectors of each
#' type.
#'
#' @param rows Number of rows to generate
#' @param cols Number of columns to generate, if `NULL` this is derived from `col_types`.
#' @param missing The percentage (from 0 to 1) of missing data to use
#' @seealso [generators] to generate individual vectors.
#' @inheritParams vroom
#' @examples
#' # random 10 x 5 table with random column types
#' rand_tbl <- gen_tbl(10, 5)
#' rand_tbl
#'
#' # all double 25 x 4 table
#' dbl_tbl <- gen_tbl(25, 4, col_types = "dddd")
#' dbl_tbl
#'
#' # Use the dots in long form column types to change the random function and options
#' types <- rep(times = 4, list(col_double(f = stats::runif, min = -10, max = 25)))
#' types
#' dbl_tbl2 <- gen_tbl(25, 4, col_types = types)
#' dbl_tbl2
#' @export
gen_tbl <- function(
rows,
cols = NULL,
col_types = NULL,
locale = default_locale(),
missing = 0
) {
if (is.null(cols) && is.null(col_types)) {
cli::cli_abort("One of {.arg cols} or {.arg col_types} must be set.")
}
spec <- as.col_spec(col_types)
if (is.null(cols)) {
cols <- length(spec$cols)
}
nms <- make_names(names(spec$cols), cols)
specs <- col_types_standardise(
spec,
length(nms),
nms,
vroom_enquo(quo(NULL)),
"unique"
)
res <- vector("list", cols)
for (i in seq_len(cols)) {
type <- sub("collector_", "", class(specs$cols[[i]])[[1]])
if (type == "guess") {
type <- sample(all_col_types[["type"]], 1)
specs$cols[[i]] <- do.call(paste0("col_", type), list())
}
fun_nme <- paste0("gen_", type)
res[[i]] <- do.call(fun_nme, c(rows, specs$cols[[i]]))
}
if (missing > 0) {
res[] <- lapply(res, function(x) {
x[sample(
c(TRUE, FALSE),
size = rows,
prob = c(missing, 1 - missing),
replace = TRUE
)] <- NA
x
})
}
names(res) <- nms
attr(res, "spec") <- specs
tibble::as_tibble(res)
}
# Name and adjective list from https://github.com/rstudio/cranwhales/blob/93349fe1bc790f115a3d56660b6b99ffe258d9a2/random-names.R
#' @rdname generators
#' @export
gen_name <- local({
# This will run during build / installation, but that is OK
adjectives <- readLines(system.file(
"words",
"adjectives.txt",
package = "vroom"
))
animals <- readLines(system.file("words", "animals.txt", package = "vroom"))
function(n) {
paste0(
sample(adjectives, n, replace = TRUE),
"_",
sample(animals, n, replace = TRUE)
)
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.