# synthetic - R package for synthetic dataset creation and serialization benchmarks
#
# Copyright (C) 2019-present, Mark AJ Klik
#
# This file is part of the synthetic R package.
#
# The synthetic R package is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation.
#
# The synthetic R package is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
# for more details.
#
# You should have received a copy of the GNU Affero General Public License along
# with the synthetic R package. If not, see <http://www.gnu.org/licenses/>.
#
# You can contact the author at:
# - synthetic R package source repository : https://github.com/fstpackage/synthetic
# Print a short, colorized summary of a numerical model based vector template.
#
# `metadata` is expected to provide `values` and `derivatives` elements. The
# reported lower bound is the first value plus half of the first derivative
# entry; the upper bound is the last value plus half of the last derivative
# entry. The text is styled with `italic()` and `cyan()` (presumably the crayon
# helpers - confirm against the package imports) and written with cat().
numerical_model_printer <- function(metadata) {
  header <- italic(cyan("numerical model based vector template"))
  cat(header, "\n", sep = " ")

  # bounds of the range covered by the model
  range_start <- metadata$values[1] + 0.5 * metadata$derivatives[1]
  range_end <- tail(metadata$values, 1) + 0.5 * tail(metadata$derivatives, 1)

  cat(
    cyan("- values between "), range_start,
    cyan(" and "), range_end,
    "\n",
    sep = ""
  )
}
# Create a vector template from an existing column.
#
# Only columns whose storage type is "double" are handled; these are passed on
# to dbl_template_from_column(). Any other storage type raises an error.
template_from_column <- function(column) {
  # implement this method in C for speed

  # anything that is not stored as a double is rejected
  if (typeof(column) != "double") {
    stop("Unknown column format")
  }

  # numeric columns
  dbl_template_from_column(column)
}
#' Define the template of a synthetic table
#'
#' A table template is defined either by a set of column templates or by a
#' single existing table that serves as the source of the synthetic data.
#'
#' @param ... column templates created with the various template_ methods, or a single
#' dataset (a data.frame) to use as the source for generating synthetic data
#' @param id ID of the synthetic table (e.g. 'fst homepage dataset')
#' @param construct_model Only relevant when `...` is a single table; it selects how the
#' table model is built. With `construct_model = TRUE`, the columns of the table are used
#' to construct an approximation of each column's distribution, and that approximation is
#' used to generate the actual data. With `construct_model = FALSE`, the whole table is
#' stored and new columns are generated by taking random samples from the original table.
#'
#' @return a synthetic table template (class `tabletemplate`, with elements `id`, `columns`
#' and `source_table`) that can be used to generate synthetic data using generate()
#' @export
synthetic_table <- function(..., id = "synthetic table", construct_model = FALSE) {
  column_definitions <- list(...)
  n_definitions <- length(column_definitions)

  if (n_definitions == 0) {
    stop("Please specify at least one column template to define the table. Alternatively, you ",
      "can specify a table that will be used as the source for generating synthetic data.")
  }

  # a single data.frame argument is treated as the source table
  if (n_definitions == 1 && inherits(column_definitions[[1]], "data.frame")) {
    # NOTE(review): setDT() converts by reference, so the data.frame supplied by
    # the caller may be affected as well - confirm this is intended
    setDT(column_definitions[[1]])

    keep_source <- !construct_model

    if (keep_source) {
      # no model requested: the table itself is kept for sampling
      table_template <- list(
        id = id,
        columns = NULL,
        source_table = column_definitions[[1]]
      )
    } else {
      # model requested: only the derived column templates are kept
      table_template <- list(
        id = id,
        columns = template_from_table(column_definitions[[1]]),
        source_table = NULL
      )
    }

    class(table_template) <- "tabletemplate"
    return(table_template)
  }

  # every remaining argument must carry the "vectortemplate" class
  assert_vector_template <- function(col_def) {
    if (is.element("vectortemplate", class(col_def))) {
      return(invisible(NULL))
    }

    stop("Incorrect column definitions, columns must be vector templates",
      " generated with one of the template_ methods")
  }

  lapply(column_definitions, assert_vector_template)

  structure(
    list(
      id = id,
      columns = column_definitions,
      source_table = NULL
    ),
    class = "tabletemplate"
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.