Nothing
# __________________ #< 48020236dfd0dbd89a97a74647216c72 ># __________________
# Basic pipeline ####
# NOTE: Any R6 method called for its side effects should return invisible(self).
# Applies one transformation at a time
# With the same arguments for all groups
#' @title Chain multiple transformations
#' @description
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
#'
#' Build a pipeline of transformations to be applied sequentially.
#'
#' Uses the same arguments for all groups in \code{`data`}.
#'
#' Groupings are reset between each transformation. See `group_cols`.
#'
#' \strong{Standard workflow}: Instantiate pipeline -> Add transformations -> Apply to data
#'
#' To apply different argument values to each group, see
#' \code{\link[rearrr:GeneratedPipeline]{GeneratedPipeline}} for generating
#' argument values for an arbitrary number of groups and
#' \code{\link[rearrr:FixedGroupsPipeline]{FixedGroupsPipeline}} for specifying
#' specific values for a fixed set of groups.
#' @author Ludvig Renbo Olsen, \email{r-pkgs@@ludvigolsen.dk}
#' @export
#' @family pipelines
#' @examples
#' # Attach package
#' library(rearrr)
#'
#' # Create a data frame
#' df <- data.frame(
#' "Index" = 1:12,
#' "A" = c(1:4, 9:12, 15:18),
#' "G" = rep(1:3, each = 4)
#' )
#'
#' # Create new pipeline
#' pipe <- Pipeline$new()
#'
#' # Add 2D rotation transformation
#' # Note that we specify the grouping via `group_cols`
#' pipe$add_transformation(
#' fn = rotate_2d,
#' args = list(
#' x_col = "Index",
#' y_col = "A",
#' origin = c(0, 0),
#' degrees = 45,
#' suffix = "",
#' overwrite = TRUE
#' ),
#' name = "Rotate",
#' group_cols = "G"
#' )
#'
#' # Add the `cluster_group` transformation
#' # Note that this function requires the entire input data
#' # to properly scale the groups. We therefore specify `group_cols`
#' # as part of `args`. This works as `cluster_groups()` accepts that
#' # argument.
#' pipe$add_transformation(
#' fn = cluster_groups,
#' args = list(
#' cols = c("Index", "A"),
#' suffix = "",
#' overwrite = TRUE,
#' multiplier = 0.05,
#' group_cols = "G"
#' ),
#' name = "Cluster"
#' )
#'
#' # Check pipeline object
#' pipe
#'
#' # Apply pipeline to data.frame
#' # Enable `verbose` to print progress
#' pipe$apply(df, verbose = TRUE)
Pipeline <- R6::R6Class(
"Pipeline",
public = list(
#' @field transformations \code{list} of transformations to apply.
transformations = list(),
#' @field names Names of the transformations.
names = character(),
#' @description
#' Add a transformation to the pipeline.
#' @param fn Function that performs the transformation.
#' @param args Named \code{list} with arguments for the \code{`fn`} function.
#' @param name Name of the transformation step. Must be unique.
#' @param group_cols Names of the columns to group the input
#' data by before applying the transformation.
#'
#' Note that the transformation function is applied separately to each group (subset).
#' If the \code{`fn`} function requires access to the entire \code{data.frame}, the
#' grouping columns should be specified as part of \code{`args`} and
#' handled by the \code{`fn`} function.
#' @return The pipeline. To allow chaining of methods.
add_transformation = function(fn, args, name, group_cols = NULL){
if (name %in% self$names){
stop(paste0("the `name`, ", name, ", already exists. Names must be unique."))
}
# Add transformation to pipeline
self$names <- append(self$names, name)
transformation <- Transformation$new(fn = fn, args = args,
name = name, group_cols = group_cols)
self$transformations <- c(self$transformations, setNames(list(transformation), name))
# Return object invisibly to allow method chaining
invisible(self)
},
#' @description
#' Apply the pipeline to a \code{data.frame}.
#' @param data \code{data.frame}.
#'
#' A grouped \code{data.frame} will raise a warning and the grouping will be ignored.
#' Use the \code{`group_cols`} argument in the \code{`add_transformation`} method to
#' specify how \code{`data`} should be grouped for each transformation.
#' @param verbose Whether to print the progress.
#' @return Transformed version of \code{`data`}.
apply = function(data, verbose = FALSE) {
# Warn if `data` is grouped and that grouping would be ignored
if (isTRUE(private$warn_grouped_input) && dplyr::is_grouped_df(data)){
warning(
"Ignoring groups in `data`. Only the `group_cols` grouping specifications are used."
)
}
# Print start of process
if (isTRUE(verbose)){
cat(paste0("\n", paste0(rep("-", 54), collapse = "")))
cat("\nApplying transformations.")
start_time <- proc.time()
}
# Apply each of the transformations sequentially
# TODO benchmark if another looping function with env assignment
# is faster with e.g. 5-10 transformations
for (name in self$names){
# Print start of transformation
if (isTRUE(verbose)){
cat(paste0("\nStarting: ", name))
transf_start_time <- proc.time()
}
# Apply transformation
data <- self$transformations[[name]]$apply(data = data)
# Print running time of the transformation
if (isTRUE(verbose)){
transf_end_time <- proc.time()
cat(paste0(
"\nEnded: ", name,
" | Took ", format_running_time_(transf_start_time, transf_end_time), "."
))
}
}
# Print end of finish with running time etc.
if (isTRUE(verbose)){
end_time <- proc.time()
cat(paste0(
"\nFinished applying transformations. Total time: ",
format_running_time_(start_time, end_time), "."
))
cat(paste0("\n", paste0(rep("-", 54), collapse = ""), "\n"))
}
# Return transformed data frame
data
},
#' @description
#' Print an overview of the pipeline.
#' @param ... further arguments passed to or from other methods.
#' @return The pipeline. To allow chaining of methods.
print = function(...) {
cat("Pipeline: \n")
for (name in self$names){
print(self$transformations[[name]], indent = 2, show_class = FALSE)
}
# Return object invisibly to allow method chaining
invisible(self)
}
),
private = list(
# Groupings in the input data will be ignored
# We instead rely on `group_cols` specifications
warn_grouped_input = TRUE
)
)
## .................. #< 79fc422a3b7326a8c27f242052fbd9c1 ># ..................
## Transformation class ####
# A transformation to be applied with
# different argument values to each group
#' @title Transformation
#' @description
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
#'
#' Container for the type of transformation used in
#' \code{\link[rearrr:Pipeline]{Pipeline}}.
#'
#' \strong{Note}: For internal use.
#' @author Ludvig Renbo Olsen, \email{r-pkgs@@ludvigolsen.dk}
#' @export
#' @family transformation classes
Transformation <- R6::R6Class(
"Transformation",
public = list(
#' @field name Name of transformation.
name = NULL,
#' @field fn Transformation function.
fn = NULL,
#' @field args \code{list} of arguments for \code{`fn`}.
args = NULL,
#' @field group_cols Names of columns to group \code{data.frame}
#' by before applying \code{`fn`}.
#'
#' When \code{`NULL`}, the \code{data.frame} is not grouped.
group_cols = NULL,
#' @description
#' Initialize transformation.
#' @param fn Transformation function.
#' @param args \code{list} of arguments for \code{`fn`}.
#' @param name Name of transformation.
#' @param group_cols Names of columns to group \code{data.frame}
#' by before applying \code{`fn`}.
#'
#' When \code{`NULL`}, the \code{data.frame} is not grouped.
initialize = function(fn, args, name = NULL, group_cols = NULL) {
# Check arguments
private$check_initialize_args(fn = fn, args = args, name = name,
group_cols = group_cols)
# Assign to object
self$fn <- fn
self$args <- args
self$name <- name
# In this type of Transformation,
# we need to specify potential group columns
self$group_cols <- group_cols
},
#' @description
#' Apply the transformation to a \code{data.frame}.
#' @param data \code{data.frame}.
#'
#' A grouped \code{data.frame} will first be ungrouped. If \code{`group_cols`} is specified,
#' it will then be grouped by those columns.
#' @return Transformed version of \code{`data`}.
apply = function(data) {
# Ungroup data frame if specified
if (isTRUE(private$ungroup_input)){
data <- dplyr::ungroup(data)
}
# Group data frame if given some group column names
if (!is.null(self$group_cols)){
data <- dplyr::group_by(data, !!!rlang::syms(self$group_cols))
}
# Apply transformation to each group
run_by_group(
data = data,
fn = private$apply_to_group,
restore_grouping = dplyr::is_grouped_df(data)
)
},
#' @description
#' Print an overview of the transformation.
#' @param ... further arguments passed to or from other methods.
#' @param indent How many spaces to indent when printing.
#' @param show_class Whether to print the transformation class name.
#' @return The pipeline. To allow chaining of methods.
print = function(...,
indent = 0,
show_class = TRUE) {
# Create string with indentation spaces
indentation_str <- paste0(rep(" ", indent), collapse = "")
# Extract name
name <- self$name
if (is.null(name)) {
name <- "NoName"
}
# Print transformation class name
if (isTRUE(show_class)){
cat(indentation_str, "Transformation: \n")
indentation_str <- paste0(rep(" ", indent + 2), collapse = "")
}
# Print name of transformation
cat(indentation_str, name, "\n", sep = "")
# Print arguments
args_str <- private$args_to_string(self$args)
cat(indentation_str, " Arguments: ", args_str, "\n", sep = "")
# Print group columns
if (!is.null(self$group_cols)){
group_cols_str <- private$args_to_string(self$group_cols)
cat(indentation_str, " Grouping columns: ", group_cols_str, "\n", sep = "")
}
# Return object invisibly to allow method chaining
invisible(self)
}
),
private = list(
# We don't use existing groupings in the given data frame
ungroup_input = TRUE,
apply_to_group = function(data, group_id) {
if (dplyr::is_grouped_df(data)) {
stop("`data` was grouped. Pass the group subset instead.")
}
# Prepare arguments and apply function
args <- c(list(data = data), self$args)
do.call(self$fn, args, envir = parent.frame())
},
check_initialize_args = function(fn, args, name, group_cols){
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_function(fn, add = assert_collection)
checkmate::assert_list(args, names = "unique", add = assert_collection)
checkmate::assert_string(name, null.ok = TRUE, add = assert_collection)
checkmate::assert_character(group_cols, null.ok = TRUE, min.chars = 1,
any.missing = FALSE, add = assert_collection)
checkmate::reportAssertions(assert_collection)
},
# Convert list of arguments to single string for print methods
args_to_string = function(args, max_len = 20, rm_function_start = FALSE, rm_capitalized_l = FALSE) {
arg_names <- names(args)
arg_vals_strings <- lapply(
args,
clean_arg_str,
max_len = max_len,
rm_function_start = rm_function_start,
rm_capitalized_l = rm_capitalized_l
)
if (length(arg_names) == 0){
string <- paste0(arg_vals_strings, collapse = ", ")
} else {
string <- paste0(arg_names, "=", arg_vals_strings, collapse = ", ")
}
string
}
)
)
## .................. #< b86e070947c65180cf4882eb350888c6 ># ..................
## Utilities ####
format_running_time_ <- function(t_start, t_end, digits = 4, suffix = "s") {
t_total <- t_end[["elapsed"]] - t_start[["elapsed"]]
t_total <- round(t_total, digits = digits)
paste0(t_total, suffix)
}
# Collapse list of strings once
collapse_strings <- function(strings) {
if (length(strings) > 1) {
strings <- paste0(strings, collapse = "")
}
strings
}
# This is applied to each argument value separately
clean_arg_str <- function(string, max_len = 20, rm_function_start = FALSE, rm_capitalized_l = FALSE) {
# Deparse the strings and make sure split strings are collapsed
string <- collapse_strings(deparse(string))
# Reduce multiple consecutive whitespaces to a single whitespace
string <- gsub("[[:blank:]]+", " ", string)
# Remove trailing whitespaces
string <- trimws(string)
# Compress function string
string <- gsub("function[[:space:]]?\\([[:space:]]?", "function(", string)
string <- gsub("[[:space:]]?\\) \\{[[:space:]]*", "){", string)
string <- gsub("\\{[[:space:]]*", "{", string)
string <- gsub("[[:space:]]*\\}", "}", string)
if (isTRUE(rm_function_start)){
string <- gsub("function\\(\\)", "", string)
}
if (isTRUE(rm_capitalized_l)){
string <- gsub("L", "", string, fixed = TRUE)
}
# Shorten long strings
if (!is.null(max_len) && nchar(string) > max_len + 3) {
string <- substr(string, 1, max_len)
string <- paste0(string, "...")
}
string
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.