Nothing
# __________________ #< 837022fb68986f4274072896ad1f8be6 ># __________________
# Helpers ####
## .................. #< a309bc456a67722d30118ad015672baa ># ..................
## Is correctly positioned ####
is_correctly_positioned_ <-
function(data, col, position, what = "max") {
target_val <-
ifelse(what == "max", max(data[[col]]), min(data[[col]]))
data[[col]][[position]] == target_val
}
## .................. #< 8e6f910bb09315ea08d9791e6a30c6a7 ># ..................
## Swap values ####
swap_values_ <- function(vec, v1, v2) {
v1_ind <- vec[vec == v1]
v2_ind <- vec[vec == v2]
vec[v1_ind] <- v2
vec[v2_ind] <- v1
vec
}
## .................. #< f5bf1e543ca585628647e0f35453d42f ># ..................
## Base helpers ####
base_rename_ <-
function(data, before, after, warn_at_overwrite = FALSE) {
#
# Replaces name of column in data frame
#
# Check names
if (!is.character(before) || !is.character(after)) {
stop("'before' and 'after' must both be of type character.")
}
if (length(before) != 1 || length(before) != 1) {
stop("'before' and 'after' must both have length 1.")
}
if (before == after) {
message("'before' and 'after' were identical.")
return(data)
}
# If after is already a column in data
# remove it, so we don't have duplicate column names
if (after %in% colnames(data)) {
if (isTRUE(warn_at_overwrite)) {
warning("'after' already existed in 'data' and will be replaced.")
}
data[[after]] <- NULL
}
colnames(data)[names(data) == before] <- after
data
}
# Cols should be col names
base_select_ <- function(data, cols) {
tryCatch(
subset(data, select = cols),
error = function(e) {
if (grepl("is missing", e)) {
stop("base_select_() only work on data.frames.")
} else {
stop(paste0("base_select_() got error from subset(): ", e))
}
}
)
}
# Cols should be col names
base_deselect_ <- function(data, cols) {
if (!is.character(cols)) {
stop("cols must be names")
}
base_select_(data = data, cols = setdiff(names(data), cols))
}
## .................. #< ee7952f84c8fa1b183ab4a3b923b9903 ># ..................
## Position first ####
# Col should be col name
position_first_ <- function(data, col) {
if (is.numeric(col)) {
stop("'col' must be a name.")
}
# if(is.data.table(data)){
# return(data[, c(col, setdiff(names(data), col)), with = FALSE])
# }
base_select_(data = data, cols = c(col, setdiff(names(data), col)))
}
## .................. #< 4cb48e1b875b60049471d53ddceeb9dc ># ..................
## Insert row ####
# insertRow2 from https://stackoverflow.com/a/11587051/11832955
# Note: May not work with rownames!
insert_row_ <- function(data, new_row, after) {
data <- rbind(data, new_row)
data <- data[order(c(seq_len(nrow(data) - 1), after + 0.5)), ,
drop = FALSE
] # extra comma on purpose
row.names(data) <- NULL
data
}
## .................. #< 551180c531b6373a09823e43527a5c9d ># ..................
## Create tmp column name ####
# Add underscore until var name is unique
# arg disallowed can add extra things not to be named as
create_tmp_var <- function(data,
tmp_var = ".tmp_index_",
disallowed = NULL) {
# Extract the disallowed names
disallowed <- c(colnames(data), disallowed)
while (tmp_var %in% disallowed) {
tmp_var <- paste0(tmp_var, "_")
}
tmp_var
}
## .................. #< 3abfddeb36aa8dd83aab2d14c948a5fc ># ..................
## Create tmp unique value ####
# Add underscore until value is unique in the vector
# arg disallowed can add extra things not to be named as
create_tmp_val <- function(v,
tmp_val = ".tmp_val_",
disallowed = NULL) {
# Extract the disallowed names
disallowed <- c(unique(v), disallowed)
while (tmp_val %in% disallowed) {
tmp_val <- paste0(tmp_val, "_")
}
tmp_val
}
## .................. #< e0e05520eb560f3113652056facb7c8f ># ..................
## Is between ####
is_between_ <- function(x, a, b) {
# Checks if x is between a and b
x > a & x < b
}
## .................. #< db2cfc96e473219a35e8193b8357e682 ># ..................
## Not in ####
`%ni%` <- function(x, table) {
!(x %in% table)
}
## .................. #< 56db9e9358659a166e5bb2c662733e42 ># ..................
## Is zero vector ####
is_zero_vector_ <- function(v){
is.numeric(v) && all(v == 0)
}
## .................. #< 76a9352ccecc01349336a01b9a870511 ># ..................
## Greedy windows ####
greedy_windows_ <- function(data,
window_size,
factor_name = ".window") {
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(data,
min.cols = 1,
min.rows = 1,
add = assert_collection
)
checkmate::assert_number(window_size, lower = 1, add = assert_collection)
checkmate::assert_string(factor_name, min.chars = 1, add = assert_collection)
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
size <- nrow(data)
num_windows <- ceiling(size / window_size)
windows <- rep(seq_len(num_windows), each = window_size)
data[[factor_name]] <- factor(head(windows, size))
data
}
## .................. #< 160fb806ead6df367e8121d143d788b6 ># ..................
## Windows n-distributed ####
ndist_windows_ <- function(data,
num_windows,
factor_name = ".window") {
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(data, add = assert_collection)
checkmate::assert_number(num_windows, lower = 1, add = assert_collection)
checkmate::assert_string(factor_name, min.chars = 1, add = assert_collection)
checkmate::reportAssertions(assert_collection)
if (num_windows > nrow(data)) {
assert_collection$push("'num_windows' was greater than the number of rows in 'data'.")
}
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
data[[factor_name]] <-
n_dist_group_factor_(v_size = nrow(data), n_windows = num_windows)
data
}
n_dist_group_factor_ <- function(v_size, n_windows) {
# Simplified version of groupdata2 version
#
# Takes a vector and a number of windows to create
# Distributes excess elements somewhat evenly across windows
# Returns grouping factor
#
# Create grouping factor with distributed excess elements
grouping_factor <- ceiling(seq_len(v_size) / (v_size / n_windows))
# Sometimes a value of e.g. 7.0000.. is rounded up to 8
# in the above ceiling(). This means that we get 8 groups
# instead of the specified 7. In this case we replace
# the extra "8" with 7.
# --> This should be tested! <--
# If there are too many groups
if (max(grouping_factor) > n_windows) {
# Get the largest number in grouping factor
max_value <- max(grouping_factor)
# Get the size of the last group
last_group_size <-
length(grouping_factor[grouping_factor == max_value])
# If there is only one group too much and it only contains one element
# put this element in the second last group instead
if (max_value - 1 == n_windows && last_group_size == 1) {
# Replace the level of the factor containing the max_value
# with the value of the second last group instead (max_value - 1)
grouping_factor[grouping_factor == max_value] <- max_value - 1
# Else, stop the script as something has gone wrong
# and I need to know about it!
} else {
stop(
paste(
"Grouping factor contains too many groups! ",
max_value,
" groups in total with ",
last_group_size,
" elements in last group.",
sep = ""
)
)
}
}
factor(grouping_factor)
}
## .................. #< 60cfc78f594e5611a6eaaf34a2b212ae ># ..................
## Range outliers ####
split_range_outliers_ <- function(data,
col,
.min = NULL,
.max = NULL) {
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(data, add = assert_collection)
checkmate::assert_string(col, min.chars = 1, add = assert_collection)
checkmate::assert_number(.min, finite = TRUE, null.ok = TRUE, add = assert_collection)
checkmate::assert_number(.max, finite = TRUE, null.ok = TRUE, add = assert_collection)
checkmate::reportAssertions(assert_collection)
if (col %ni% colnames(data)){
assert_collection$push("'col' not found in 'data'.")
}
if (!is.null(.min) && !is.null(.max) && .min >= .max){
assert_collection$push("'.min' must be smaller than .max or 'NULL'.")
}
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
rownames(data) <- NULL
# Create min/max masks (bools)
if (!is.null(.min)){
min_mask <- data[[col]] < .min
} else {
min_mask <- rep(0, nrow(data))
}
if (!is.null(.max)){
max_mask <- data[[col]] > .max
} else {
max_mask <- rep(0, nrow(data))
}
mask <- (min_mask + max_mask) == 1
outlier_inds <- seq_len(nrow(data))[mask]
if (length(outlier_inds) == 0){
outliers <- data[-seq_len(nrow(data)), ]
} else {
outliers <- data[outlier_inds, ]
data <- data[-outlier_inds, ]
}
list("data" = data,
"outliers" = outliers)
}
add_na_column_ <- function(data, col, val = NA_real_, as_list = FALSE, overwrite = FALSE){
if (isTRUE(as_list))
to_add <- rep(list(val), nrow(data))
else
to_add <- rep(val, nrow(data))
# Add column
add_info_col_(
data = data,
nm = col,
content = to_add,
overwrite = overwrite
)
}
## .................. #< 385b0b542a569ee6b885fc0849f644a6 ># ..................
## Input helpers ####
check_unique_colnames_ <- function(...) {
nms <- list(...) %>%
purrr::compact() %>%
unlist(recursive = TRUE, use.names = FALSE)
if (is.null(nms))
return(invisible(NULL))
# Get names as string
first_nms <- deparse(head(nms, 4))
# Remove "list()"
nms_str <- paste0(substring(first_nms, first = 3, nchar(first_nms)-1))
# Add ... if more than 4 elements
if (length(nms) > 4){
nms_str <- paste0(nms_str, ", ...")
}
# Check that names are unique
checkmate::assert_character(
nms,
unique = TRUE,
any.missing = FALSE,
.var.name = paste0(
"specified column names (",
nms_str, ")"
)
)
}
# check_overwrite(data = data, nm = "?", overwrite = overwrite)
check_overwrite_ <- function(data, nm, overwrite){
checkmate::assert_flag(overwrite)
if (!is.data.frame(data) || isTRUE(overwrite)){
return(invisible(NULL))
}
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_string(nm, min.chars = 1, null.ok = TRUE, add = assert_collection)
# Check if we would overwrite an existing column
if (!is.null(nm) && nm %in% colnames(data)){
assert_collection$push(
paste0("The column '", nm,
"' already exists and 'overwrite' is disabled.")
)
}
checkmate::reportAssertions(assert_collection)
}
## .................. #< 4405b38854cdc7cc63ac70477e1c9953 ># ..................
## Output helpers ####
# When 1 coordinate but multiple names, it recycles the coordinate
list_coordinates_ <- function(coordinates, names) {
checkmate::assert(
checkmate::check_numeric(coordinates),
checkmate::check_list(coordinates, types = "numeric")
)
checkmate::assert_character(names, any.missing = FALSE,
min.chars = 1, unique = TRUE)
if (length(coordinates) == 1 && length(names) > 1) {
coordinates <- rep(coordinates, length(names))
}
if (length(coordinates) > 1 &&
length(coordinates) != length(names)) {
stop(
paste0(
"When 'coordinates' has length >1, 'coordinates' and 'names'",
" must have same length."
)
)
}
list(setNames(coordinates, names))
}
# Paste a list column where each element is c(x = 1, y = d)
paste_coordinates_column_ <- function(data, col, na.rm=FALSE) {
str_name <- paste0(col, "_str")
# Remove NAs
if (isTRUE(na.rm)){
data[[col]] <- purrr::map(.x = data[[col]], .f = ~{
#purrr::map(.f = round(.x, digits = digits))
.x[!is.na(.x)]
})
}
# Paste coordinates
data[[str_name]] <- paste0(data[[col]])
# Whether to cut away "list(" or "c("
start_char <- ifelse(is.list(data[[col]][[1]]), 6, 3)
data[[str_name]] <- substr(data[[str_name]],
start = start_char,
stop = nchar(data[[str_name]]) - 1
)
# Remove whitespace
data[[str_name]] <- gsub("[[:blank:]]+", "", data[[str_name]])
data
}
paste_ranges_column_ <- function(data,
col,
include_min = TRUE,
include_max = TRUE) {
str_name <- paste0(col, "_str")
outer_is_list <- is.list(data[[col]][[1]])
data[[str_name]] <- data[[col]] %>%
purrr::map(.f = ~ {
purrr::map(.x, .f = unname)
})
data[[str_name]] <- paste0(data[[str_name]])
data[[str_name]] <- substr(data[[str_name]],
start = ifelse(outer_is_list, 6, 3),
stop = nchar(data[[str_name]]) - 1
)
data[[str_name]] <- gsub("[[:blank:]]+", "", data[[str_name]])
data[[str_name]] <- gsub("c", "", data[[str_name]])
if (isTRUE(include_min)) {
data[[str_name]] <- gsub("\\(", "[", data[[str_name]])
}
if (isTRUE(include_max)) {
data[[str_name]] <- gsub("\\)", "]", data[[str_name]])
}
data
}
add_dimensions_ <- function(data,
new_vectors,
suffix = "",
overwrite = TRUE) {
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(data, add = assert_collection)
checkmate::assert_list(new_vectors, names = "unique",
types = c("numeric", "character", "factor"),
add = assert_collection)
checkmate::assert_string(suffix, add = assert_collection)
checkmate::assert_flag(overwrite, add = assert_collection)
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
# Add suffix
names(new_vectors) <- paste0(names(new_vectors), suffix)
# Convert to data frame
new_data <- data.frame(new_vectors, stringsAsFactors = FALSE)
if (!isTRUE(overwrite) &&
length(intersect(colnames(new_data), colnames(data))) > 0) {
stop(
paste0(
"Adding these dimensions would overwrite existing columns: ",
paste0(intersect(colnames(new_data), colnames(data)), collapse = ", "),
"."
)
)
}
# If overwriting columns, delete in 'data' first
col_intersection <-
intersect(colnames(new_data), colnames(data))
if (length(col_intersection) > 0) {
data <- data[, colnames(data) %ni% col_intersection, drop = FALSE]
}
# Add to original dataframe
data <- dplyr::bind_cols(data, new_data)
data
}
add_info_col_ <- function(data, nm, content, overwrite = FALSE, check_overwrite = TRUE){
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(data, add = assert_collection)
checkmate::assert_string(nm, null.ok = TRUE, add = assert_collection)
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
# When name is NULL, we don't add the column
if (is.null(nm)){
return(data)
}
if (isTRUE(check_overwrite)){
# Check if we will overwrite an existing column
check_overwrite_(data = data, nm = nm, overwrite = overwrite)
}
# Add column
data[[nm]] <- content
data
}
## .................. #< cb9942d54f8fa35406de031d066531dc ># ..................
## Conversions ####
calculate_swirl_degrees_ <- function(distances, radius) {
checkmate::assert_numeric(distances, lower = 0, any.missing = FALSE)
checkmate::assert_number(radius)
if (radius == 0) {
return(distances * 0)
}
(distances / (2 * radius) * 360) %% 360
}
## .................. #< 60cfc78f594e5611a6eaaf34a2b212ae ># ..................
## Package imports ####
#' @importFrom dplyr %>%
#' @importFrom rlang .data :=
#' @importFrom utils head tail
#' @importFrom stats quantile runif rnorm median setNames
#' @importFrom lifecycle deprecated deprecate_stop
NULL
# R CMD check NOTE handling
if (getRversion() >= "2.15.1") {
utils::globalVariables(c("."))
}
get_pkg_version <- function(pkg_name){
vs <- unlist(utils::packageVersion(pkg_name))
list("major" = vs[[1]],
"minor" = vs[[2]],
"patch" = vs[[3]],
"dev" = ifelse(length(vs) > 3, vs[[4]], integer(0)))
}
is_checkmate_v2_1 <- function(){
v <- get_pkg_version("checkmate")
v$major == 2 && v$minor >= 1
}
is_purr_v1 <- function(){
v <- get_pkg_version("purrr")
v$major >= 1
}
if (is_purr_v1()) {purrr_error <- c("purrr_error_indexed", "rlang_error")
} else {purrr_error <- "simpleError"}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.