Nothing
## data_fun.R | ds4psy
## hn | uni.kn | 2023 09 12
## ------------------------
## Functions for creating and manipulating data.
## (1) Generate random data: ----------
# Random binary values: Flip a 2-element-set/coin n times: ------
random_bin_value <- function(x = c(0, 1), n = 1, replace = TRUE) {
if (length(x) != 2) {
stop("random_bin_value: x must be binary.")
}
sample(x = x, size = n, replace = replace)
} # random_bin_value().
## Check:
# random_bin_value(n = 10)
# random_bin_value(c(TRUE, FALSE), 5)
# random_bin_value(x = c("m", "f"), n = 100)
# coin: Flip a fair coin n times (with events): ------
#' Flip a fair coin (with 2 sides "H" and "T") n times.
#'
#' \code{coin} generates a sequence of events that
#' represent the results of flipping a fair coin \code{n} times.
#'
#' By default, the 2 possible \code{events} for each flip
#' are "H" (for "heads") and "T" (for "tails").
#'
#' @param n Number of coin flips.
#' Default: \code{n = 1}.
#'
#' @param events Possible outcomes (as a vector).
#' Default: \code{events = c("H", "T")}.
#'
#' @examples
#' # Basics:
#' coin()
#' table(coin(n = 100))
#' table(coin(n = 100, events = LETTERS[1:3]))
#'
#' # Note an oddity:
#' coin(10, events = 8:9) # works as expected, but
#' coin(10, events = 9:9) # odd: see sample() for an explanation.
#'
#' # Limits:
#' coin(2:3)
#' coin(NA)
#' coin(0)
#' coin(1/2)
#' coin(3, events = "X")
#' coin(3, events = NA)
#' coin(NULL, NULL)
#'
#' @family sampling functions
#'
#' @export
coin <- function(n = 1, events = c("H", "T")){
# check inputs:
if (is.null(n)){
message("coin: n must not be NULL. Using n = 1:")
n <- 1
}
if (is.null(events)){
message("coin: events must not be NULL. Using events = c('H', 'T':)")
events <- c("H", "T")
}
if (length(n) > 1) { # n is a vector:
message(paste0("coin: n must be a scalar. Using n[1] = ", n[1], ":"))
n <- n[1]
}
if ( (length(n) == 1) && ( is.na(n) || !is.numeric(n) || !is_wholenumber(n) || (n < 1) ) ) {
message("coin: n must be a positive integer. Using n = 1:")
n <- 1
}
# sample n outcomes:
sample(x = events, size = n, replace = TRUE)
} # coin().
# ## Check:
# # Basics:
# coin()
# table(coin(n = 1000))
#
# # Limits:
# coin(2:3)
# coin(NA)
# coin("_")
# coin(0)
# coin(1/2)
# coin(10, NA)
# coin(NULL, NULL)
## Note:
# table(coin(1000, 9:9)) # does NOT draw only 9...
# Random values from a normal distribution: ------
# r_n <- rnorm(n = 10000, mean = 100, sd = 10)
# table(round(r_n, 0))
# hist(r_n)
# Random values from a uniform distribution: ------
# r_u <- runif(n = 10000, min = .500, max = 6.499)
# table(round(r_u, 0))
# hist(r_u)
# Random draws from a sample: ------
# r_s <- sample(x = 1:10, size = 1000, replace = TRUE)
# table(r_s)
# hist(r_s, right = TRUE)
# hist(r_s, right = FALSE)
# Sample random characters (from given characters): ------
#' Draw a sample of n random characters
#' (from given characters).
#'
#' \code{sample_char} draws a sample of
#' \code{n} random characters from a given range of characters.
#'
#' By default, \code{sample_char} draws \code{n = 1}
#' a random alphabetic character from
#' \code{x_char = c(letters, LETTERS)}.
#'
#' As with \code{sample()}, the sample size \code{n} must not exceed
#' the number of available characters \code{nchar(x_char)},
#' unless \code{replace = TRUE} (i.e., sampling with replacement).
#'
#' @param x_char Population of characters to sample from.
#' Default: \code{x_char = c(letters, LETTERS)}.
#'
#' @param n Number of characters to draw.
#' Default: \code{n = 1}.
#'
#' @param replace Boolean: Sample with replacement?
#' Default: \code{replace = FALSE}.
#'
#' @param ... Other arguments.
#' (Use for specifying \code{prob}, as passed to \code{sample()}.)
#'
#' @return A text string (scalar character vector).
#'
#' @examples
#' sample_char() # default
#' sample_char(n = 10)
#' sample_char(x_char = "abc", n = 10, replace = TRUE)
#' sample_char(x_char = c("x y", "6 9"), n = 6, replace = FALSE)
#' sample_char(x_char = c("x y", "6 9"), n = 20, replace = TRUE)
#'
#' # Biased sampling:
#' sample_char(x_char = "abc", n = 20, replace = TRUE,
#' prob = c(3/6, 2/6, 1/6))
#'
#' # Note: By default, n must not exceed nchar(x_char):
#' sample_char(n = 52, replace = FALSE) # works, but
#' # sample_char(n = 53, replace = FALSE) # would yield ERROR;
#' sample_char(n = 53, replace = TRUE) # works again.
#'
#' @family sampling functions
#'
#' @export
sample_char <- function(x_char = c(letters, LETTERS), n = 1, replace = FALSE, ...){
out <- NA # initialize
# Checks:
# x_char is a vector of characters:
if (!is.character(x_char)){
message("sample_char: x_char must be of type character.")
}
# # x_char is not "":
# if ((all(is.character(x_char))) & (sum(nchar(x_char) == 0))){
# message("sample_char: x_char must contain at least 1 character.")
# }
# Split x_char into a vector of individual characters:
char_v <- unlist(strsplit(x_char, split = ""))
# Check: Verify that is something to sample from:
if (length(char_v) == 0){
message("sample_char: x_char must contain at least 1 character.")
}
# Use sample():
sample_v <- sample(x = char_v, size = n, replace = replace, ...)
# Paste into single char:
out <- paste0(sample_v, collapse = "")
return(out)
} # sample_char().
# ## Check:
# sample_char()
# sample_char(n = 10)
# sample_char(x_char = "abc", n = 10, replace = TRUE)
# sample_char(x_char = c("x y", "6 9"), n = 6, replace = FALSE)
# sample_char(x_char = c("x y", "6 9"), n = 20, replace = TRUE)
#
# # Biased sampling:
# sample_char(x_char = "abc", n = 20, replace = TRUE, prob = c(3/6, 2/6, 1/6))
#
# # Note: By default, n must not exceed nchar(x_char):
# sample_char(n = 52, replace = FALSE) # works, but
# # sample_char(n = 53, replace = FALSE) # yields ERROR.
# sample_char(n = 53, replace = TRUE) # works again
# ## Errors:
#
# sample_char(x_char = 1)
# sample_char(x_char = NA)
# sample_char(x_char = NULL)
#
# sample_char(x_char = "")
# sample_char(x_char = c("", ""))
# sample_char(x_char = c("", "", " "))
# ## R meta-characters:
# metas <- c(". \ | ( ) [ { ^ $ * + ?")
# nomta <- c(", : / < > ] } & % # - ! =")
#
# # without spaces:
# mcv <- unlist(strsplit(metas, split = " "))
# mcv # Note: \ is now ""!
# nmv <- unlist(strsplit(nomta, split = " "))
# nmv
#
# # Apply:
# sample_char(x_char = c(mcv, nmv), n = 24, replace = FALSE) # unique items
# sample_char(x_char = c(mcv, nmv), n = 50, replace = TRUE) # repeated items
# Sample random dates (from a given range): ------
#' Draw a sample of n random dates (from a given range).
#'
#' \code{sample_date} draws a sample of
#' \code{n} random dates from a given range.
#'
#' By default, \code{sample_date} draws \code{n = 1}
#' random date (as a "Date" object) in the range
#' \code{from = "1970-01-01"}
#' \code{to = Sys.Date()} (current date).
#'
#' Both \code{from} and \code{to} currently
#' need to be scalars (i.e., with a length of 1).
#'
#' @param from Earliest date (as "Date" or string).
#' Default: \code{from = "1970-01-01"}
#' (as a scalar).
#'
#' @param to Latest date (as "Date" or string).
#' Default: \code{to = Sys.Date()}
#' (as a scalar).
#'
#' @param size Size of date samples to draw.
#' Default: \code{size = 1}.
#'
#' @param ... Other arguments.
#' (Use for specifying \code{replace}, as passed to \code{sample()}.)
#'
#' @return A vector of class "Date".
#'
#' @examples
#' sample_date()
#' sort(sample_date(size = 10))
#' sort(sample_date(from = "2020-02-28", to = "2020-03-01",
#' size = 10, replace = TRUE)) # 2020 is a leap year
#'
#' # Note: Oddity with sample():
#' sort(sample_date(from = "2020-01-01", to = "2020-01-01", size = 10, replace = TRUE)) # range of 0!
#' # see sample(9:9, size = 10, replace = TRUE)
#'
#' @family sampling functions
#'
#' @export
sample_date <- function(from = "1970-01-01", to = Sys.Date(), size = 1, ...){
# 0. Initialize:
dt <- rep(NA, size)
# 1. Handle inputs:
if (!is_Date(from)){
# message('sample_date: Aiming to parse "from" as "Date".')
from <- date_from_noDate(from)
}
if (!is_Date(to)){
# message('sample_date: Aiming to parse "to" as "Date".')
to <- date_from_noDate(to)
}
# 2. Main: Use sample()
# set.seed(1984) # for reproducible randomness
dt <- as.Date(sample(as.numeric(from):as.numeric(to), size = size, ...), origin = '1970-01-01')
# 3. Output:
return(dt)
} # sample_date().
# ## Check:
# sample_date()
# sort(sample_date(size = 10))
# sort(sample_date(from = "2020-02-28", to = "2020-03-01", size = 10, replace = TRUE)) # 2020 is a leap year
#
# # with vectors:
# (f <- as.Date(c("1970-01-01", "1980-01-01", "1990-01-01")))
# (t <- as.Date(c("1979-12-31", "1989-12-31", "1999-12-31")))
# sample_date(f, t, 10) # only uses 1st elements
#
# ft <- data.frame(f, t)
# apply(ft, MARGIN = 1, FUN = function(from, to) sample(x = from:to, size = 1))
# # ToDo: Vectorized version of sample_date().
#
# # Note: Oddity with sample():
# sort(sample_date(from = "2020-01-01", to = "2020-01-01", size = 10, replace = TRUE)) # range of 0!
# # see sample(9:9, size = 10, replace = TRUE)
# Sample random times (from a given range): ------
#' Draw a sample of n random times (from a given range).
#'
#' \code{sample_time} draws a sample of
#' \code{n} random times from a given range.
#'
#' By default, \code{sample_time} draws \code{n = 1}
#' random calendar time (as a "POSIXct" object) in the range
#' \code{from = "1970-01-01 00:00:00"}
#' \code{to = Sys.time()} (current time).
#'
#' Both \code{from} and \code{to} currently
#' need to be scalars (i.e., with a length of 1).
#'
#' If \code{as_POSIXct = FALSE}, a local time ("POSIXlt") object is returned
#' (as a list).
#'
#' The \code{tz} argument allows specifying time zones
#' (see \code{Sys.timezone()} for current setting
#' and \code{OlsonNames()} for options.)
#'
#' @param from Earliest date-time (as string).
#' Default: \code{from = "1970-01-01 00:00:00"}
#' (as a scalar).
#'
#' @param to Latest date-time (as string).
#' Default: \code{to = Sys.time()}
#' (as a scalar).
#'
#' @param size Size of time samples to draw.
#' Default: \code{size = 1}.
#'
#' @param as_POSIXct Boolean: Return calendar time ("POSIXct") object?
#' Default: \code{as_POSIXct = TRUE}.
#' If \code{as_POSIXct = FALSE}, a local time ("POSIXlt") object is returned
#' (as a list).
#'
#' @param tz Time zone.
#' Default: \code{tz = ""} (i.e., current system time zone,
#' see \code{Sys.timezone()}).
#' Use \code{tz = "UTC"} for Universal Time, Coordinated.
#'
#' @param ... Other arguments.
#' (Use for specifying \code{replace}, as passed to \code{sample()}.)
#'
#' @return A vector of class "POSIXct" or "POSIXlt".
#'
#' @examples
#' # Basics:
#' sample_time()
#' sample_time(size = 10)
#'
#' # Specific ranges:
#' sort(sample_time(from = (Sys.time() - 60), size = 10)) # within last minute
#' sort(sample_time(from = (Sys.time() - 1 * 60 * 60), size = 10)) # within last hour
#' sort(sample_time(from = Sys.time(), to = (Sys.time() + 1 * 60 * 60),
#' size = 10, replace = FALSE)) # within next hour
#' sort(sample_time(from = "2020-12-31 00:00:00 CET", to = "2020-12-31 00:00:01 CET",
#' size = 10, replace = TRUE)) # within 1 sec range
#'
#' # Local time (POSIXlt) objects (as list):
#' (lt_sample <- sample_time(as_POSIXct = FALSE))
#' unlist(lt_sample)
#'
#' # Time zones:
#' sample_time(size = 3, tz = "UTC")
#' sample_time(size = 3, tz = "America/Los_Angeles")
#'
#' # Note: Oddity with sample():
#' sort(sample_time(from = "2020-12-31 00:00:00 CET", to = "2020-12-31 00:00:00 CET",
#' size = 10, replace = TRUE)) # range of 0!
#' # see sample(9:9, size = 10, replace = TRUE)
#'
#' @family sampling functions
#'
#' @export
sample_time <- function(from = "1970-01-01 00:00:00",
to = Sys.time(),
size = 1,
as_POSIXct = TRUE, tz = "",
...){
# 0. Initialize:
tv <- rep(NA, size)
lt1 <- rep(NA, size)
lt2 <- rep(NA, size)
# 1. Handle inputs:
if (!is_POSIXt(from)){
# message('sample_time: Aiming to parse "from" as "POSIXct".')
from <- time_from_noPOSIXt(from)
}
if (!is_POSIXt(to)){
# message('sample_time: Aiming to parse "to" as "POSIXct".')
to <- time_from_noPOSIXt(to)
}
# Convert into local times:
lt1 <- as.POSIXlt(from)
lt2 <- as.POSIXlt(to)
# 2. Main: Use sample()
# set.seed(1984) # for reproducible randomness
tv <- as.POSIXlt(sample(as.numeric(lt1):as.numeric(lt2), size = size, ...), origin = '1970-01-01')
# 3. Add time zone:
if (as_POSIXct) {
tv <- as.POSIXct(tv, tz = tz) # convert into POSIXct with tz
} else {
tv <- as.POSIXct(tv, tz = tz) # convert into POSIXct with tz
tv <- as.POSIXlt(tv, tz = tz) # re-convert into POSIXlt
}
# 4. Output:
return(tv)
} # sample_time().
# ## Check:
# # Basics:
# sample_time()
# sample_time(size = 10)
#
# # Specific ranges:
# sort(sample_time(from = (Sys.time() - 60), size = 10)) # within the last minute
# sort(sample_time(from = (Sys.time() - 2), size = 10, replace = TRUE)) # with duplicates
# sort(sample_time(from = (Sys.time() - 1 * 60 * 60), size = 10)) # within the last hour
# sort(sample_time(from = Sys.time(), to = (Sys.time() + 1 * 60 * 60), size = 10)) # within next hour
# sort(sample_time(from = "2020-01-01 00:00:00 CET", to = "2020-01-01 00:00:01 CET",
# size = 10, replace = TRUE)) # 1 sec range
#
# # Local time (POSIXlt) objects (as list):
# sample_time(as_POSIXct = FALSE)
# unlist(sample_time(as_POSIXct = FALSE))
#
# # Time zones:
# sample_time(size = 3, tz = "UTC")
# sample_time(size = 3, tz = "America/Los_Angeles")
#
# # Note: Oddity with sample():
# sort(sample_time(from = "2020-01-01 00:00:00 CET", to = "2020-01-01 00:00:00 CET",
# size = 10, replace = TRUE)) # range of 0!
# # see sample(9:9, size = 10, replace = TRUE)
## ToDo: Sampling normally distributed times:
# now <- Sys.time()
# hist(as.POSIXlt(now) + rnorm(n = 1000, mean = 0, sd = 60*60), breaks = 10)
# t1 <- as.POSIXlt(Sys.time())
# t2 <- as.POSIXlt(Sys.time() + 1 * 60 * 60) # 1 hour later
# as.POSIXlt(sample(as.numeric(t1):as.numeric(t2), size = 10, replace = TRUE), origin = '1970-01-01')
# dice: n random draws from a sample (from events): ------
#' Throw a fair dice (with a given number of sides) n times.
#'
#' \code{dice} generates a sequence of events that
#' represent the results of throwing a fair dice
#' (with a given number of \code{events} or number of sides)
#' \code{n} times.
#'
#' By default, the 6 possible \code{events} for each throw of the dice
#' are the numbers from 1 to 6.
#'
#' @param n Number of dice throws.
#' Default: \code{n = 1}.
#'
#' @param events Events to draw from (or number of sides).
#' Default: \code{events = 1:6}.
#'
#' @examples
#' # Basics:
#' dice()
#' table(dice(10^4))
#'
#' # 5-sided dice:
#' dice(events = 1:5)
#' table(dice(100, events = 5))
#'
#' # Strange dice:
#' dice(5, events = 8:9)
#' table(dice(100, LETTERS[1:3]))
#'
#' # Note:
#' dice(10, 1)
#' table(dice(100, 2))
#'
#' # Note an oddity:
#' dice(10, events = 8:9) # works as expected, but
#' dice(10, events = 9:9) # odd: see sample() for an explanation.
#'
#' # Limits:
#' dice(NA)
#' dice(0)
#' dice(1/2)
#' dice(2:3)
#' dice(5, events = NA)
#' dice(5, events = 1/2)
#' dice(NULL, NULL)
#'
#' @family sampling functions
#'
#' @export
dice <- function(n = 1, events = 1:6){
# (a) verify n:
if (is.null(n)){
message("dice: n must not be NULL. Using n = 1:")
n <- 1
}
if (length(n) > 1) { # n is a vector:
message(paste0("dice: n must be scalar. Using n[1] = ", n[1], ":"))
n <- n[1]
}
# Verify that n is a numeric integer > 1:
if ((length(n) == 1) && (is.na(n) || !is.numeric(n) || !is_wholenumber(n) || (n < 1) ) ) {
message("dice: n must be a positive integer. Using n = 1:")
n <- 1
}
# (b) verify events:
if (is.null(events)){
message("dice: events must not be NULL. Using events = 1:6:")
events <- 1:6
}
if (length(events) > 1) { # events is a vector:
# message(paste0("dice: sides is a set. Using it:"))
set_of_events <- events
} else { # sides is a scalar: length(sides) <= 1:
# Verify that events is a numeric integer > 1:
if ( is.na(events) || !is.numeric(events) || !is_wholenumber(events) || (events < 1) ) {
message("dice: events must be an integer or a set. Using events = 6:")
events <- 6
}
set_of_events <- 1:events # default set
}
# Sample n times from set_of_events:
sample(x = set_of_events, size = n, replace = TRUE)
} # dice().
# ## Check:
# # Basics:
# dice()
# table(dice(10^4))
#
# # 5-sided dice:
# dice(sides = 5)
# table(dice(10^5, sides = 5))
#
# # Set dice:
# dice(5, sides = 2:3)
# dice(5, sides = c(2, 4, 6))
#
# # Note:
# dice(10, 1) # always yields 1
# table(dice(1000, 2))
#
# # Limits:
# dice(NA)
# dice(0)
# dice(1/2)
# dice(2:3)
# dice(10, sides = NA)
# dice(10, sides = 1/2)
#
# # Note an oddity:
# dice(n = 10, sides = 3:4) # works, but
# dice(n = 10, sides = 4:4) # odd: see sample() for an explanation.
# dice(NULL, NULL)
# dice_2: n non-random draws from a sample (from 1 to sides): ------
#' Throw a questionable dice (with a given number of sides) n times.
#'
#' \code{dice_2} is a variant of \code{\link{dice}} that
#' generates a sequence of events that
#' represent the results of throwing a dice
#' (with a given number of \code{sides}) \code{n} times.
#'
#' Something is wrong with this dice.
#' Can you examine it and measure its problems
#' in a quantitative fashion?
#'
#' @param n Number of dice throws.
#' Default: \code{n = 1}.
#'
#' @param sides Number of sides.
#' Default: \code{sides = 6}.
#'
#' @examples
#' # Basics:
#' dice_2()
#' table(dice_2(100))
#'
#' # 10-sided dice:
#' dice_2(sides = 10)
#' table(dice_2(100, sides = 10))
#'
#' # Note:
#' dice_2(10, 1)
#' table(dice_2(5000, sides = 5))
#'
#' # Note an oddity:
#' dice_2(n = 10, sides = 8:9) # works, but
#' dice_2(n = 10, sides = 9:9) # odd: see sample() for an explanation.
#'
#' @family sampling functions
#'
#' @export
dice_2 <- function(n = 1, sides = 6){
# (a) verify n:
if (is.null(n)){
message("dice_2: n must not be NULL. Using n = 1:")
n <- 1
}
if (length(n) > 1) { # n is a vector:
message(paste0("dice_2: n must be scalar. Using n[1] = ", n[1], ":"))
n <- n[1]
}
# Verify that n is a numeric integer > 1:
if ((length(n) == 1) && (is.na(n) || !is.numeric(n) || !is_wholenumber(n) || (n < 1) ) ) {
message("dice_2: n must be a positive integer. Using n = 1:")
n <- 1
}
# (b) verify sides:
if (is.null(sides)){
message("dice_2: sides must not be NULL. Using sides = 6:")
sides <- 6
}
if (length(sides) > 1) { # sides is a vector:
# message(paste0("dice_2: sides is a set. Using it:"))
set_of_sides <- sides
} else { # sides is a scalar: length(sides) <= 1:
# Verify that sides is a numeric integer > 1:
if ( is.na(sides) || !is.numeric(sides) || !is_wholenumber(n) || (sides < 1) ) {
message("dice_2: sides must be an integer or a set. Using sides = 6:")
sides <- 6
}
set_of_sides <- 1:sides # default set
}
n_sides <- length(set_of_sides) # number of sides
## Weigh events by some probability density distribution:
# pfac <- # loading factor (0: fair, 1: always final side)
## Bias for 1 side:
ptru <- 1/n_sides # p-values of a fair dice
bias <- ptru * .075 # (additional) bias of 1 side
p_hi <- ptru + bias # higher p of biased side
p_lo <- ptru - (bias/(n_sides - 1)) # lower p of all other sides
pset <- c(rep(p_lo, (n_sides - 1)), p_hi) # p-values of all sides
sample(x = set_of_sides, size = n, replace = TRUE, prob = pset)
} # dice_2().
## Check:
# # Basics:
# dice_2()
# table(dice_2(10^5))
#
# # 10-sided dice:
# dice_2(sides = 10)
# table(dice_2(10^6, sides = 10))
#
# # Set dice:
# table(dice_2(300000, sides = c("A", "B", "C")))
#
# # Note:
# dice_2(10, 1)
# table(dice_2(2000, 2))
#
# # Note an oddity:
# dice_2(n = 10, sides = 3:4) # works, but
# dice_2(n = 10, sides = 4:4) # odd: see sample() for an explanation.
# Permutations: List ALL permutations of a vector/set x / permute a set/vector x: ------
# library(combinat)
#
# set <- c("a", "b", "c")
# pm <- combinat::permn(x = set)
# pm
# Recursive definition:
all_permutations <- function(x) {
# initialize: ----
out <- NA
n <- length(x)
if (n == 1) { # basic case: ----
out <- x
} else { # Use recursion: ----
out <- NULL # init/stopping case
for (i in 1:n) { # loop: ----
out <- rbind(out, cbind(x[i], all_permutations(x[-i])))
}
}
return(out)
} # all_permutations().
# # Check:
# all_permutations(246)
# all_permutations(1:3)
# all_permutations(c("A", "B", "b", "a"))
# Combinations: List ALL combinations of length n of a set x: ------
# # (a) Using utils::combn:
# m <- utils::combn(x = 1:4, m = 2)
# m
# is.matrix(m)
# t(m)
# is.vector(m) # if m == length(x)
all_combinations <- function(x, length){
# Prepare: ----
out <- NA # initialize
# Verify inputs:
if (all(is.na(x)) || is.na(length)){
return(NA)
}
if (length > length(x)){
message(paste0("all_combinations: length must not exceed length(x). Using length = ", length(x)))
length <- length(x)
}
# Main: Use utils::combn to obtain matrix: ----
m <- utils::combn(x = x, m = length)
if (is.vector(m)){
out <- m # return as is
} else if (is.matrix(m)){
out <- t(m) # transpose m into matrix of rows
}
# Output:
return(out)
} # all_combinations().
# # Check:
# all_combinations(x = c("a", "b", "c"), 2)
# all_combinations(x = 1:5, length = 2)
# all_combinations(x = 1:25, 2) # Note: 25 * 24 / 2 = 300 combinations.
# all_combinations(x = 1:3, length = 1)
# all_combinations(x = 1:3, length = 88)
# all_combinations(x = 1:3, length = NA)
# all_combinations(x = NA, length = 1)
# random_symbols: Get n vectors of random symbols (of length len) from some set x: -----
random_symbols <- function(x = letters, len = 1, n = 1, sep = "", replace = TRUE) {
stopifnot(is.numeric(n), n > 0) # check conditions
out <- rep(NA, n) # initialize vector
for (i in 1:n) { # loop 1: 1:n
i_th <- "" # initialize
# (a) Sample symbols in set x 1 element at a time: ----
# for (j in 1:len) { # loop 2: 1:len
#
# j_th <- sample(x = x, size = 1) # Note: replace ineffective for size 1
# i_th <- paste0(i_th, j_th, sep = sep)
#
# }
# (b) Sample len elements at once: ----
v_x <- as.vector(x) # turn x into vector
i_th <- sample(x = v_x, size = len, replace = replace)
i_th <- paste0(i_th, collapse = "") # collapse i_th again
out[i] <- i_th
}
return(out)
} # random_symbols().
## Check:
# random_symbols()
# random_symbols(len = 4, n = 10)
# random_symbols(len = 4, n = 10, replace = FALSE)
# random_symbols(x = as.character(0:4), len = 5, n = 5) # with replacement
# random_symbols(x = as.character(0:4), len = 5, n = 5, replace = FALSE) # w/o replacement
## Goal: Adding a random amount (number or proportion) of NA or other values to a vector:
# add_NAs: Adding NA/missing values to a vector v of data: -----
## A function to replace a random amount (a proportion <= 1 OR absolute number > 1)
## of vector v's elements by NA values:
add_NAs <- function(v, amount){
stopifnot((is.vector(v)) & (amount >= 0) & (amount <= length(v)))
out <- v
n <- length(v)
amount_2 <- ifelse(amount < 1, round(n * amount, 0), amount) # turn amount prop into n
out[sample(x = 1:n, size = amount_2, replace = FALSE)] <- NA
return(out)
} # add_NAs().
# Check:
# add_NAs(1:10, 0)
# add_NAs(1:10, 5)
# add_NAs(1:10, .5)
# add_NAs(letters[1:10], 5)
# add_whats: Adding some element(s) what to a vector v of data: -----
# Goal: Replace a random amount of vector v elements by what.
# [A generalization of add_NAs().]
add_whats <- function(v, amount, what = NA){
# Prepare: ----
# Verify inputs:
stopifnot((is.vector(v)) & (amount >= 0) & (amount <= length(v)))
# Key parameters:
out <- v
n <- length(v)
amount_2 <- ifelse(amount < 1, round(n * amount, 0), amount) # turn amount prop into n
# Main: ----
# Replace random sample of elements by what:
ix_replace <- sample(x = 1:n, size = amount_2, replace = FALSE)
out[ix_replace] <- what
# Output: ----
return(out)
} # add_whats().
# # Check:
# add_whats(1:10, 5) # default: what = NA
# add_whats(1:10, 5, what = 99)
# add_whats(1:10, .5, what = TRUE) # Note: interpreted numerically!
# add_whats(1:10, .5, what = "ABC") # Note: v coerced into character!
## (2) Make tables for plots: ----------
# make_tb: Create (n x n) table tb for plots: ------
make_tb <- function(n = NA, rseed = NA){
tb <- NA # initialize
# Robustness:
if (is.na(rseed)) {
rseed <- sample(1:9999, size = 1, replace = TRUE) # random rseed
}
if (is.na(n)) {
n <- sample(1:12, size = 1, replace = TRUE) # random n
}
# Parameters:
n_x <- n
n_y <- n
N <- (n_x * n_y)
set.seed(seed = rseed) # for reproducible randomness
# Vectors:
# (a) sorted:
v_sort <- 1:N # Tile: top_left = seeblau, bottom_right = black | Polar: outer = seeblau, center = black.
# v_sort <- rev(1:N) # Tile: top_left = black, bottom_right = seeblau | Polar: outer = black, center = seeblau.
# Colors of text labels:
col_sort <- rep("white", N) # default
lim_black <- .25 # threshold to switch from "white" to "black" labels
col_sort[(v_sort > (lim_black * N)) &
(v_sort <= ((1 - lim_black) * N))] <- "black" # switch to "black" in mid of range
# table(col_sort)
# (b) random:
# v_rand <- runif(n = N, 0, 1)
rand_ord <- sample(v_sort, N) # random permutation of v_sort
v_rand <- rand_ord # random permutation of v_sort
col_rand <- col_sort[rand_ord] # corresponding colors
# x and y vectors:
x_vec <- rep(1:n_y, times = n_x)
y_vec <- rep(n_x:1, each = n_y)
# # (a) as tibble:
# tb <- tibble::tibble(x = x_vec,
# y = y_vec,
# sort = v_sort,
# rand = v_rand,
# col_sort = col_sort,
# col_rand = col_rand)
# (b) as data frame:
tb <- data.frame(x = x_vec,
y = y_vec,
sort = v_sort,
rand = v_rand,
col_sort = col_sort,
col_rand = col_rand,
stringsAsFactors = FALSE)
return(tb)
} # make_tb().
## Check:
# make_tb(n = 3)
# make_tb(n = 5, rseed = 1) # check rseed
# make_tb(n = 5, rseed = 1)
# make_tbs: Create simpler (1 x n) table tbs for plots: ------
make_tbs <- function(n = NA, rseed = NA){
tbs <- NA # initialize
# Robustness:
if (is.na(rseed)) {
rseed <- sample(1:9999, size = 1, replace = TRUE) # random rseed
}
if (is.na(n)) {
n <- sample(1:12, size = 1, replace = TRUE) # random n
}
# Parameters:
n_x <- n
n_y <- 1 # only 1 column/row
N <- (n_x * n_y)
set.seed(seed = rseed) # for reproducible randomness
# Vectors:
# (a) sorted:
v_sort <- 1:N # Tile: top_left = seeblau, bottom_right = black | Polar: outer = seeblau, center = black.
# v_sort <- rev(1:N) # Tile: top_left = black, bottom_right = seeblau | Polar: outer = black, center = seeblau.
# Colors of text labels:
col_sort <- rep("white", N) # default
lim_black <- .25 # threshold to switch from "white" to "black" labels
col_sort[(v_sort > (lim_black * N)) &
(v_sort <= ((1 - lim_black) * N))] <- "black" # switch to "black" in mid of range
# table(col_sort)
# (b) random:
# v_rand <- runif(n = N, 0, 1)
rand_ord <- sample(v_sort, N) # random permutation of v_sort
v_rand <- rand_ord # random permutation of v_sort
col_rand <- col_sort[rand_ord] # corresponding colors
# x and y vectors:
x_vec <- 1:n_x
y_vec <- rep(1, n_x)
# # (a) as tibble:
# tbs <- tibble::tibble(x = x_vec,
# y = y_vec,
# sort = v_sort,
# rand = v_rand,
# col_sort = col_sort,
# col_rand = col_rand)
# (b) as data frame:
tbs <- data.frame(x = x_vec,
y = y_vec,
sort = v_sort,
rand = v_rand,
col_sort = col_sort,
col_rand = col_rand,
stringsAsFactors = FALSE)
return(tbs)
} # make_tbs().
## Check:
# make_tbs(n = 6)
# make_tbs(n = 6, rseed = 1) # check rseed
# make_tbs(n = 6, rseed = 1)
# make_grid: Generate a grid of x-y coordinates (from -x:x to -y:y): ------
#' Generate a grid of x-y coordinates.
#'
#' \code{make_grid} generates a grid of x/y coordinates and returns it
#' (as a data frame).
#'
#' @param x_min Minimum x coordinate.
#' Default: \code{x_min = 0}.
#'
#' @param x_max Maximum x coordinate.
#' Default: \code{x_max = 2}.
#'
#' @param y_min Minimum y coordinate.
#' Default: \code{y_min = 0}.
#'
#' @param y_max Maximum y coordinate.
#' Default: \code{y_max = 1}.
#'
#' @examples
#' make_grid()
#' make_grid(x_min = -3, x_max = 3, y_min = -2, y_max = 2)
#'
#' @family data functions
#'
#' @export
make_grid <- function(x_min = 0, x_max = 2, y_min = 0, y_max = 1){
# check inputs:
if (!is.numeric(x_min) || !is.numeric(x_max) ||
!is.numeric(y_min) || !is.numeric(y_max) ) {
stop("All arguments must be numeric.")
}
if (x_min > x_max) {
message("x_max should be larger than x_min: Reversing them...")
x_tmp <- x_min
x_min <- x_max
x_max <- x_tmp
}
if (y_min > y_max) {
message("y_max should be larger than y_min: Reversing them...")
y_tmp <- y_min
y_min <- y_max
y_max <- y_tmp
}
# initialize:
tb <- NA
# ranges:
xs <- x_min:x_max
ys <- y_min:y_max
# x and y vectors:
x_vec <- rep(xs, times = length(ys))
y_vec <- rep(ys, each = length(xs))
## (a) as tibble:
# tb <- tibble::tibble(x = x_vec,
# y = y_vec)
# (b) as data frame:
tb <- data.frame(x = x_vec,
y = y_vec,
stringsAsFactors = FALSE)
return(tb)
} # make_grid().
## Check:
# make_grid()
# make_grid(x_min = 0, x_max = 0, y_min = 1, y_max = 1)
# Note:
# make_grid(x_min = 1, x_max = 0, y_min = 2, y_max = 1)
# make_grid(x_min = 1/2, y_min = 1/3)
## Errors:
# make_grid(x_min = "A")
## (3) Misc: ----------
# get_set: Get a coordinate set from datasets::anscombe (as df): ------
#' Get a set of x-y coordinates.
#'
#' \code{get_set} obtains a set of x/y coordinates and returns it
#' (as a data frame).
#'
#' Each set stems from Anscombe's Quartet
#' (see \code{datasets::anscombe}, hence
#' \code{1 <= n <= 4}) and is returned as an
#' \code{11 x 2} data frame.
#'
#' @source See \code{?datasets:anscombe} for details and references.
#'
#' @param n Number of set (as an integer from 1 to 4)).
#' Default: \code{n = 1}.
#'
#' @examples
#' get_set(1)
#' plot(get_set(2), col = "red")
#'
#' @family data functions
#'
#' @export
# Obtain a set of x- and y- values out of Anscombe's quartet:
get_set <- function(n = 1){
set <- NA # initialize
# check inputs: ----
if (is.null(n)){
message("get_set: n must not be NULL. Using n = 1:")
n <- 1
}
if (length(n) > 1) { # n is a vector:
message(paste0("get_set: n must be a scalar. Using n[1] = ", n[1], ":"))
n <- n[1]
}
if ( (length(n) == 1) && ( is.na(n) || !is.numeric(n) || !is_wholenumber(n) || (n < 1) || (n > 4)) ) {
message("get_set: n must be a positive integer (from 1 to 4). Using n = 1:")
n <- 1
}
# main: ----
df <- datasets::anscombe # get data
ans <- with(df, data.frame(x = c(x1, x2, x3, x4),
y = c(y1, y2, y3, y4),
nr = gl(4, nrow(df))))
set <- ans[ans$nr == n, 1:2] # get subset
rownames(set) <- paste0("p", num_as_char(1:nrow(set), n_pre_dec = 2, n_dec = 0))
return(set)
} # get_set().
## Check:
# get_set(1)
# get_set(pi)
# plot(get_set(2), col = "red")
#
# # Note: @importFrom datasets anscombe is not needed.
## ToDo: ----------
# - Vectorized versions of sample_date() and sample_time()
# that allow inputs of (recycled) vectors from and to and draw
# n = size samples from each pair-wise range.
# - sample_time variant for sampling normally distributed times?
## eof. ----------------------
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.