#' Create a Data Envelope
#'
#' A Data Envelope is a simple list data stores probabilistic information about
#' the variables in a \code{data.frame}.
#'
#' @noRd
#'
#' @author David Navega
#'
#' @param x a data.frame
#' @param type a string defining the type of envelope. type = "data" creates an
#' envelope with the marginal probabilities (Default). type = "uniform" assumes
#' uniform marginal probability for the variables.
#'
create_envelope <- function(x, type = "data") {
x <- as.data.frame(x)
x_names <- colnames(x)
x_class <- lapply(x, class)
x_class <- sapply(x_class, numeric_factor_class)
envelope <- named_apply(x_names, function(name) {
# switch variable ----
switch(
EXPR = x_class[name],
factor = {
# swith envelope type ----
switch(
EXPR = type,
data = {
value <- levels(x[[name]])
probability <- m_estimator(x[[name]])
# return
rout <- list(
value = value,
probability = probability,
class = "factor"
)
return(rout)
},
uniform = {
value <- levels(x[[name]])
k <- nlevels(x[[name]])
probability <- rep(x = 1 / k, times = k)
# return
rout <- list(
value = value,
probability = probability,
class = "factor"
)
return(rout)
},
{ stop("[-] type = 'data' or type = 'uniform'") }
)
},
numeric = {
# swith envelope type ----
switch(
EXPR = type,
data = {
x_range <- range(x[[name]], na.rm = T)
from <- x_range[1]
to <- x_range[2]
kde <- stats::density(x[[name]], from = from, to = to, na.rm = T)
value <- kde$x
probability <- kde$y / sum(kde$y)
# return
rout <- list(
value = value,
probability = probability,
class = "numeric"
)
return(rout)
},
uniform = {
x_range <- range(x[[name]], na.rm = T)
from <- x_range[1]
to <- x_range[2]
value <- seq(from = from, to = to, length.out = 512)
probability <- rep(x = 1 / 512, times = 512)
# return
rout <- list(
value = value,
probability = probability,
class = "numeric"
)
return(rout)
},
{ stop("[-] type = 'data' or type = 'uniform'") }
)
}
)
})
# return ----
rout <- envelope
return(rout)
}
#' Sample from a Data Envelope
#'
#' @noRd
#'
#' @author David Navega
#'
#' @param object a data envelope object
#' @param n an integer defining the number of observation to sample
#' @param replace a logical indicating if sampling is done with or without
#' replacement. Default is TRUE
#'
sample_envelope <- function(object, n, replace = TRUE) {
x_names <- names(object)
sampled_list <- named_apply(x_names, function(name) {
sub_object <- object[[name]]
sampled_value <- sample(
x = sub_object$value,
prob = sub_object$probability,
size = n,
replace = replace
)
condition <- sub_object$class == "factor"
if(condition) {
sampled_value <- factor(sampled_value, levels = sub_object$value)
}
# return
rout <- sampled_value
return(rout)
})
# return ----
rout <- as.data.frame(sampled_list)
colnames(rout) <- x_names
return(rout)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.