#' @name create_manifest_explore_univariate
#' @export
#'
#' @title Create a manifest for exploring univariate patterns.
#'
#' @description This function creates a meta-dataset (from the \code{data.frame} passed as a parameter)
#' and optionally saves the meta-dataset as a CSV. The meta-dataset specifies how the variables
#' should be plotted.
#'
#'
#' @param d_observed The \code{data.frame} to create metadata for.
#' @param write_to_disk Indicates if the meta-dataset should be saved as a CSV.
#' @param path_out The file path to save the meta-dataset.
#' @param overwrite_file Indicates if the CSV of the meta-dataset should be overwritten if a file already exists at the location.
#' @param default_format A \code{character} array indicating which formatting function should be displayed on the axis of the univariate graph.
#' @param default_class_graph A \code{character} array indicating which graph should be used with variables of a certain class.
#' @param bin_count_suggestion An \code{integer} value of the number of roughly the number bins desired for a histogram.
#'
#' @return Returns a \code{data.frame} where each row in the metadata represents a column in \code{d_observed}.
#' The metadata contains the following columns
#' \enumerate{
#' \item{\code{variable_name} The variable name (in \code{d_observed}). \code{character}.}
#' \item{\code{remark} A blank field that allows theuser to enter notes in the CSV for later reference.}
#' \item{\code{class} The variable's \code{\link{class}} (eg, numeric, Date, factor). \code{character}.}
#' \item{\code{should_graph} A boolean value indicating if the variable should be graphed. \code{logical}.}
#' \item{\code{graph_function} The name of the function used to graph the variable. \code{character}.}
#' \item{\code{x_label_format} The name of the function used to format the \emph{x}-axis. \code{character}.}
#' \item{\code{bin_width} The uniform width of the bins. \code{numeric}.}
#' \item{\code{bin_start} The location of the left boundary of the first bin. \code{numeric}.}
#' }
#' @keywords explore
#' @examples
#'
#' create_manifest_explore_univariate(datasets::InsectSprays, write_to_disk=FALSE)
#'
#' #Careful, the first column is a `ts` class.
#' create_manifest_explore_univariate(datasets::freeny, write_to_disk=FALSE)
create_manifest_explore_univariate <- function(
d_observed,
write_to_disk = TRUE,
path_out = getwd(),
overwrite_file = FALSE,
default_class_graph = c(
"numeric" = "histogram_continuous",
"integer" = "histogram_continuous",
"factor" = "histogram_discrete",
"character" = "histogram_discrete",
"notMatched" = "histogram_generic"
),
default_format = c(
"numeric" = "scales::comma",
"notMatched" = "scales::comma"
),
bin_count_suggestion = 30L
) {
#Determine the variable's class (eg, numeric, integer, factor, ...).
column_class <- vapply(X=d_observed, FUN=class, character(1))
#Determine the index of the most appropriate graph.
matched_index_graph <- match(column_class, names(default_class_graph))
matched_index_graph <- ifelse(!is.na(matched_index_graph), matched_index_graph, which(names(default_class_graph)=="notMatched"))
#Determine the index of the most appropriate format.
matched_index_format <- match(column_class, names(default_format))
matched_index_format <- ifelse(!is.na(matched_index_format), matched_index_format, which(names(default_format)=="notMatched"))
bins <- calculate_bins(d_observed, bin_count_suggestion=bin_count_suggestion)
rounding_digits <- calculate_rounding_digits(d_observed)
#Create the data.frame of metadata.
ds_skeleton <- data.frame(
variable_name = colnames(d_observed),
remark = "",
class = column_class,
should_graph = TRUE,
graph_function = default_class_graph[matched_index_graph],
x_label_format = default_format[matched_index_format],
bin_width = bins$bin_width,
bin_start = bins$bin_start,
rounding_digits = rounding_digits,
stringsAsFactors = FALSE
)
#Ths sapply function sets rownames for the dataset. They're redundant with the `variable_name` column.
row.names(ds_skeleton) <- NULL
#If desired, write the data.frame to disk as a CSV.
if( write_to_disk ) {
if( overwrite_file & base::file.exists(path_out) ) {
stop(paste0(
"The file `", path_out, "` already exists, and will not be overwritten by `create_manifest_explore()`."
))
}
utils::write.csv(
x = ds_skeleton,
file = path_out,
row.names = FALSE
)
}
return( ds_skeleton )
}
# ds <- create_manifest_explore_univariate(datasets::freeny, write_to_disk=FALSE) #Careful, the first column is a `ts` class.
# ds <- create_manifest_explore_univariate(datasets::InsectSprays, write_to_disk=FALSE)
# ds <- create_manifest_explore_univariate(datasets::beaver1, write_to_disk=FALSE)
# ds
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.