Nothing
#' Explore data
#'
#' Explores data from a data frame in wide format.
#'
#' This function helps to explore the dataset before being analyzed with \code{modeler()}.
#'
#' @param data A \code{data.frame} containing the input data for analysis.
#' @param x The name of the column in \code{data} that contains x points.
#' @param y The names of the columns in \code{data} that contain the variables to be analyzed.
#' @param id The names of the columns in \code{data} that contains a grouping variable.
#' @param metadata The names of the columns in \code{data} to keep across the analysis.
#'
#' @return An object of class \code{explorer}, which is a list containing the following elements:
#' \describe{
#' \item{\code{summ_vars}}{A data.frame containing summary statistics for each trait at each x point, including minimum, mean, median, maximum, standard deviation, coefficient of variation, number of non-missing values, percentage of missing values, and percentage of negative values.}
#' \item{\code{summ_metadata}}{A data.frame summarizing the metadata.}
#' \item{\code{locals_min_max}}{A data.frame containing the local minima and maxima of the mean y values over x.}
#' \item{\code{dt_long}}{A data.frame in long format, with columns for uid, metadata, var, x, and y}
#' \item{\code{metadata}}{A character vector with the names of the variables to keep across.}
#' }
#'
#' @export
#'
#' @examples
#' library(flexFitR)
#' data(dt_potato)
#' results <- dt_potato |>
#' explorer(
#' x = DAP,
#' y = c(Canopy, GLI),
#' id = Plot,
#' metadata = c(gid, Row, Range)
#' )
#' names(results)
#' head(results$summ_vars)
#' plot(results, label_size = 4, signif = TRUE, n_row = 2)
#' # New data format
#' head(results$dt_long)
#' @import dplyr
#' @import tidyr
#' @importFrom stats sd median
explorer <- function(data, x, y, id, metadata) {
if (is.null(data)) {
stop("Error: data not found")
}
id <- names(select(data, {{ id }}))
if (length(id) == 0) {
data <- data |> mutate(.id = 1)
id <- ".id"
} else if (length(id) > 1) {
data <- data |> unite(.id, id, sep = "_", remove = FALSE)
id <- ".id"
}
x <- names(select(data, {{ x }}))
y <- names(select(data, {{ y }}))
metadata <- names(select(data, {{ metadata }}))
for (i in y) {
class_trait <- data[[i]] |> class()
if (!class_trait %in% c("numeric", "integer")) {
stop(
paste0("Class of the trait '", i, "' should be numeric or integer.")
)
}
}
check_metadata(data, metadata)
data <- data |>
select(all_of(c(id, metadata, x, y))) |>
mutate(uid = .data[[id]], .keep = "unused", .before = 0) |>
rename(x = all_of(x))
resum <- summarize_metadata(data, cols = c("uid", "x", metadata))
dt_long <- data |>
select(uid, all_of(metadata), x, all_of(y)) |>
pivot_longer(all_of(y), names_to = "var", values_to = "y") |>
relocate(x, .after = var)
summ_vars <- dt_long |>
group_by(var, x) |>
summarise(
Min = suppressWarnings(min(y, na.rm = TRUE)),
Mean = mean(y, na.rm = TRUE),
Median = median(y, na.rm = TRUE),
Max = suppressWarnings(max(y, na.rm = TRUE)),
SD = sd(y, na.rm = TRUE),
CV = SD / Mean,
n = sum(!is.na(y)),
miss = sum(is.na(y)),
`miss%` = miss / n(),
`neg%` = sum(y < 0, na.rm = TRUE) / n,
.groups = "drop"
)
l <- which(is.infinite(summ_vars$Min))
summ_vars[l, c("Min", "Mean", "Max")] <- NA
max_min <- dt_long |>
group_by(var, x) |>
summarise(mean = mean(y, na.rm = TRUE), .groups = "drop") |>
arrange(var, x) |>
group_by(var) |>
summarise(
local_min_at = paste(local_min_max(mean, x)$days_min, collapse = "_"),
local_max_at = paste(local_min_max(mean, x)$days_max, collapse = "_"),
.groups = "drop"
)
out <- list(
summ_vars = summ_vars,
summ_metadata = resum,
locals_min_max = max_min,
dt_long = dt_long,
metadata = metadata,
x_var = x
)
class(out) <- "explorer"
return(out)
}
#' @noRd
check_metadata <- function(data, metadata = NULL) {
# Check if data is NULL
if (is.null(data)) {
stop("The dataset cannot be NULL.")
}
if (is.null(metadata)) {
return()
}
# Iterate over each variable in the metadata vector
for (var in metadata) {
# Check if the variable exists in the dataset
if (!var %in% names(data)) {
stop(paste("Variable", var, "is not found in the dataset."))
}
# Check for missing values
if (any(is.na(data[[var]]))) {
warning(paste("Variable", var, "contains missing values."))
}
}
}
#' @noRd
summarize_metadata <- function(data = NULL, cols = NULL) {
# Initialize an empty list to store summaries
summaries <- list()
# Iterate over each variable in the metadata vector
for (var in seq_along(cols)) {
# Extract the variable data
var_data <- data[[cols[var]]]
# Create a summary for the variable
var_summary <- list(
Column = cols[var],
Type = class(var_data),
Unique_Values = length(unique(var_data))
)
# Append the summary to the list
summaries[[var]] <- var_summary
}
# Convert the list of summaries to a data frame
summary_df <- do.call(rbind, lapply(summaries, as.data.frame))
return(summary_df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.