#' @title Summary
#' @description Short summary of a data frame: its number of rows and the
#' names of its columns.
#' @param df (`data.frame`)\cr Data frame containing the performance measure.
#' @return (`list`)\cr Named list with two elements: `Rows`, the number of
#' rows of `df`, and `Columns`, a character vector of its column names.
#' @export
data_summary <- function(df) {
  # Build the result directly; the last expression is the return value.
  list(
    Rows = nrow(df),
    Columns = colnames(df)
  )
}
#' @title NA Check
#' @description
#' Check if the measure column is complete.
#' For every distinct value of `check_var` (problem sets or algorithms) this
#' function reports how many observations exist, how many of them have an NA
#' in the measure column, and the resulting NA ratio. If there are any NAs the
#' user can decide to drop all observations for that specific value, since
#' the data frame needs to be complete for testing.
#' @param df (`data.frame`)\cr Data frame containing the performance measure.
#' @param measure (`character`)\cr Name of the 'measure' column. If not
#' defined, the first 'measure' column in the data frame is used.
#' @param check_var (`character`)\cr Column in data frame used to check for NAs.
#' Either "problem" (default) or "algorithm".
#' @return Either the string `"data complete"` when the measure column holds
#' no NA, or a (`data.frame`) with one row per distinct value of `check_var`
#' (row names are the values, in order of first appearance) and the columns
#' `na_number`, `observations` and `na_ratio`. Rows of `df` whose `check_var`
#' value is itself NA are not assigned to any group.
#' @export
na_check <- function(df, measure = NULL, check_var = NULL) {
  if (is.null(measure)) {
    measure <- get_measure_columns(df)[1]
  }
  if (is.null(check_var)) {
    check_var <- "problem"
  }
  # Fail loudly on an unknown measure column; `df[[measure]]` would otherwise
  # yield NULL and the data would be reported as complete.
  if (!(measure %in% colnames(df))) {
    stop("Column '", measure, "' not found in data frame.", call. = FALSE)
  }

  # Only the measure column decides completeness. `[[` extracts a plain
  # vector without relying on single-column drop behaviour of `[`.
  is_missing <- is.na(df[[measure]])
  if (!any(is_missing)) {
    return("data complete")
  }

  if (!(check_var %in% colnames(df))) {
    stop("Column '", check_var, "' not found in data frame.", call. = FALSE)
  }

  # Group labels as character, keeping the order of first appearance so the
  # row order of the result does not depend on factor level ordering.
  group_values <- as.character(df[[check_var]])
  groups <- factor(
    group_values,
    levels = unique(group_values[!is.na(group_values)])
  )

  # One pass over the data instead of re-subsetting `df` for every group.
  by_group <- split(is_missing, groups)
  na_number <- vapply(by_group, sum, integer(1), USE.NAMES = FALSE)
  observations <- lengths(by_group, use.names = FALSE)

  data.frame(
    na_number = na_number,
    observations = observations,
    na_ratio = na_number / observations,
    row.names = levels(groups)
  )
}
#' @title Drop NAs by groups
#' @description
#' Remove whole groups of rows: every row whose `check_var` value occurs in at
#' least one row with an NA in the measure column is dropped.
#' @param df (`data.frame`)\cr Data frame containing the performance measure.
#' @param measure (`character`)\cr Name of the 'measure' column. If not
#' defined, the first 'measure' column in the data frame is used.
#' @param check_var (`character`)\cr Column in data frame used to check for NAs.
#' Either "problem" (default) or "algorithm".
#' @return (`data.frame`)\cr The rows of `df` belonging to groups without any
#' NA in the measure column.
#' @export
na_drop <- function(df, check_var = NULL, measure = NULL) {
  if (is.null(measure)) {
    measure <- get_measure_columns(df)[1]
  }
  if (is.null(check_var)) {
    check_var <- "problem"
  }

  # Group label of every row.
  group_of_row <- df[, check_var]
  # Labels of the groups that contain at least one missing measure value.
  incomplete_groups <- group_of_row[is.na(df[, measure])]
  # Keep only rows whose group never shows up among the incomplete ones.
  keep_row <- !(group_of_row %in% incomplete_groups)

  df[keep_row, ]
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.