# Nothing
## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults applied to every code chunk rendered from this document.
knitr::opts_chunk$set(
  # Output formatting: prefix printed output with "#>" and collapse it into
  # the source block.
  comment = "#>",
  collapse = TRUE,
  # Figure geometry and placement.
  fig.align = "center",
  fig.height = 5,
  fig.width = 7,
  # Keep messages and warnings out of the rendered output.
  message = FALSE,
  warning = FALSE
)
## ----load-package-------------------------------------------------------------
library(evanverse)
library(dplyr)
## ----void-concepts------------------------------------------------------------
# Three small vectors showing the kinds of "void" entries used throughout:
# missing values (NA) and empty strings ("").
void_examples <- list(
  numbers = c(1, NA_real_, 3, 4),
  strings = c("A", "", "C", NA_character_),
  mixed = c("text", NA_character_, "", "data")
)
print("Examples of data with void values:")
str(void_examples)
## ----void-detection-single----------------------------------------------------
# Probe is_void() with one value at a time and print each verdict.
# Expected output, in order:
#   NA -> TRUE, "" -> TRUE, NULL -> TRUE, "hello" -> FALSE, 0 -> FALSE
invisible(lapply(
  list(NA, "", NULL, "hello", 0),
  function(candidate) print(is_void(candidate))
))
## ----void-detection-vector----------------------------------------------------
# any_void() is applied to a whole vector; this one holds both an empty
# string and an NA.
test_vector <- c("A", "", "C", NA_character_, "E")
print(any_void(test_vector)) # TRUE
# A fully populated vector for comparison.
clean_vector <- LETTERS[1:3]
print(any_void(clean_vector)) # FALSE
## ----void-detection-dataframe-------------------------------------------------
# Demo data frame: `name` and `city` each contain an empty string and an NA,
# `age` contains an NA, and `id` is complete.
sample_data <- data.frame(
  id = seq_len(6),
  name = c("Alice", "", "Charlie", NA_character_, "Eve", "Frank"),
  age = c(25, 30, NA_real_, 35, 28, 32),
  city = c("NYC", "LA", "", "Chicago", NA_character_, "Boston"),
  stringsAsFactors = FALSE
)
print("Sample data with void values:")
print(sample_data)
# Report the columns flagged by cols_with_void() as one comma-separated line.
void_cols <- cols_with_void(sample_data)
print(sprintf("Columns with void values: %s", toString(void_cols)))
# Same report for the rows flagged by rows_with_void().
void_rows <- rows_with_void(sample_data)
print(sprintf("Rows with void values: %s", toString(void_rows)))
## ----void-replacement-basic---------------------------------------------------
# Swap every void entry of a vector for one placeholder value, then show the
# vector before and after.
messy_vector <- c("A", "", "C", NA_character_, "E")
clean_vector <- replace_void(
  messy_vector,
  value = "MISSING"
)
print("Original vector:")
print(messy_vector)
print("After replacement:")
print(clean_vector)
## ----void-replacement-selective-----------------------------------------------
# The include_* switches restrict which kinds of void values get replaced.
mixed_data <- c("A", "", "C", NA_character_, "E")
# Empty strings only: NA handling is switched off.
only_empty <- replace_void(
  mixed_data,
  value = "EMPTY",
  include_empty_str = TRUE,
  include_na = FALSE
)
print("Replace only empty strings:")
print(only_empty)
# NA only: empty-string handling is switched off.
only_na <- replace_void(
  mixed_data,
  value = "NOT_AVAILABLE",
  include_empty_str = FALSE,
  include_na = TRUE
)
print("Replace only NA values:")
print(only_na)
## ----void-replacement-dataframe-----------------------------------------------
# Work on a copy so sample_data stays untouched, and run the same replacement
# over both text columns in one column-wise pass.
clean_data <- sample_data
clean_data[c("name", "city")] <- lapply(
  sample_data[c("name", "city")],
  replace_void,
  value = "UNKNOWN"
)
print("Data after void replacement:")
print(clean_data)
## ----drop-elements------------------------------------------------------------
# drop_void() applied to a vector: show the input and what is left afterwards.
test_vector <- c("A", "", "C", NA_character_, "E")
clean_vector <- drop_void(test_vector)
print("Original vector:")
print(test_vector)
print("After dropping void elements:")
print(clean_vector)
# For the data frame, report the affected rows and columns instead.
print("Rows with void values:")
print(rows_with_void(sample_data))
print("Columns with void values:")
print(cols_with_void(sample_data))
## ----df-to-list---------------------------------------------------------------
# Take the first 12 cars and four columns of the built-in mtcars data.
mtcars_subset <- head(mtcars[c("cyl", "mpg", "hp", "wt")], n = 12)
# Split the MPG values into a list keyed by cylinder count.
grouped_cars <- df2list(
  data = mtcars_subset,
  value_col = "mpg",
  key_col = "cyl"
)
print("Cars grouped by cylinder count (MPG values):")
str(grouped_cars)
# Pull out one group by its key.
print("4-cylinder cars MPG values:")
print(grouped_cars[["4"]])
## ----column-mapping-----------------------------------------------------------
# Translate letter grades into grade points through a named lookup vector.
# stringsAsFactors = FALSE keeps `grade_letter` a character column on every R
# version (the default was TRUE before R 4.0), in line with the other data
# frames built in this script.
grades_data <- data.frame(
  student = c("Alice", "Bob", "Charlie", "Diana"),
  grade_letter = c("A", "B", "A", "C"),
  stringsAsFactors = FALSE
)
# Lookup table: names are the letter grades, values are the grade points.
grade_mapping <- c("A" = 4.0, "B" = 3.0, "C" = 2.0, "D" = 1.0, "F" = 0.0)
# Look up `grade_letter` in the mapping and store the result in a new
# `grade_numeric` column.
result <- map_column(
  query = grades_data,
  by = "grade_letter",
  map = grade_mapping,
  to = "grade_numeric"
)
print("Grades with numeric mapping:")
print(result)
## ----file-reading, eval=FALSE-------------------------------------------------
# # Read various file formats with automatic detection
# data1 <- read_table_flex("data.csv")
# data2 <- read_table_flex("data.tsv", sep = "\t")
# data3 <- read_table_flex("data.txt", header = TRUE)
#
# # Read Excel files with flexibility
# excel_data <- read_excel_flex("workbook.xlsx", sheet = "Sheet1")
## ----file-info, eval=FALSE----------------------------------------------------
# # Get comprehensive file information
# info <- file_info("myfile.csv")
# print(info)
#
# # Extract file extensions
# files <- c("data.csv", "analysis.R", "report.pdf")
# extensions <- sapply(files, get_ext)
# print(extensions)
#
# # Display directory structure
# file_tree(".", max_depth = 2)
## ----string-operators---------------------------------------------------------
# Build strings by chaining the %p% infix operator. User-defined %op%
# operators are left-associative, so each chain is evaluated left to right.
# NOTE(review): the operands below carry their own separators (" ", "/"),
# which assumes %p% joins its two sides without inserting anything. Confirm
# against the operator's documentation; if it adds a separator of its own,
# the printed name and path will contain extra whitespace.
full_name <- "John" %p% " " %p% "Doe"
print(full_name)
file_path <- "data" %p% "/" %p% "analysis" %p% ".csv"
print(file_path)
## ----logical-operators--------------------------------------------------------
# %nin% as a "not in" test: keep the candidates absent from the reference set.
fruits <- c("apple", "banana", "orange")
check_fruits <- c("apple", "grape", "banana", "kiwi")
missing_fruits <- check_fruits[check_fruits %nin% fruits]
print(sprintf("Missing fruits: %s", toString(missing_fruits)))
# %is% comparisons: same value and type, then a number against a string.
print(5 %is% 5) # TRUE
print("a" %is% "a") # TRUE
print(5 %is% "5") # FALSE
## ----combinatorial------------------------------------------------------------
# Count the ways to pick items from a small set. The pool size is derived
# from `items` (previously defined but unused) rather than hard-coded, so the
# counts and the printed messages stay in sync with the data.
items <- c("A", "B", "C", "D")
n_items <- length(items)
n_pick <- 2
# Combinations: order does not matter, C(4, 2) = 6
combinations_count <- comb(n_items, n_pick)
print(sprintf(
  "Number of ways to choose %d items from %d: %s",
  n_pick, n_items, combinations_count
))
# Permutations: order matters, P(4, 2) = 12
permutations_count <- perm(n_items, n_pick)
print(sprintf(
  "Number of ways to arrange %d items from %d: %s",
  n_pick, n_items, permutations_count
))
## ----survey-example-----------------------------------------------------------
# Simulate messy survey data. Mixing numbers with "" inside c() coerces `age`
# and `satisfaction` to character, so all three answer columns start out as
# strings.
survey_data <- data.frame(
  id = 1:8,
  age = c(25, "", 30, NA, "35", 28, 0, 45),
  income = c("50000", "", NA, "75000", "60000", "invalid", "80000", ""),
  satisfaction = c(5, 4, "", 3, NA, 5, 4, 2),
  stringsAsFactors = FALSE
)
print("Original messy survey data:")
print(survey_data)
# Step 1: Identify problematic data
cat("\nData quality assessment:\n")
cat("Columns with void values:", paste(cols_with_void(survey_data), collapse = ", "), "\n")
cat("Rows with void values:", paste(rows_with_void(survey_data), collapse = ", "), "\n")
# Step 2: Clean the data
# Fallback values, defined once so the void replacement and the special-case
# handling below cannot drift apart. They are strings because the columns are
# still character at the point of replacement.
age_default <- "25"
income_default <- "50000"
satisfaction_default <- "3"
survey_clean <- survey_data
survey_clean$age <- replace_void(survey_clean$age, value = age_default)
survey_clean$income <- replace_void(survey_clean$income, value = income_default)
survey_clean$satisfaction <- replace_void(
  survey_clean$satisfaction,
  value = satisfaction_default
)
# Income entries that are not plain numbers (here: "invalid") get the default
# before conversion, so as.numeric() never has to turn garbage into NA with a
# coercion warning.
not_numeric <- !grepl("^[0-9]+(\\.[0-9]+)?$", survey_clean$income)
survey_clean$income[not_numeric] <- income_default
# Convert to appropriate types
survey_clean$age <- as.numeric(survey_clean$age)
survey_clean$income <- as.numeric(survey_clean$income)
survey_clean$satisfaction <- as.numeric(survey_clean$satisfaction)
# An age of 0 is treated as a placeholder; which() keeps the subscript free
# of NA even if the comparison ever yields one.
survey_clean$age[which(survey_clean$age == 0)] <- as.numeric(age_default)
print("Cleaned survey data:")
print(survey_clean)
## ----performance-tips---------------------------------------------------------
# For large datasets, check specific columns rather than the entire data frame.
# The data is random (no seed is set), so the exact values differ per run.
# stringsAsFactors = FALSE keeps `col2` a character column on every R version,
# in line with the other data frames built in this script.
large_data <- data.frame(
  col1 = sample(c(1:100, NA), 1000, replace = TRUE),
  col2 = sample(c(letters, ""), 1000, replace = TRUE),
  col3 = runif(1000),
  stringsAsFactors = FALSE
)
# Check only columns likely to have voids
critical_cols <- c("col1", "col2")
# vapply() pins the result to one logical per column (named by column), so an
# unexpected return value fails loudly instead of silently changing the shape
# of void_status.
void_status <- vapply(
  critical_cols,
  function(col) any_void(large_data[[col]]),
  logical(1)
)
print("Void status for critical columns:")
print(void_status)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.