Nothing
#' Compute tile pattern analysis for all pairwise datasets
#'
#' This function iterates over a list of pairwise datasets (as produced by create_pairwise_datasets)
#' and fills the results$tile_pattern entry for each pair using the provided tile pattern analysis.
#'
#' Assign 3x3 tiles to data points based on two variables
#' @keywords internal
assign_3x3_tiles <- function(data, var1_name, var2_name, split_method = "quantile") {
# Input validation
if (!is.data.frame(data) || nrow(data) == 0) {
stop("'data' must be a non-empty data frame", call. = FALSE)
}
if (!all(c(var1_name, var2_name) %in% names(data))) {
stop("Variables '", var1_name, "' and '", var2_name, "' must exist in data", call. = FALSE)
}
if (!split_method %in% c("quantile", "range")) {
stop("'split_method' must be either 'quantile' or 'range'", call. = FALSE)
}
# Get variable vectors
var1_values <- data[[var1_name]]
var2_values <- data[[var2_name]]
# Check for sufficient variation
if (length(unique(var1_values[!is.na(var1_values)])) < 2) {
warning("Variable '", var1_name, "' has insufficient variation for tile assignment", call. = FALSE)
}
if (length(unique(var2_values[!is.na(var2_values)])) < 2) {
warning("Variable '", var2_name, "' has insufficient variation for tile assignment", call. = FALSE)
}
# Determine split points
if (split_method == "quantile") {
x_limits <- compute_quantile_splits(var1_values)
y_limits <- compute_quantile_splits(var2_values)
} else {
x_limits <- split_range_into_thirds(var1_values)
y_limits <- split_range_into_thirds(var2_values)
}
# Internal function for flexible group assignment
assign_flex_groups <- function(values, limits, labels) {
breaks <- c(-Inf, limits, Inf)
groups <- cut(values, breaks = breaks, labels = labels, include.lowest = TRUE)
if (!any(values > limits[1] & values < limits[2]) &&
any(values == limits[2]) &&
!any(values > limits[2])) {
groups[values == limits[2]] <- labels[length(labels)]
groups <- factor(groups, levels = labels)
}
return(groups)
}
# Create group assignments
x_group <- assign_flex_groups(var1_values, x_limits, c("Left", "Mid", "Right"))
y_group <- assign_flex_groups(var2_values, y_limits, c("Lower", "Mid", "Upper"))
# Map to tile names
tile_names <- mapply(map_coordinates_to_tile, x_group, y_group, USE.NAMES = FALSE)
# Create factor with proper ordering
tile_levels <- c(
"Upper Left", "Upper Mid", "Upper Right",
"Mid Left", "Center", "Mid Right",
"Lower Left", "Lower Mid", "Lower Right"
)
factor(tile_names, levels = tile_levels)
}
#' Compute robust quantile splits with fallback to range splits
#' @keywords internal
compute_quantile_splits <- function(x) {
# Try quantile method first
quantile_splits <- quantile(x, probs = c(1/3, 2/3), na.rm = TRUE)
# Check if quantiles provide sufficient separation
if (length(unique(quantile_splits)) < 2) {
# Fall back to range-based splitting
return(split_range_into_thirds(x))
}
return(quantile_splits)
}
#' Split a numeric vector into three equal-width bins
#' @keywords internal
split_range_into_thirds <- function(x) {
min_val <- min(x, na.rm = TRUE)
max_val <- max(x, na.rm = TRUE)
if (is.infinite(min_val) || is.infinite(max_val) || min_val == max_val) {
# Handle edge cases
return(c(min_val, min_val))
}
range_size <- max_val - min_val
break1 <- min_val + range_size / 3
break2 <- min_val + 2 * range_size / 3
c(break1, break2)
}
#' Map x,y coordinates to tile names
#' @keywords internal
map_coordinates_to_tile <- function(x, y) {
if (is.na(x) || is.na(y)) return(NA)
coordinate_key <- paste(x, y)
tile_mapping <- c(
"Left Upper" = "Upper Left",
"Right Upper" = "Upper Right",
"Left Lower" = "Lower Left",
"Right Lower" = "Lower Right",
"Mid Upper" = "Upper Mid",
"Left Mid" = "Mid Left",
"Right Mid" = "Mid Right",
"Mid Lower" = "Lower Mid",
"Mid Mid" = "Center"
)
result <- tile_mapping[coordinate_key]
if (is.na(result)) {
warning("Unknown coordinate combination: ", coordinate_key, call. = FALSE)
}
return(result)
}
#' Chi-squared test and difference check for observed vs expected counts
#' @keywords internal
check_chisquared_difference <- function(
observed_counts, expected_counts,
abs_diff_threshold = 20,
filter_expected = 0,
p_threshold = 0.05,
monte_carlo_B = 2000
) {
# Input validation and conversion
observed_counts <- as.matrix(observed_counts)
expected_counts <- as.matrix(expected_counts)
if (!identical(dim(observed_counts), dim(expected_counts))) {
stop("Observed and expected count matrices must have the same dimensions", call. = FALSE)
}
# Handle zero expected counts
expected_counts[expected_counts == 0] <- 0.5
# Convert to vectors for analysis
observed_vector <- as.vector(t(observed_counts))
expected_vector <- as.vector(t(expected_counts))
# Apply filtering if specified
if (filter_expected > 0) {
mask <- expected_vector > filter_expected
observed_vector_filt <- observed_vector[mask]
expected_vector_filt <- expected_vector[mask]
} else {
observed_vector_filt <- observed_vector
expected_vector_filt <- expected_vector
}
# Calculate absolute difference
abs_difference_sum <- sum(abs(observed_vector - expected_vector))
# Initialize result variables
p_value <- NA
statistic <- NA
df <- NA
p_value_mc <- NA
statistic_mc <- NA
# Perform chi-square tests if we have sufficient data
if (length(observed_vector_filt) > 1 && sum(expected_vector_filt) > 0) {
# Standard chi-square test
tryCatch({
chisq_test_result <- suppressWarnings(
chisq.test(
x = observed_vector_filt,
p = expected_vector_filt / sum(expected_vector_filt),
rescale.p = TRUE
)
)
p_value <- chisq_test_result$p.value
statistic <- chisq_test_result$statistic
df <- chisq_test_result$parameter
}, error = function(e) {
warning("Standard chi-square test failed: ", e$message, call. = FALSE)
})
# Monte Carlo chi-square test
if (monte_carlo_B > 0) {
tryCatch({
chisq_mc_result <- suppressWarnings(
chisq.test(
x = observed_vector_filt,
p = expected_vector_filt / sum(expected_vector_filt),
rescale.p = TRUE,
simulate.p.value = TRUE,
B = monte_carlo_B
)
)
p_value_mc <- chisq_mc_result$p.value
statistic_mc <- chisq_mc_result$statistic
}, error = function(e) {
warning("Monte Carlo chi-square test failed: ", e$message, call. = FALSE)
})
}
}
# Determine significance
differs <- (!is.na(p_value) && p_value < p_threshold) ||
abs_difference_sum > abs_diff_threshold
differs_mc <- (!is.na(p_value_mc) && p_value_mc < p_threshold) ||
abs_difference_sum > abs_diff_threshold
# Generate message
message <- if (differs || differs_mc) {
"Observed counts differ from expectations (statistically or practically)."
} else {
"No significant or practical difference detected."
}
list(
differs = differs,
p_value = p_value,
statistic = statistic,
df = df,
differs_mc = differs_mc,
p_value_mc = p_value_mc,
statistic_mc = statistic_mc,
abs_difference_sum = abs_difference_sum,
observed = observed_counts,
expected = expected_counts,
message = message
)
}
#' Main tile pattern analysis for a single pair
#' @keywords internal
analyze_tile_dominance <- function(
data, var1_name, var2_name, class_name,
p_threshold = 0.05,
abs_diff_threshold = 20,
split_method = "quantile"
) {
# Input validation
validate_tile_analysis_inputs(data, var1_name, var2_name, class_name, p_threshold, abs_diff_threshold)
# Define tile sets and ideal pattern
TILES_DEFAULT <- c("Lower Left", "Lower Right", "Upper Left", "Upper Right")
TILES_ROTATED <- c("Mid Left", "Upper Mid", "Lower Mid", "Mid Right")
IDEAL_PATTERN <- c(1, 2, 2, 1)
# Assign tiles and create observed table
tiles_population <- assign_3x3_tiles(data, var1_name, var2_name, split_method)
observed_table <- table(data[[class_name]], tiles_population)
# Ensure we have the expected number of classes
if (nrow(observed_table) != 2) {
stop("Analysis requires exactly 2 classes, found ", nrow(observed_table), call. = FALSE)
}
# Compute dominance for both scenarios
dominance_results <- compute_scenario_dominance(observed_table, TILES_DEFAULT, TILES_ROTATED, IDEAL_PATTERN)
# Select best scenario
scenario_selection <- select_best_scenario(dominance_results)
chosen_tiles <- scenario_selection$tiles
scenario <- scenario_selection$scenario
# Analyze chosen scenario
analysis_result <- analyze_chosen_scenario(
observed_table, chosen_tiles, IDEAL_PATTERN, scenario,
p_threshold, abs_diff_threshold
)
# Add scenario information
analysis_result$xor_pattern <- scenario
analysis_result$dominance_default <- dominance_results$default
analysis_result$dominance_rotated <- dominance_results$rotated
return(analysis_result)
}
#' Validate inputs for tile analysis
#' @keywords internal
validate_tile_analysis_inputs <- function(data, var1_name, var2_name, class_name, p_threshold, abs_diff_threshold) {
if (!is.data.frame(data) || nrow(data) == 0) {
stop("'data' must be a non-empty data frame", call. = FALSE)
}
required_vars <- c(var1_name, var2_name, class_name)
missing_vars <- setdiff(required_vars, names(data))
if (length(missing_vars) > 0) {
stop("Missing variables in data: ", paste(missing_vars, collapse = ", "), call. = FALSE)
}
if (!is.numeric(p_threshold) || p_threshold <= 0 || p_threshold >= 1) {
stop("'p_threshold' must be between 0 and 1", call. = FALSE)
}
if (!is.numeric(abs_diff_threshold) || abs_diff_threshold < 0) {
stop("'abs_diff_threshold' must be non-negative", call. = FALSE)
}
}
#' Compute dominance scores for different scenarios
#' @keywords internal
compute_scenario_dominance <- function(observed_table, tiles_default, tiles_rotated, ideal_pattern) {
compute_dominance <- function(obs_table, tiles, ideal_pattern) {
if (!all(tiles %in% colnames(obs_table))) {
return(NA)
}
obs_sub <- obs_table[, tiles, drop = FALSE]
if (sum(obs_sub) == 0) {
return(NA)
}
ideal_cells <- mapply(function(class, col) {
if (class <= nrow(obs_sub) && col <= ncol(obs_sub)) {
obs_sub[class, col]
} else {
0
}
}, ideal_pattern, seq_along(ideal_pattern))
sum(ideal_cells) / sum(obs_sub)
}
list(
default = compute_dominance(observed_table, tiles_default, ideal_pattern),
rotated = compute_dominance(observed_table, tiles_rotated, ideal_pattern)
)
}
#' Select the best scenario based on dominance scores
#' @keywords internal
select_best_scenario <- function(dominance_results) {
TILES_DEFAULT <- c("Lower Left", "Lower Right", "Upper Left", "Upper Right")
TILES_ROTATED <- c("Mid Left", "Upper Mid", "Lower Mid", "Mid Right")
dom_default <- dominance_results$default
dom_rotated <- dominance_results$rotated
if (is.na(dom_default) && is.na(dom_rotated)) {
# Both scenarios invalid, default to default
return(list(tiles = TILES_DEFAULT, scenario = "default"))
} else if (is.na(dom_rotated) || (!is.na(dom_default) && dom_default >= dom_rotated)) {
return(list(tiles = TILES_DEFAULT, scenario = "default"))
} else {
return(list(tiles = TILES_ROTATED, scenario = "rotated"))
}
}
#' Analyze the chosen scenario in detail
#' @keywords internal
analyze_chosen_scenario <- function(observed_table, chosen_tiles, ideal_pattern, scenario, p_threshold, abs_diff_threshold) {
# Extract observed data for chosen tiles
observed_sub <- observed_table[, chosen_tiles, drop = FALSE]
tile_totals <- colSums(observed_sub)
# Determine if we need strict mode
strict_mode <- any(tile_totals == 0)
strict_note <- if (strict_mode) "Some tiles empty: using strict (ideal) expected table." else ""
# Create expected tables
expected_table <- create_expected_table_2x4(observed_table, chosen_tiles, ideal_pattern, strict_mode)
expected_table_neutral <- create_expected_table_2x4_neutral(observed_table, chosen_tiles, strict_mode)
# Perform statistical tests
diff_result <- check_chisquared_difference(
observed_sub, expected_table,
abs_diff_threshold = abs_diff_threshold,
p_threshold = p_threshold
)
diff_result_neutral <- check_chisquared_difference(
observed_sub, expected_table_neutral,
abs_diff_threshold = abs_diff_threshold,
p_threshold = p_threshold
)
# Compute corner conditions
corner_analysis <- analyze_corner_conditions(observed_table, scenario)
# Create interpretation
interpretation <- create_interpretation_message(
strict_note, diff_result, corner_analysis$message
)
# Determine XOR detection
xor_shape_detected <- (!(diff_result$differs || diff_result$differs_mc)) && diff_result_neutral$differs
list(
observed_table = observed_sub,
expected_table = expected_table,
chi_squared_test = list(
statistic = diff_result$statistic,
df = diff_result$df,
p_value = diff_result$p_value,
statistic_mc = diff_result$statistic_mc,
p_value_mc = diff_result$p_value_mc
),
abs_difference_sum = diff_result$abs_difference_sum,
interpretation = interpretation,
xor_shape_detected = xor_shape_detected,
strict_mode = strict_mode
)
}
#' Create expected table for XOR pattern
#' @keywords internal
create_expected_table_2x4 <- function(obs_table, tiles, ideal_pattern, strict = FALSE) {
obs_sub <- obs_table[, tiles, drop = FALSE]
expected_table <- matrix(0, nrow = 2, ncol = 4, dimnames = list(rownames(obs_table), tiles))
if (strict) {
total <- sum(obs_sub)
for (i in seq_along(ideal_pattern)) {
expected_class <- ideal_pattern[i]
expected_table[expected_class, i] <- total / 4
}
} else {
tile_totals <- colSums(obs_sub)
for (i in seq_along(ideal_pattern)) {
expected_class <- ideal_pattern[i]
expected_table[expected_class, i] <- tile_totals[i]
}
}
expected_table
}
#' Create neutral expected table
#' @keywords internal
create_expected_table_2x4_neutral <- function(obs_table, tiles, strict = FALSE) {
obs_sub <- obs_table[, tiles, drop = FALSE]
n_classes <- rowSums(obs_sub)
if (sum(n_classes) == 0) {
return(matrix(0, nrow = 2, ncol = 4, dimnames = list(rownames(obs_table), tiles)))
}
rel_classes <- n_classes / sum(n_classes)
expected_table <- matrix(0, nrow = 2, ncol = 4, dimnames = list(rownames(obs_table), tiles))
if (strict) {
expected_table[1, ] <- n_classes[1] / 4
expected_table[2, ] <- n_classes[2] / 4
} else {
tile_totals <- colSums(obs_sub)
expected_table[1, ] <- tile_totals * rel_classes[1]
expected_table[2, ] <- tile_totals * rel_classes[2]
}
expected_table
}
#' Analyze corner/edge conditions for different scenarios
#' @keywords internal
analyze_corner_conditions <- function(observed_table, scenario) {
percent_in_tiles <- function(tab, tiles) {
if (!all(tiles %in% colnames(tab))) {
return(rep(NA, nrow(tab)))
}
apply(tab, 1, function(row) {
sum(row[tiles]) / sum(row)
})
}
corner_percentages <- percent_in_tiles(observed_table, c("Upper Left", "Lower Left", "Upper Right", "Lower Right"))
mid_percentages <- percent_in_tiles(observed_table, c("Mid Left", "Mid Right", "Upper Mid", "Lower Mid"))
if (scenario == "default") {
target_percentages <- corner_percentages
message <- sprintf(
"Proportion of class 1 in target tiles: %s; class 2: %s.",
signif(target_percentages[1], 3),
signif(target_percentages[2], 3)
)
} else {
target_percentages <- mid_percentages
message <- sprintf(
"Proportion of class 1 in target tiles: %s; class 2: %s.",
signif(target_percentages[1], 3),
signif(target_percentages[2], 3)
)
}
list(
target_percentages = target_percentages,
message = message
)
}
#' Create interpretation message
#' @keywords internal
create_interpretation_message <- function(strict_note, diff_result, corner_message) {
paste0(
if (nchar(strict_note) > 0) paste0(strict_note, " ") else "",
"Standard p = ", signif(diff_result$p_value, 3),
", Monte Carlo p = ", signif(diff_result$p_value_mc, 3), ". ",
diff_result$message, "\n",
corner_message, "\n"
)
}
#' Compute tile pattern analysis for all pairwise datasets (modularized)
#'
#' @param orig_pair_list List. Output from create_pairwise_datasets.
#' @param class_col Character. Name of the class column (default: "class").
#' @param p_threshold Numeric. Significance threshold for p-values (default: 0.05).
#' @param abs_diff_threshold Numeric. Absolute difference threshold (default: 20).
#' @param split_method Character. Method for splitting tiles ("quantile" or "range") (default: "quantile").
#' @return The input list, but with results$tile_pattern filled for each pair.
#' @importFrom stats quantile chisq.test
#' @export
compute_tile_patterns_for_pairs <- function(
orig_pair_list,
class_col = "class",
p_threshold = 0.05,
abs_diff_threshold = 20,
split_method = "quantile"
) {
# Input validation
if (!is.list(orig_pair_list) || length(orig_pair_list) == 0) {
stop("'orig_pair_list' must be a non-empty list", call. = FALSE)
}
if (!is.character(class_col) || length(class_col) != 1) {
stop("'class_col' must be a single character string", call. = FALSE)
}
if (!is.numeric(p_threshold) || p_threshold <= 0 || p_threshold >= 1) {
stop("'p_threshold' must be between 0 and 1", call. = FALSE)
}
if (!is.numeric(abs_diff_threshold) || abs_diff_threshold < 0) {
stop("'abs_diff_threshold' must be non-negative", call. = FALSE)
}
if (!split_method %in% c("quantile", "range")) {
stop("'split_method' must be either 'quantile' or 'range'", call. = FALSE)
}
# Process each pair
for (nm in names(orig_pair_list)) {
tryCatch({
pair <- orig_pair_list[[nm]]
# Validate pair structure
if (is.null(pair$data) || !is.data.frame(pair$data)) {
warning("Pair '", nm, "' missing valid data, skipping", call. = FALSE)
next
}
df <- pair$data
var_names <- setdiff(names(df), class_col)
if (length(var_names) != 2) {
warning("Pair '", nm, "' must contain exactly 2 variables + class column, skipping", call. = FALSE)
next
}
if (!class_col %in% names(df)) {
warning("Class column '", class_col, "' not found in pair '", nm, "', skipping", call. = FALSE)
next
}
# Perform tile pattern analysis
tile_pattern_result <- analyze_tile_dominance(
data = df,
var1_name = var_names[1],
var2_name = var_names[2],
class_name = class_col,
p_threshold = p_threshold,
abs_diff_threshold = abs_diff_threshold,
split_method = split_method
)
# Store results
if (is.null(orig_pair_list[[nm]]$results)) {
orig_pair_list[[nm]]$results <- list()
}
orig_pair_list[[nm]]$results$tile_pattern <- tile_pattern_result
}, error = function(e) {
warning("Error processing pair '", nm, "': ", e$message, call. = FALSE)
})
}
orig_pair_list
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.