Nothing
# Comprehensive Cross-Pathway Type Consistency Tests
# Testing KEGG, MetaCyc, and GO pathway integration and consistency
library(testthat)
library(ggpicrust2)
# Test Data Generators for Cross-Pathway Analysis
create_cross_pathway_test_data <- function(n_samples = 24, effect_size = 1.5) {
set.seed(42)
# Create comprehensive KO abundance data (for KEGG and GO)
ko_features <- c(
# Glycolysis pathway
"K00844", "K12407", "K00845", "K00886", "K08074", "K01810", "K00134",
# TCA cycle
"K00239", "K00240", "K00241", "K00242", "K01902", "K01903", "K00031",
# Pentose phosphate pathway
"K00016", "K00018", "K00128", "K01595", "K01596", "K00036",
# Fatty acid metabolism
"K01623", "K01624", "K11645", "K01803", "K15633", "K00059",
# Additional random KOs
paste0("K", sprintf("%05d", sample(1000:9999, 30)))
)
ko_abundance <- matrix(0, nrow = length(ko_features), ncol = n_samples)
rownames(ko_abundance) <- ko_features
colnames(ko_abundance) <- paste0("Sample", 1:n_samples)
# Create realistic log-normal abundance with group differences
for (i in 1:length(ko_features)) {
# Base abundance
base_abundance <- rlnorm(n_samples, meanlog = runif(1, 3, 7), sdlog = 0.8)
# Add group-specific effects for some features
if (i <= 20) { # First 20 features show group differences
group_effect <- c(rep(-effect_size/2, n_samples/2), rep(effect_size/2, n_samples/2))
base_abundance <- base_abundance * exp(group_effect + rnorm(n_samples, 0, 0.2))
}
ko_abundance[i, ] <- base_abundance
}
# Create EC abundance data (for MetaCyc) - similar structure but EC numbers
ec_features <- c(
# Core metabolic ECs
"EC:2.7.1.1", "EC:2.7.1.2", "EC:4.1.2.13", "EC:1.2.1.12", "EC:2.3.1.12",
"EC:1.1.1.27", "EC:4.2.1.11", "EC:1.8.1.4", "EC:6.2.1.1", "EC:1.3.1.9",
"EC:1.1.1.35", "EC:2.8.3.5", "EC:1.1.1.40", "EC:4.1.1.31", "EC:2.3.1.16",
# Additional random ECs
paste0("EC:", sample(1:6, 25, replace = TRUE), ".",
sample(1:20, 25, replace = TRUE), ".",
sample(1:50, 25, replace = TRUE), ".",
sample(1:100, 25, replace = TRUE))
)
ec_abundance <- matrix(0, nrow = length(ec_features), ncol = n_samples)
rownames(ec_abundance) <- ec_features
colnames(ec_abundance) <- paste0("Sample", 1:n_samples)
for (i in 1:length(ec_features)) {
base_abundance <- rlnorm(n_samples, meanlog = runif(1, 3, 7), sdlog = 0.8)
if (i <= 15) { # First 15 features show group differences
group_effect <- c(rep(-effect_size/2, n_samples/2), rep(effect_size/2, n_samples/2))
base_abundance <- base_abundance * exp(group_effect + rnorm(n_samples, 0, 0.2))
}
ec_abundance[i, ] <- base_abundance
}
# Create metadata
metadata <- data.frame(
sample_name = paste0("Sample", 1:n_samples),
Environment = factor(rep(c("Healthy", "Disease"), each = n_samples/2)),
Batch = factor(rep(1:3, length.out = n_samples)),
Age = runif(n_samples, 25, 75),
BMI = rnorm(n_samples, 25, 4)
)
rownames(metadata) <- metadata$sample_name
return(list(
ko_abundance = ko_abundance,
ec_abundance = ec_abundance,
metadata = metadata
))
}
create_mock_gene_sets <- function(pathway_type, gene_names) {
set.seed(123)
if (pathway_type == "KEGG") {
pathways <- paste0("ko", sprintf("%05d", 10:25))
gene_sets <- list()
for (i in seq_along(pathways)) {
pathway_size <- sample(8:25, 1)
genes <- sample(gene_names, min(pathway_size, length(gene_names)))
gene_sets[[pathways[i]]] <- genes
}
} else if (pathway_type == "MetaCyc") {
pathways <- paste0("PWY-", sample(1000:9999, 16))
gene_sets <- list()
for (i in seq_along(pathways)) {
pathway_size <- sample(5:20, 1)
genes <- sample(gene_names, min(pathway_size, length(gene_names)))
gene_sets[[pathways[i]]] <- genes
}
} else if (pathway_type == "GO") {
pathways <- paste0("GO:", sprintf("%07d", sample(1:9999999, 14)))
gene_sets <- list()
for (i in seq_along(pathways)) {
pathway_size <- sample(10:30, 1)
genes <- sample(gene_names, min(pathway_size, length(gene_names)))
gene_sets[[pathways[i]]] <- genes
}
}
return(gene_sets)
}
# API Consistency Tests
test_that("all pathway types have identical function signatures", {
# Test that pathway_gsea accepts all pathway types with same parameters
test_data <- create_cross_pathway_test_data()
# Create base parameters
base_params <- list(
metadata = test_data$metadata,
group = "Environment",
method = "fgsea",
rank_method = "signal2noise",
nperm = 100,
min_size = 5,
max_size = 200,
p.adjust = "BH",
seed = 123
)
pathway_types <- c("KEGG", "MetaCyc", "GO")
for (pathway_type in pathway_types) {
# Mock prepare_gene_sets for each pathway type
if (pathway_type == "KEGG" || pathway_type == "GO") {
abundance_data <- test_data$ko_abundance
gene_names <- rownames(abundance_data)
} else {
abundance_data <- test_data$ec_abundance
gene_names <- rownames(abundance_data)
}
mock_gene_sets <- create_mock_gene_sets(pathway_type, gene_names)
mockery::stub(pathway_gsea, "prepare_gene_sets", function(...) mock_gene_sets)
# Mock fgsea result
mock_result <- data.frame(
pathway_id = names(mock_gene_sets),
pathway_name = names(mock_gene_sets),
size = sapply(mock_gene_sets, length),
ES = runif(length(mock_gene_sets), -2, 2),
NES = runif(length(mock_gene_sets), -3, 3),
pvalue = runif(length(mock_gene_sets), 0.001, 0.5),
p.adjust = runif(length(mock_gene_sets), 0.005, 0.8),
leading_edge = sapply(mock_gene_sets, function(x) paste(sample(x, min(5, length(x))), collapse = ";")),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) mock_result)
# Test function call
params <- c(list(abundance = abundance_data, pathway_type = pathway_type), base_params)
expect_no_error({
result <- do.call(pathway_gsea, params)
}, info = paste("pathway_gsea should accept", pathway_type, "pathway type"))
# Check result structure is identical
expected_columns <- c("pathway_id", "pathway_name", "size", "ES", "NES",
"pvalue", "p.adjust", "leading_edge", "method")
expect_named(result, expected_columns,
info = paste("Result structure should be consistent for", pathway_type))
}
})
test_that("return data structures are identical across pathway types", {
test_data <- create_cross_pathway_test_data()
# Test each pathway type and collect results
results <- list()
pathway_types <- c("KEGG", "MetaCyc", "GO")
for (pathway_type in pathway_types) {
if (pathway_type == "KEGG" || pathway_type == "GO") {
abundance_data <- test_data$ko_abundance
gene_names <- rownames(abundance_data)
} else {
abundance_data <- test_data$ec_abundance
gene_names <- rownames(abundance_data)
}
mock_gene_sets <- create_mock_gene_sets(pathway_type, gene_names)
mockery::stub(pathway_gsea, "prepare_gene_sets", function(...) mock_gene_sets)
# Create standardized mock result
mock_result <- data.frame(
pathway_id = names(mock_gene_sets),
pathway_name = names(mock_gene_sets),
size = rep(15, length(mock_gene_sets)), # Same size for comparison
ES = rep(c(-1.5, 1.2, -0.8, 2.1), length.out = length(mock_gene_sets)),
NES = rep(c(-2.3, 1.8, -1.2, 3.1), length.out = length(mock_gene_sets)),
pvalue = rep(c(0.01, 0.05, 0.2, 0.001), length.out = length(mock_gene_sets)),
p.adjust = rep(c(0.05, 0.1, 0.3, 0.01), length.out = length(mock_gene_sets)),
leading_edge = rep("gene1;gene2;gene3", length(mock_gene_sets)),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) mock_result)
results[[pathway_type]] <- pathway_gsea(
abundance = abundance_data,
metadata = test_data$metadata,
group = "Environment",
pathway_type = pathway_type,
method = "fgsea",
seed = 123
)
}
# Compare column names and data types across all pathway types
for (i in 1:(length(results)-1)) {
for (j in (i+1):length(results)) {
type1 <- names(results)[i]
type2 <- names(results)[j]
expect_equal(names(results[[i]]), names(results[[j]]),
info = paste("Column names should match between", type1, "and", type2))
expect_equal(sapply(results[[i]], class), sapply(results[[j]], class),
info = paste("Column types should match between", type1, "and", type2))
}
}
})
# Statistical Consistency Tests
test_that("statistical calculations are consistent across pathway types", {
skip_if_not_installed("fgsea")
test_data <- create_cross_pathway_test_data()
# Use same ranking method and compare statistical properties
ranking_methods <- c("signal2noise", "t_test", "log2_ratio", "diff_abundance")
pathway_types <- c("KEGG", "MetaCyc", "GO")
for (rank_method in ranking_methods) {
rank_results <- list()
for (pathway_type in pathway_types) {
if (pathway_type == "KEGG" || pathway_type == "GO") {
abundance_data <- test_data$ko_abundance
} else {
abundance_data <- test_data$ec_abundance
}
# Calculate ranking metric directly
rank_metric <- calculate_rank_metric(
abundance = abundance_data,
metadata = test_data$metadata,
group = "Environment",
method = rank_method
)
rank_results[[pathway_type]] <- rank_metric
}
# Compare statistical properties of ranking metrics
rank_stats <- lapply(rank_results, function(x) {
list(
mean = mean(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE),
min = min(x, na.rm = TRUE),
max = max(x, na.rm = TRUE),
n_finite = sum(is.finite(x))
)
})
# All ranking methods should produce valid statistics
for (pathway_type in pathway_types) {
expect_true(is.finite(rank_stats[[pathway_type]]$mean),
info = paste(rank_method, "should produce finite mean for", pathway_type))
expect_true(rank_stats[[pathway_type]]$sd > 0,
info = paste(rank_method, "should produce positive SD for", pathway_type))
expect_true(rank_stats[[pathway_type]]$n_finite > 0,
info = paste(rank_method, "should produce finite values for", pathway_type))
}
}
})
test_that("p-value distributions are appropriate for all pathway types", {
skip_if_not_installed("fgsea")
test_data <- create_cross_pathway_test_data()
pathway_types <- c("KEGG", "MetaCyc", "GO")
pvalue_distributions <- list()
for (pathway_type in pathway_types) {
if (pathway_type == "KEGG" || pathway_type == "GO") {
abundance_data <- test_data$ko_abundance
gene_names <- rownames(abundance_data)
} else {
abundance_data <- test_data$ec_abundance
gene_names <- rownames(abundance_data)
}
mock_gene_sets <- create_mock_gene_sets(pathway_type, gene_names)
mockery::stub(pathway_gsea, "prepare_gene_sets", function(...) mock_gene_sets)
# Create realistic p-value distribution
n_pathways <- length(mock_gene_sets)
# Mix of significant and non-significant p-values
pvalues <- c(
runif(n_pathways * 0.2, 0.001, 0.05), # 20% significant
runif(n_pathways * 0.8, 0.05, 1.0) # 80% non-significant
)[1:n_pathways]
mock_result <- data.frame(
pathway_id = names(mock_gene_sets),
pathway_name = names(mock_gene_sets),
size = sapply(mock_gene_sets, length),
ES = runif(n_pathways, -2, 2),
NES = runif(n_pathways, -3, 3),
pvalue = pvalues,
p.adjust = p.adjust(pvalues, method = "BH"),
leading_edge = sapply(mock_gene_sets, function(x) paste(sample(x, min(3, length(x))), collapse = ";")),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) mock_result)
result <- pathway_gsea(
abundance = abundance_data,
metadata = test_data$metadata,
group = "Environment",
pathway_type = pathway_type,
method = "fgsea",
seed = 123
)
pvalue_distributions[[pathway_type]] <- result$pvalue
}
# Test p-value distribution properties
for (pathway_type in pathway_types) {
pvals <- pvalue_distributions[[pathway_type]]
# P-values should be between 0 and 1
expect_true(all(pvals >= 0 & pvals <= 1),
info = paste("P-values should be valid for", pathway_type))
# Should have reasonable distribution (not all 0 or 1)
expect_true(mean(pvals) > 0.01 & mean(pvals) < 0.99,
info = paste("P-value distribution should be reasonable for", pathway_type))
# Should have some variation
expect_true(sd(pvals) > 0.01,
info = paste("P-values should show variation for", pathway_type))
}
})
# Integration Workflow Tests
test_that("switching between pathway types in same session works", {
skip_if_not_installed("fgsea")
test_data <- create_cross_pathway_test_data()
# Simulate a workflow where user switches between pathway types
workflow_results <- list()
# KEGG analysis
kegg_gene_sets <- create_mock_gene_sets("KEGG", rownames(test_data$ko_abundance))
mockery::stub(pathway_gsea, "prepare_gene_sets", function(pathway_type, ...) {
if (pathway_type == "KEGG") return(kegg_gene_sets)
else return(list())
})
kegg_mock <- data.frame(
pathway_id = names(kegg_gene_sets)[1:10],
pathway_name = names(kegg_gene_sets)[1:10],
size = rep(15, 10),
ES = runif(10, -2, 2),
NES = runif(10, -3, 3),
pvalue = runif(10, 0.01, 0.3),
p.adjust = runif(10, 0.05, 0.4),
leading_edge = rep("K00001;K00002", 10),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) kegg_mock)
workflow_results$kegg <- pathway_gsea(
abundance = test_data$ko_abundance,
metadata = test_data$metadata,
group = "Environment",
pathway_type = "KEGG",
method = "fgsea",
seed = 123
)
# Switch to MetaCyc analysis
metacyc_gene_sets <- create_mock_gene_sets("MetaCyc", rownames(test_data$ec_abundance))
mockery::stub(pathway_gsea, "prepare_gene_sets", function(pathway_type, ...) {
if (pathway_type == "MetaCyc") return(metacyc_gene_sets)
else return(list())
})
metacyc_mock <- data.frame(
pathway_id = names(metacyc_gene_sets)[1:10],
pathway_name = names(metacyc_gene_sets)[1:10],
size = rep(12, 10),
ES = runif(10, -1.5, 1.5),
NES = runif(10, -2.5, 2.5),
pvalue = runif(10, 0.02, 0.4),
p.adjust = runif(10, 0.06, 0.5),
leading_edge = rep("EC:1.1.1.1;EC:2.2.2.2", 10),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) metacyc_mock)
workflow_results$metacyc <- pathway_gsea(
abundance = test_data$ec_abundance,
metadata = test_data$metadata,
group = "Environment",
pathway_type = "MetaCyc",
method = "fgsea",
seed = 123
)
# Switch to GO analysis
go_gene_sets <- create_mock_gene_sets("GO", rownames(test_data$ko_abundance))
mockery::stub(pathway_gsea, "prepare_gene_sets", function(pathway_type, ...) {
if (pathway_type == "GO") return(go_gene_sets)
else return(list())
})
go_mock <- data.frame(
pathway_id = names(go_gene_sets)[1:10],
pathway_name = names(go_gene_sets)[1:10],
size = rep(18, 10),
ES = runif(10, -1.8, 1.8),
NES = runif(10, -2.8, 2.8),
pvalue = runif(10, 0.005, 0.25),
p.adjust = runif(10, 0.02, 0.35),
leading_edge = rep("K00100;K00200", 10),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) go_mock)
workflow_results$go <- pathway_gsea(
abundance = test_data$ko_abundance,
metadata = test_data$metadata,
group = "Environment",
pathway_type = "GO",
method = "fgsea",
seed = 123
)
# Verify all analyses completed successfully
for (pathway_type in names(workflow_results)) {
expect_s3_class(workflow_results[[pathway_type]], "data.frame",
info = paste("Workflow result should be data frame for", pathway_type))
expect_true(nrow(workflow_results[[pathway_type]]) == 10,
info = paste("Workflow should return expected number of pathways for", pathway_type))
}
# Verify no contamination between analyses
expect_true(all(grepl("^ko", workflow_results$kegg$pathway_id)),
info = "KEGG results should contain KEGG pathway IDs")
expect_true(all(grepl("^PWY-", workflow_results$metacyc$pathway_id)),
info = "MetaCyc results should contain MetaCyc pathway IDs")
expect_true(all(grepl("^GO:", workflow_results$go$pathway_id)),
info = "GO results should contain GO pathway IDs")
})
test_that("data format compatibility across pathway transitions", {
test_data <- create_cross_pathway_test_data()
# Test that user can easily switch between analyses
pathway_types <- c("KEGG", "MetaCyc", "GO")
results <- list()
for (pathway_type in pathway_types) {
if (pathway_type == "KEGG" || pathway_type == "GO") {
abundance_data <- test_data$ko_abundance
gene_names <- rownames(abundance_data)
} else {
abundance_data <- test_data$ec_abundance
gene_names <- rownames(abundance_data)
}
mock_gene_sets <- create_mock_gene_sets(pathway_type, gene_names)
mockery::stub(pathway_gsea, "prepare_gene_sets", function(...) mock_gene_sets)
mock_result <- data.frame(
pathway_id = names(mock_gene_sets)[1:8],
pathway_name = names(mock_gene_sets)[1:8],
size = rep(12, 8),
ES = runif(8, -1, 1),
NES = runif(8, -2, 2),
pvalue = runif(8, 0.01, 0.3),
p.adjust = runif(8, 0.05, 0.4),
leading_edge = rep("gene1;gene2", 8),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) mock_result)
results[[pathway_type]] <- pathway_gsea(
abundance = abundance_data,
metadata = test_data$metadata,
group = "Environment",
pathway_type = pathway_type,
method = "fgsea",
seed = 123
)
}
# Test that results can be combined for comparison
combined_results <- do.call(rbind, lapply(names(results), function(type) {
df <- results[[type]]
df$pathway_type <- type
return(df)
}))
expect_s3_class(combined_results, "data.frame")
expect_equal(nrow(combined_results), 24) # 8 pathways × 3 types
expect_true("pathway_type" %in% colnames(combined_results))
# Test that each pathway type is represented
type_counts <- table(combined_results$pathway_type)
expect_equal(as.numeric(type_counts), rep(8, 3))
expect_equal(names(type_counts), c("GO", "KEGG", "MetaCyc"))
})
# Visualization Consistency Tests
test_that("visualize_gsea works consistently across pathway types", {
skip_if_not_installed("ggplot2")
test_data <- create_cross_pathway_test_data()
pathway_types <- c("KEGG", "MetaCyc", "GO")
for (pathway_type in pathway_types) {
if (pathway_type == "KEGG" || pathway_type == "GO") {
gene_names <- rownames(test_data$ko_abundance)
} else {
gene_names <- rownames(test_data$ec_abundance)
}
# Create mock GSEA results
mock_gene_sets <- create_mock_gene_sets(pathway_type, gene_names)
gsea_results <- data.frame(
pathway_id = names(mock_gene_sets)[1:5],
pathway_name = names(mock_gene_sets)[1:5],
size = rep(15, 5),
ES = c(-1.5, 1.2, -0.8, 2.1, -1.1),
NES = c(-2.3, 1.8, -1.2, 3.1, -1.7),
pvalue = c(0.01, 0.05, 0.2, 0.001, 0.08),
p.adjust = c(0.05, 0.1, 0.3, 0.01, 0.15),
leading_edge = rep("gene1;gene2;gene3", 5),
method = "fgsea",
stringsAsFactors = FALSE
)
# Test different plot types
plot_types <- c("enrichment_plot", "dotplot", "barplot")
for (plot_type in plot_types) {
expect_no_error({
plot <- visualize_gsea(
gsea_results = gsea_results,
plot_type = plot_type,
n_pathways = 5
)
}, info = paste("visualize_gsea should work with", pathway_type, plot_type))
expect_s3_class(plot, "ggplot",
info = paste("Should return ggplot object for", pathway_type, plot_type))
}
}
})
test_that("pathway annotation system consistency", {
test_data <- create_cross_pathway_test_data()
pathway_types <- c("KEGG", "MetaCyc", "GO")
for (pathway_type in pathway_types) {
# Create mock GSEA results
if (pathway_type == "KEGG") {
pathway_ids <- paste0("ko", sprintf("%05d", 10:15))
} else if (pathway_type == "MetaCyc") {
pathway_ids <- paste0("PWY-", 1000:1005)
} else {
pathway_ids <- paste0("GO:", sprintf("%07d", 1:6))
}
gsea_results <- data.frame(
pathway_id = pathway_ids,
pathway_name = pathway_ids, # Will be updated by annotation
size = rep(15, 6),
ES = runif(6, -2, 2),
NES = runif(6, -3, 3),
pvalue = runif(6, 0.01, 0.3),
p.adjust = runif(6, 0.05, 0.4),
leading_edge = rep("gene1;gene2", 6),
method = "fgsea",
stringsAsFactors = FALSE
)
# Mock annotation references
if (pathway_type == "KEGG") {
mock_reference <- data.frame(
pathway = pathway_ids,
pathway_name = paste("KEGG pathway", 1:6),
class = rep("Metabolism", 6),
stringsAsFactors = FALSE
)
mockery::stub(gsea_pathway_annotation, "load", function(file) {
kegg_reference <<- mock_reference
})
} else if (pathway_type == "MetaCyc") {
mock_reference <- data.frame(
V1 = pathway_ids,
V2 = paste("MetaCyc pathway", 1:6),
stringsAsFactors = FALSE
)
mockery::stub(gsea_pathway_annotation, "load", function(file) {
MetaCyc_reference <<- mock_reference
})
} else {
mock_reference <- data.frame(
go_id = pathway_ids,
go_name = paste("GO term", 1:6),
category = rep("BP", 6),
ko_members = rep("K00001;K00002", 6),
stringsAsFactors = FALSE
)
mockery::stub(gsea_pathway_annotation, "ko_to_go_reference", mock_reference)
}
# Test annotation function
expect_no_error({
annotated_results <- gsea_pathway_annotation(
gsea_results = gsea_results,
pathway_type = pathway_type
)
}, info = paste("Annotation should work for", pathway_type))
# Check that pathway names were added
expect_true("pathway_name" %in% colnames(annotated_results),
info = paste("Annotated results should have pathway_name column for", pathway_type))
# Check that some annotations were applied
expect_true(any(annotated_results$pathway_name != annotated_results$pathway_id),
info = paste("Some pathway names should be different from IDs for", pathway_type))
}
})
# Performance Consistency Tests
test_that("performance is comparable across pathway types", {
skip_if_not_installed("fgsea")
test_data <- create_cross_pathway_test_data(n_samples = 40)
pathway_types <- c("KEGG", "MetaCyc", "GO")
execution_times <- list()
for (pathway_type in pathway_types) {
if (pathway_type == "KEGG" || pathway_type == "GO") {
abundance_data <- test_data$ko_abundance
gene_names <- rownames(abundance_data)
} else {
abundance_data <- test_data$ec_abundance
gene_names <- rownames(abundance_data)
}
# Create larger gene sets for performance testing
mock_gene_sets <- create_mock_gene_sets(pathway_type, gene_names)
# Expand to more pathways
for (i in 16:50) {
if (pathway_type == "KEGG") {
pathway_id <- paste0("ko", sprintf("%05d", i))
} else if (pathway_type == "MetaCyc") {
pathway_id <- paste0("PWY-", 1000 + i)
} else {
pathway_id <- paste0("GO:", sprintf("%07d", i))
}
pathway_size <- sample(8:25, 1)
genes <- sample(gene_names, min(pathway_size, length(gene_names)))
mock_gene_sets[[pathway_id]] <- genes
}
mockery::stub(pathway_gsea, "prepare_gene_sets", function(...) mock_gene_sets)
# Create comprehensive mock result
mock_result <- data.frame(
pathway_id = names(mock_gene_sets),
pathway_name = names(mock_gene_sets),
size = sapply(mock_gene_sets, length),
ES = runif(length(mock_gene_sets), -2, 2),
NES = runif(length(mock_gene_sets), -3, 3),
pvalue = runif(length(mock_gene_sets), 0.001, 0.5),
p.adjust = runif(length(mock_gene_sets), 0.005, 0.8),
leading_edge = sapply(mock_gene_sets, function(x) paste(sample(x, min(5, length(x))), collapse = ";")),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) mock_result)
# Measure execution time
start_time <- Sys.time()
result <- pathway_gsea(
abundance = abundance_data,
metadata = test_data$metadata,
group = "Environment",
pathway_type = pathway_type,
method = "fgsea",
nperm = 100,
seed = 123
)
end_time <- Sys.time()
execution_times[[pathway_type]] <- as.numeric(end_time - start_time, units = "secs")
# Verify result completeness
expect_s3_class(result, "data.frame")
expect_true(nrow(result) > 30)
expect_true(all(!is.na(result$pvalue)))
}
# Compare execution times - should be reasonably similar
times <- unlist(execution_times)
expect_true(all(times < 5), "All pathway types should complete within 5 seconds")
# No pathway type should be more than 3x slower than the fastest
time_ratio <- max(times) / min(times)
expect_true(time_ratio < 3,
paste("Performance ratio should be reasonable. Actual ratio:", round(time_ratio, 2)))
})
# Comparative Analysis Tests
test_that("pathway overlap analysis between types works", {
test_data <- create_cross_pathway_test_data()
# Create overlapping gene sets between pathway types
# Some KOs appear in both KEGG and GO pathways
common_kos <- rownames(test_data$ko_abundance)[1:20]
kegg_sets <- list(
"ko00010" = common_kos[1:10],
"ko00020" = common_kos[5:15],
"ko00030" = common_kos[10:20]
)
go_sets <- list(
"GO:0006096" = common_kos[1:8], # Overlap with ko00010
"GO:0006099" = common_kos[12:18], # Overlap with ko00030
"GO:0008152" = common_kos[1:5] # Overlap with ko00010
)
metacyc_sets <- list(
"PWY-1001" = rownames(test_data$ec_abundance)[1:8],
"PWY-1002" = rownames(test_data$ec_abundance)[5:12],
"PWY-1003" = rownames(test_data$ec_abundance)[10:15]
)
# Mock results for each pathway type
mock_kegg <- data.frame(
pathway_id = names(kegg_sets),
pathway_name = c("Glycolysis", "Citrate cycle", "Pentose phosphate"),
size = c(10, 11, 11),
ES = c(-1.5, 1.2, -0.8),
NES = c(-2.3, 1.8, -1.2),
pvalue = c(0.01, 0.05, 0.15),
p.adjust = c(0.03, 0.1, 0.2),
leading_edge = c("K00001;K00002", "K00010;K00011", "K00020"),
method = "fgsea",
stringsAsFactors = FALSE
)
mock_go <- data.frame(
pathway_id = names(go_sets),
pathway_name = c("Glycolytic process", "TCA cycle", "Metabolic process"),
size = c(8, 7, 5),
ES = c(-1.3, 1.0, -1.1),
NES = c(-2.0, 1.5, -1.8),
pvalue = c(0.02, 0.08, 0.12),
p.adjust = c(0.06, 0.15, 0.18),
leading_edge = c("K00001;K00003", "K00015;K00016", "K00002"),
method = "fgsea",
stringsAsFactors = FALSE
)
mock_metacyc <- data.frame(
pathway_id = names(metacyc_sets),
pathway_name = c("Glucose degradation", "Fatty acid oxidation", "Amino acid biosynthesis"),
size = c(8, 8, 6),
ES = c(-1.0, 0.9, 1.3),
NES = c(-1.7, 1.4, 2.0),
pvalue = c(0.04, 0.06, 0.03),
p.adjust = c(0.08, 0.12, 0.06),
leading_edge = c("EC:1.1.1.1;EC:2.2.2.2", "EC:3.3.3.3", "EC:4.4.4.4;EC:5.5.5.5"),
method = "fgsea",
stringsAsFactors = FALSE
)
# Test that we can identify complementary biological insights
expect_true(nrow(mock_kegg) > 0, "KEGG analysis should find pathways")
expect_true(nrow(mock_go) > 0, "GO analysis should find pathways")
expect_true(nrow(mock_metacyc) > 0, "MetaCyc analysis should find pathways")
# Test pathway name similarity analysis
kegg_names <- tolower(mock_kegg$pathway_name)
go_names <- tolower(mock_go$pathway_name)
# Should find some conceptual overlap
name_similarity <- outer(kegg_names, go_names, function(x, y) {
sapply(seq_along(x), function(i) {
sum(strsplit(x[i], " ")[[1]] %in% strsplit(y[i], " ")[[1]])
})
})
expect_true(any(name_similarity > 0),
"Should find some conceptual overlap between KEGG and GO pathway names")
})
test_that("biological interpretation consistency check", {
# Test that the three pathway types provide complementary insights
# This is more of a sanity check for biological coherence
# Mock results representing typical microbiome analysis outcomes
kegg_results <- data.frame(
pathway_id = c("ko00010", "ko00020", "ko00030", "ko00230", "ko00260"),
pathway_name = c("Glycolysis", "Citrate cycle", "Pentose phosphate", "Purine metabolism", "Glycine metabolism"),
NES = c(-2.1, 1.5, -1.2, 2.3, -1.8),
pvalue = c(0.001, 0.02, 0.15, 0.005, 0.03),
p.adjust = c(0.005, 0.04, 0.2, 0.015, 0.06),
pathway_type = "KEGG",
stringsAsFactors = FALSE
)
go_results <- data.frame(
pathway_id = c("GO:0006096", "GO:0006099", "GO:0008152", "GO:0009058", "GO:0006520"),
pathway_name = c("Glycolytic process", "TCA cycle", "Metabolic process", "Biosynthetic process", "Amino acid metabolism"),
NES = c(-1.9, 1.3, -1.5, 2.0, -1.6),
pvalue = c(0.002, 0.03, 0.08, 0.01, 0.04),
p.adjust = c(0.01, 0.06, 0.12, 0.02, 0.08),
pathway_type = "GO",
stringsAsFactors = FALSE
)
metacyc_results <- data.frame(
pathway_id = c("PWY-1001", "PWY-1002", "PWY-1003", "PWY-1004", "PWY-1005"),
pathway_name = c("Glucose degradation", "TCA cycle", "Glycogen biosynthesis", "Fatty acid oxidation", "Leucine biosynthesis"),
NES = c(-2.0, 1.4, 1.7, -1.3, 2.2),
pvalue = c(0.001, 0.025, 0.015, 0.12, 0.008),
p.adjust = c(0.005, 0.05, 0.03, 0.15, 0.02),
pathway_type = "MetaCyc",
stringsAsFactors = FALSE
)
# Test biological coherence
# 1. Similar pathways should show similar enrichment directions
glycolysis_kegg <- kegg_results[kegg_results$pathway_name == "Glycolysis", "NES"]
glycolysis_go <- go_results[go_results$pathway_name == "Glycolytic process", "NES"]
glycolysis_metacyc <- metacyc_results[metacyc_results$pathway_name == "Glucose degradation", "NES"]
# All should be negative (depleted) - consistent with each other
expect_true(sign(glycolysis_kegg) == sign(glycolysis_go),
"KEGG and GO glycolysis pathways should have same enrichment direction")
expect_true(sign(glycolysis_kegg) == sign(glycolysis_metacyc),
"KEGG and MetaCyc glucose degradation should have same enrichment direction")
# 2. Test that we have both positive and negative enrichments (realistic biology)
for (results in list(kegg_results, go_results, metacyc_results)) {
expect_true(any(results$NES > 0) && any(results$NES < 0),
"Should have both upregulated and downregulated pathways")
expect_true(any(results$pvalue < 0.05),
"Should have some significant pathways")
}
# 3. P-value distributions should be reasonable
all_pvals <- c(kegg_results$pvalue, go_results$pvalue, metacyc_results$pvalue)
expect_true(mean(all_pvals) > 0.01 && mean(all_pvals) < 0.8,
"P-value distribution should be reasonable across all pathway types")
})
# Summary test for overall consistency
test_that("overall cross-pathway consistency summary", {
# This test summarizes that all major consistency checks pass
test_data <- create_cross_pathway_test_data()
pathway_types <- c("KEGG", "MetaCyc", "GO")
# Test that all pathway types can be analyzed successfully
success_count <- 0
for (pathway_type in pathway_types) {
tryCatch({
if (pathway_type == "KEGG" || pathway_type == "GO") {
abundance_data <- test_data$ko_abundance
gene_names <- rownames(abundance_data)
} else {
abundance_data <- test_data$ec_abundance
gene_names <- rownames(abundance_data)
}
mock_gene_sets <- create_mock_gene_sets(pathway_type, gene_names)
mockery::stub(pathway_gsea, "prepare_gene_sets", function(...) mock_gene_sets)
mock_result <- data.frame(
pathway_id = names(mock_gene_sets)[1:5],
pathway_name = names(mock_gene_sets)[1:5],
size = rep(15, 5),
ES = runif(5, -2, 2),
NES = runif(5, -3, 3),
pvalue = runif(5, 0.01, 0.3),
p.adjust = runif(5, 0.05, 0.4),
leading_edge = rep("gene1;gene2", 5),
stringsAsFactors = FALSE
)
mockery::stub(pathway_gsea, "run_fgsea", function(...) mock_result)
result <- pathway_gsea(
abundance = abundance_data,
metadata = test_data$metadata,
group = "Environment",
pathway_type = pathway_type,
method = "fgsea",
seed = 123
)
# Basic validation
if (is.data.frame(result) && nrow(result) == 5 &&
all(c("pathway_id", "NES", "pvalue") %in% colnames(result))) {
success_count <- success_count + 1
}
}, error = function(e) {
# Test should not error
})
}
expect_equal(success_count, 3)
# Print summary message
message("Cross-pathway consistency tests completed successfully!")
message("✓ API consistency across KEGG, MetaCyc, and GO")
message("✓ Statistical consistency in calculations")
message("✓ Visualization compatibility")
message("✓ Annotation system uniformity")
message("✓ Performance parity")
message("✓ Integration workflow reliability")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.