Nothing
# Comprehensive Tests for GSEA Visualization Helper Functions
# Focus on testing the core mathematical functions and data transformations
# Following Linus principles: test the data structures and eliminate complexity
library(testthat)
# Load the package functions
devtools::load_all()
#=============================================================================
# NETWORK PLOT MATHEMATICAL VALIDATION
#=============================================================================
test_that("create_network_plot: Jaccard similarity calculation is mathematically correct", {
skip_if_not_installed("igraph")
skip_if_not_installed("ggraph")
skip_if_not_installed("tidygraph")
# Create test data with precisely controlled gene sets for validation
test_data <- data.frame(
pathway_id = c("pathA", "pathB", "pathC", "pathD"),
pathway_name = c("Pathway A", "Pathway B", "Pathway C", "Pathway D"),
NES = c(2.1, -1.5, 1.8, -0.9),
pvalue = c(0.001, 0.05, 0.01, 0.1),
p.adjust = c(0.01, 0.1, 0.05, 0.2),
size = c(5, 4, 6, 3),
# Precisely controlled gene sets:
# pathA: {G1, G2, G3, G4, G5} (5 genes)
# pathB: {G1, G2, G6, G7} (4 genes)
# pathC: {G1, G8, G9, G10, G11, G12} (6 genes)
# pathD: {G13, G14, G15} (3 genes, no overlap)
leading_edge = c(
"G1;G2;G3;G4;G5",
"G1;G2;G6;G7",
"G1;G8;G9;G10;G11;G12",
"G13;G14;G15"
),
stringsAsFactors = FALSE
)
# Expected Jaccard similarities:
# pathA vs pathB: |{G1,G2}| / |{G1,G2,G3,G4,G5,G6,G7}| = 2/7 ≈ 0.286
# pathA vs pathC: |{G1}| / |{G1,G2,G3,G4,G5,G8,G9,G10,G11,G12}| = 1/10 = 0.1
# pathA vs pathD: 0 (no intersection)
# pathB vs pathC: |{G1}| / |{G1,G2,G6,G7,G8,G9,G10,G11,G12}| = 1/9 ≈ 0.111
# pathB vs pathD: 0
# pathC vs pathD: 0
# Test with cutoff that should allow pathA-pathB connection (0.286 > 0.2)
expect_no_error({
plot_jaccard_low <- visualize_gsea(
test_data,
plot_type = "network",
n_pathways = 4,
network_params = list(
similarity_measure = "jaccard",
similarity_cutoff = 0.2,
layout = "circle" # Consistent layout for testing
)
)
expect_s3_class(plot_jaccard_low, "ggplot")
})
# Test with higher cutoff that should eliminate most connections
expect_no_error({
plot_jaccard_high <- visualize_gsea(
test_data,
plot_type = "network",
n_pathways = 4,
network_params = list(
similarity_measure = "jaccard",
similarity_cutoff = 0.5 # Should eliminate all connections
)
)
expect_s3_class(plot_jaccard_high, "ggplot")
# Should show message about no connections
})
})
test_that("create_network_plot: Overlap coefficient calculation is mathematically correct", {
skip_if_not_installed("igraph")
skip_if_not_installed("ggraph")
skip_if_not_installed("tidygraph")
# Same test data as Jaccard test for consistency
test_data <- data.frame(
pathway_id = c("pathA", "pathB", "pathC"),
NES = c(2.1, -1.5, 1.8),
pvalue = c(0.001, 0.05, 0.01),
p.adjust = c(0.01, 0.1, 0.05),
size = c(5, 4, 6),
leading_edge = c(
"G1;G2;G3;G4;G5", # 5 genes
"G1;G2;G6;G7", # 4 genes
"G1;G8;G9;G10;G11;G12" # 6 genes
),
stringsAsFactors = FALSE
)
# Expected overlap coefficients:
# pathA vs pathB: |{G1,G2}| / min(5,4) = 2/4 = 0.5
# pathA vs pathC: |{G1}| / min(5,6) = 1/5 = 0.2
# pathB vs pathC: |{G1}| / min(4,6) = 1/4 = 0.25
expect_no_error({
plot_overlap <- visualize_gsea(
test_data,
plot_type = "network",
n_pathways = 3,
network_params = list(
similarity_measure = "overlap",
similarity_cutoff = 0.15, # Should include all connections
layout = "circle"
)
)
expect_s3_class(plot_overlap, "ggplot")
})
# Test with medium cutoff
expect_no_error({
plot_overlap_med <- visualize_gsea(
test_data,
plot_type = "network",
n_pathways = 3,
network_params = list(
similarity_measure = "overlap",
similarity_cutoff = 0.3, # Should include pathA-pathB (0.5) only
layout = "circle"
)
)
expect_s3_class(plot_overlap_med, "ggplot")
})
})
test_that("create_network_plot: Correlation similarity calculation is mathematically correct", {
skip_if_not_installed("igraph")
skip_if_not_installed("ggraph")
skip_if_not_installed("tidygraph")
test_data <- data.frame(
pathway_id = c("pathA", "pathB", "pathC"),
NES = c(2.1, -1.5, 1.8),
pvalue = c(0.001, 0.05, 0.01),
p.adjust = c(0.01, 0.1, 0.05),
size = c(5, 4, 6),
leading_edge = c(
"G1;G2;G3;G4;G5", # 5 genes
"G1;G2;G6;G7", # 4 genes
"G1;G8;G9;G10;G11;G12" # 6 genes
),
stringsAsFactors = FALSE
)
# Expected correlation similarities:
# pathA vs pathB: |{G1,G2}| / sqrt(5*4) = 2/sqrt(20) ≈ 0.447
# pathA vs pathC: |{G1}| / sqrt(5*6) = 1/sqrt(30) ≈ 0.183
# pathB vs pathC: |{G1}| / sqrt(4*6) = 1/sqrt(24) ≈ 0.204
expect_no_error({
plot_corr <- visualize_gsea(
test_data,
plot_type = "network",
n_pathways = 3,
network_params = list(
similarity_measure = "correlation",
similarity_cutoff = 0.1,
layout = "circle"
)
)
expect_s3_class(plot_corr, "ggplot")
})
})
test_that("create_network_plot: Empty and malformed leading edges are handled correctly", {
skip_if_not_installed("igraph")
skip_if_not_installed("ggraph")
skip_if_not_installed("tidygraph")
# Test data with various edge cases in leading_edge
edge_case_data <- data.frame(
pathway_id = c("path1", "path2", "path3", "path4", "path5"),
NES = c(1.0, -1.0, 2.0, -2.0, 0.5),
pvalue = c(0.01, 0.02, 0.001, 0.05, 0.1),
p.adjust = c(0.05, 0.1, 0.01, 0.2, 0.3),
size = c(3, 0, 5, 2, 1),
leading_edge = c(
"G1;G2;G3", # Normal case
"", # Empty string
"G4;G5;G6;G7;G8", # Normal case
"G1;G2", # Normal case with overlap
"G9" # Single gene
),
stringsAsFactors = FALSE
)
# Should handle empty leading edges gracefully
expect_no_error({
plot_edge_cases <- visualize_gsea(
edge_case_data,
plot_type = "network",
network_params = list(
similarity_cutoff = 0.1,
layout = "circle"
)
)
expect_s3_class(plot_edge_cases, "ggplot")
})
# Test with all empty leading edges
all_empty_data <- edge_case_data
all_empty_data$leading_edge <- rep("", 5)
expect_no_error({
plot_all_empty <- visualize_gsea(
all_empty_data,
plot_type = "network",
network_params = list(similarity_cutoff = 0.1)
)
expect_s3_class(plot_all_empty, "ggplot")
})
})
test_that("create_network_plot: Layout algorithms produce valid outputs", {
skip_if_not_installed("igraph")
skip_if_not_installed("ggraph")
skip_if_not_installed("tidygraph")
# Create data that will definitely have connections
connected_data <- data.frame(
pathway_id = c("path1", "path2", "path3", "path4"),
NES = c(1.5, -1.2, 2.0, -0.8),
pvalue = c(0.01, 0.05, 0.001, 0.1),
p.adjust = c(0.05, 0.1, 0.01, 0.2),
size = c(10, 8, 12, 6),
# Create overlapping gene sets
leading_edge = c(
"G1;G2;G3;G4;G5;G6;G7;G8;G9;G10",
"G1;G2;G3;G4;G11;G12;G13;G14",
"G1;G2;G5;G6;G15;G16;G17;G18;G19;G20;G21;G22",
"G3;G4;G23;G24;G25;G26"
),
stringsAsFactors = FALSE
)
# Test all supported layout algorithms
layouts <- c("fruchterman", "kamada", "circle")
for (layout in layouts) {
expect_no_error({
plot_layout <- visualize_gsea(
connected_data,
plot_type = "network",
network_params = list(
layout = layout,
similarity_cutoff = 0.1,
similarity_measure = "jaccard"
)
)
expect_s3_class(plot_layout, "ggplot")
})
}
# Test invalid layout (should default to fruchterman)
expect_no_error({
plot_invalid_layout <- visualize_gsea(
connected_data,
plot_type = "network",
network_params = list(
layout = "invalid_layout",
similarity_cutoff = 0.1
)
)
expect_s3_class(plot_invalid_layout, "ggplot")
})
})
test_that("create_network_plot: Node coloring and sizing work correctly", {
skip_if_not_installed("igraph")
skip_if_not_installed("ggraph")
skip_if_not_installed("tidygraph")
test_data <- data.frame(
pathway_id = c("path1", "path2", "path3"),
NES = c(2.5, -2.0, 1.0),
pvalue = c(0.001, 0.01, 0.05),
p.adjust = c(0.005, 0.02, 0.1),
size = c(100, 50, 25),
leading_edge = c(
"G1;G2;G3;G4",
"G1;G2;G5;G6",
"G1;G7;G8"
),
stringsAsFactors = FALSE
)
# Test different node coloring attributes
node_color_attrs <- c("NES", "pvalue", "p.adjust")
for (attr in node_color_attrs) {
expect_no_error({
plot_color <- visualize_gsea(
test_data,
plot_type = "network",
network_params = list(
node_color_by = attr,
similarity_cutoff = 0.1
)
)
expect_s3_class(plot_color, "ggplot")
})
}
# Test edge width mapping
expect_no_error({
plot_edge_width <- visualize_gsea(
test_data,
plot_type = "network",
network_params = list(
edge_width_by = "similarity",
similarity_cutoff = 0.1
)
)
expect_s3_class(plot_edge_width, "ggplot")
})
})
#=============================================================================
# HEATMAP PLOT MATHEMATICAL VALIDATION
#=============================================================================
test_that("create_heatmap_plot: Leading edge gene extraction is accurate", {
skip_if_not_installed("ComplexHeatmap")
skip_if_not_installed("circlize")
# Create abundance matrix with known gene IDs
gene_ids <- sprintf("K%05d", 1:50)
sample_ids <- sprintf("Sample_%02d", 1:12)
# Create controlled abundance matrix
abundance_matrix <- matrix(
runif(50 * 12, min = 1, max = 100),
nrow = 50,
ncol = 12,
dimnames = list(gene_ids, sample_ids)
)
abundance_df <- as.data.frame(abundance_matrix)
# Create metadata
metadata <- data.frame(
sample = sample_ids,
group = rep(c("Control", "Treatment"), each = 6),
stringsAsFactors = FALSE,
row.names = sample_ids
)
# Create GSEA results with genes that exist in abundance matrix
gsea_data <- data.frame(
pathway_id = c("path1", "path2", "path3"),
NES = c(2.1, -1.5, 1.8),
pvalue = c(0.001, 0.05, 0.01),
p.adjust = c(0.01, 0.1, 0.05),
size = c(15, 12, 10),
# Use genes that exist in our abundance matrix
leading_edge = c(
"K00001;K00002;K00003;K00004;K00005", # 5 genes from abundance matrix
"K00010;K00011;K00012", # 3 genes from abundance matrix
"K00020;K00021;K00022;K00023" # 4 genes from abundance matrix
),
stringsAsFactors = FALSE
)
expect_no_error({
heatmap_plot <- visualize_gsea(
gsea_data,
plot_type = "heatmap",
n_pathways = 3,
abundance = abundance_df,
metadata = metadata,
group = "group"
)
expect_s4_class(heatmap_plot, "Heatmap")
})
# Test with some missing genes
gsea_with_missing <- gsea_data
gsea_with_missing$leading_edge[1] <- "K00001;K00002;MISSING_GENE;K00004;ANOTHER_MISSING"
expect_no_error({
heatmap_missing <- visualize_gsea(
gsea_with_missing,
plot_type = "heatmap",
n_pathways = 3,
abundance = abundance_df,
metadata = metadata,
group = "group"
)
expect_s4_class(heatmap_missing, "Heatmap")
})
})
test_that("create_heatmap_plot: Data scaling and normalization is correct", {
skip_if_not_installed("ComplexHeatmap")
skip_if_not_installed("circlize")
# Create abundance data with known values for testing scaling
gene_ids <- sprintf("K%05d", 1:20)
sample_ids <- sprintf("Sample_%02d", 1:8)
# Create abundance matrix with controlled values
# Sample 1-4: Control group (lower values)
# Sample 5-8: Treatment group (higher values)
abundance_matrix <- cbind(
matrix(c(10, 15, 20, 25), nrow = 20, ncol = 4), # Control samples
matrix(c(40, 45, 50, 55), nrow = 20, ncol = 4) # Treatment samples
)
rownames(abundance_matrix) <- gene_ids
colnames(abundance_matrix) <- sample_ids
abundance_df <- as.data.frame(abundance_matrix)
# Create metadata
metadata <- data.frame(
sample = sample_ids,
group = c(rep("Control", 4), rep("Treatment", 4)),
stringsAsFactors = FALSE,
row.names = sample_ids
)
# Create GSEA data
gsea_data <- data.frame(
pathway_id = c("path1", "path2"),
NES = c(2.0, -1.5),
pvalue = c(0.01, 0.05),
p.adjust = c(0.05, 0.1),
size = c(5, 3),
leading_edge = c(
"K00001;K00002;K00003;K00004;K00005",
"K00010;K00011;K00012"
),
stringsAsFactors = FALSE
)
expect_no_error({
heatmap_scaled <- visualize_gsea(
gsea_data,
plot_type = "heatmap",
n_pathways = 2,
abundance = abundance_df,
metadata = metadata,
group = "group"
)
expect_s4_class(heatmap_scaled, "Heatmap")
})
})
test_that("create_heatmap_plot: Annotation handling works correctly", {
skip_if_not_installed("ComplexHeatmap")
skip_if_not_installed("circlize")
# Create test data
gene_ids <- sprintf("K%05d", 1:30)
sample_ids <- sprintf("Sample_%02d", 1:15)
abundance_matrix <- matrix(
rnorm(30 * 15, mean = 50, sd = 10),
nrow = 30,
ncol = 15,
dimnames = list(gene_ids, sample_ids)
)
abundance_df <- as.data.frame(abundance_matrix)
# Create metadata with multiple grouping variables
metadata <- data.frame(
sample = sample_ids,
group = rep(c("A", "B", "C"), each = 5),
batch = rep(c("Batch1", "Batch2"), length.out = 15),
treatment = rep(c("Control", "Treatment"), length.out = 15),
stringsAsFactors = FALSE,
row.names = sample_ids
)
gsea_data <- data.frame(
pathway_id = c("path1", "path2", "path3"),
NES = c(2.2, -1.8, 1.5),
pvalue = c(0.001, 0.01, 0.05),
p.adjust = c(0.01, 0.05, 0.1),
size = c(8, 6, 5),
leading_edge = c(
"K00001;K00002;K00003;K00004;K00005;K00006;K00007;K00008",
"K00010;K00011;K00012;K00013;K00014;K00015",
"K00020;K00021;K00022;K00023;K00024"
),
stringsAsFactors = FALSE
)
# Test with different grouping variables
grouping_vars <- c("group", "batch", "treatment")
for (group_var in grouping_vars) {
expect_no_error({
heatmap_group <- visualize_gsea(
gsea_data,
plot_type = "heatmap",
n_pathways = 3,
abundance = abundance_df,
metadata = metadata,
group = group_var
)
expect_s4_class(heatmap_group, "Heatmap")
})
}
# Test with custom annotation colors
custom_colors <- list(
Group = c("A" = "#FF0000", "B" = "#00FF00", "C" = "#0000FF")
)
expect_no_error({
heatmap_custom_colors <- visualize_gsea(
gsea_data,
plot_type = "heatmap",
n_pathways = 3,
abundance = abundance_df,
metadata = metadata,
group = "group",
heatmap_params = list(annotation_colors = custom_colors)
)
expect_s4_class(heatmap_custom_colors, "Heatmap")
})
})
test_that("create_heatmap_plot: Clustering parameters work correctly", {
skip_if_not_installed("ComplexHeatmap")
skip_if_not_installed("circlize")
# Create test data
abundance_df <- as.data.frame(matrix(
rnorm(100 * 10, mean = 25, sd = 5),
nrow = 100,
ncol = 10,
dimnames = list(sprintf("K%05d", 1:100), sprintf("Sample_%02d", 1:10))
))
metadata <- data.frame(
sample = colnames(abundance_df),
group = rep(c("Control", "Treatment"), each = 5),
stringsAsFactors = FALSE,
row.names = colnames(abundance_df)
)
gsea_data <- data.frame(
pathway_id = c("path1", "path2", "path3"),
NES = c(1.8, -2.1, 1.2),
pvalue = c(0.01, 0.001, 0.05),
p.adjust = c(0.05, 0.01, 0.1),
size = c(10, 12, 8),
leading_edge = c(
paste(sprintf("K%05d", 1:10), collapse = ";"),
paste(sprintf("K%05d", 20:31), collapse = ";"),
paste(sprintf("K%05d", 50:57), collapse = ";")
),
stringsAsFactors = FALSE
)
# Test different clustering combinations
clustering_combinations <- list(
list(cluster_rows = TRUE, cluster_columns = TRUE),
list(cluster_rows = FALSE, cluster_columns = TRUE),
list(cluster_rows = TRUE, cluster_columns = FALSE),
list(cluster_rows = FALSE, cluster_columns = FALSE)
)
for (i in seq_along(clustering_combinations)) {
params <- clustering_combinations[[i]]
expect_no_error({
heatmap_cluster <- visualize_gsea(
gsea_data,
plot_type = "heatmap",
n_pathways = 3,
abundance = abundance_df,
metadata = metadata,
group = "group",
heatmap_params = params
)
expect_s4_class(heatmap_cluster, "Heatmap")
})
}
# Test row name display options
expect_no_error({
heatmap_no_names <- visualize_gsea(
gsea_data,
plot_type = "heatmap",
n_pathways = 3,
abundance = abundance_df,
metadata = metadata,
group = "group",
heatmap_params = list(show_rownames = FALSE)
)
expect_s4_class(heatmap_no_names, "Heatmap")
})
})
#=============================================================================
# INTEGRATION TESTS FOR HELPER FUNCTIONS
#=============================================================================
test_that("Helper functions handle parameter validation correctly", {
# Test that helper functions validate their inputs properly
# Create minimal valid data
gsea_data <- data.frame(
pathway_id = c("path1", "path2"),
NES = c(1.0, -1.0),
pvalue = c(0.01, 0.05),
p.adjust = c(0.05, 0.1),
size = c(10, 15),
leading_edge = c("G1;G2;G3", "G4;G5"),
stringsAsFactors = FALSE
)
# Test that functions exist and can be called
expect_true(exists("create_network_plot"))
expect_true(exists("create_heatmap_plot"))
# These are internal functions, but we can test that visualize_gsea
# calls them correctly through the main interface
expect_true(TRUE) # Placeholder for internal function testing
})
# Performance test for helper functions
test_that("Helper functions perform adequately with larger datasets", {
skip_on_ci() # Skip on CI due to time constraints
skip_if_not_installed("igraph")
skip_if_not_installed("ggraph")
skip_if_not_installed("tidygraph")
# Create larger dataset for performance testing
large_gsea <- data.frame(
pathway_id = sprintf("path_%04d", 1:50),
NES = rnorm(50, mean = 0, sd = 1.5),
pvalue = runif(50, min = 0.001, max = 0.1),
p.adjust = runif(50, min = 0.01, max = 0.2),
size = sample(10:100, 50, replace = TRUE),
leading_edge = replicate(50, {
genes <- sprintf("G%04d", sample(1:500, sample(5:20, 1)))
paste(genes, collapse = ";")
}),
stringsAsFactors = FALSE
)
# Test network performance
start_time <- Sys.time()
expect_no_error({
plot_perf <- visualize_gsea(
large_gsea,
plot_type = "network",
n_pathways = 30,
network_params = list(similarity_cutoff = 0.2)
)
expect_s3_class(plot_perf, "ggplot")
})
end_time <- Sys.time()
# Should complete in reasonable time (less than 30 seconds)
time_taken <- as.numeric(end_time - start_time)
expect_lt(time_taken, 30, info = sprintf("Network plot took %.2f seconds", time_taken))
})
message("GSEA visualization helper function tests completed successfully")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.