Nothing
create_isolated_test_db()
test_that("gintervals.liftover multi-target aggregation policies", {
local_db_state()
# Source genome with a single chromosome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 400), collapse = ""), "\n")))
# Source intervals with values (similar to track values)
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = rep("chrsource1", 3),
start = c(0, 10, 20),
end = c(10, 20, 30),
value = c(1, 2, 3),
stringsAsFactors = FALSE
)
# Variant with NA in the middle value
src_intervals_na <- data.frame(
chrom = rep("chrsource1", 3),
start = c(0, 10, 20),
end = c(10, 20, 30),
value = c(1, NaN, 3),
stringsAsFactors = FALSE
)
# Target genome
setup_db(list(paste0(">chrA\n", paste(rep("T", 400), collapse = ""), "\n")))
# Chain mappings: all three intervals map into overlapping target regions
chain_file <- new_chain_file()
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 400, "+", 0, 10, "chrA", 400, "+", 0, 10, 1) # coverage len 10, value 1
write_chain_entry(chain_file, "chrsource1", 400, "+", 10, 20, "chrA", 400, "+", 3, 13, 2) # coverage len 7, value 2
write_chain_entry(chain_file, "chrsource1", 400, "+", 20, 30, "chrA", 400, "+", 7, 17, 3) # coverage len 3, value 3
liftover_with <- function(intervals = src_intervals, agg = "mean", params = NULL,
na.rm = TRUE, min_n = NULL, value_col = "value") {
gintervals.liftover(
intervals, chain_file,
value_col = value_col,
multi_target_agg = agg,
params = params,
na.rm = na.rm,
min_n = min_n
)
}
# Test basic aggregations - all three source intervals map to overlapping regions
# Without aggregation, we'd get 3 separate target intervals
# With value_col, we get the value column passed through
result <- liftover_with()
expect_true("value" %in% names(result))
expect_equal(nrow(result), 3) # 3 mapped intervals
expect_true(all(result$value %in% c(1, 2, 3))) # values preserved
result <- liftover_with(agg = "sum")
expect_true(all(result$value %in% c(1, 2, 3))) # values preserved
result <- liftover_with(agg = "min")
expect_length(result$value, 3)
result <- liftover_with(agg = "max")
expect_length(result$value, 3)
result <- liftover_with(agg = "median")
expect_length(result$value, 3)
result <- liftover_with(agg = "count")
expect_length(result$value, 3)
result <- liftover_with(agg = "first")
expect_length(result$value, 3)
result <- liftover_with(agg = "last")
expect_length(result$value, 3)
result <- liftover_with(agg = "nth", params = 2)
expect_length(result$value, 3)
result <- liftover_with(agg = "nth", params = list(n = 3))
expect_length(result$value, 3)
result <- liftover_with(agg = "max.coverage_len")
expect_length(result$value, 3)
result <- liftover_with(agg = "min.coverage_len")
expect_length(result$value, 3)
result <- liftover_with(agg = "max.coverage_frac")
expect_length(result$value, 3)
result <- liftover_with(agg = "min.coverage_frac")
expect_length(result$value, 3)
# Concrete aggregation values and NA handling
# Two overlapping source intervals: [0,10)=1 and [5,15)=3 map into chrA[0,20)
src_intervals_overlap <- data.frame(
chrom = c("chrsource1", "chrsource1"),
start = c(0, 5),
end = c(10, 15),
value = c(1, 3),
stringsAsFactors = FALSE
)
src_intervals_overlap_na <- src_intervals_overlap
src_intervals_overlap_na$value[1] <- NaN
chain_file_overlap <- new_chain_file()
write_chain_entry(chain_file_overlap, "chrsource1", 400, "+", 0, 20, "chrA", 400, "+", 0, 20, 1)
# Mean aggregation averages overlapping contributions
res_mean <- gintervals.liftover(
src_intervals_overlap, chain_file_overlap,
value_col = "value",
multi_target_agg = "mean"
)
expect_equal(res_mean$start, c(0, 5, 10))
expect_equal(res_mean$end, c(5, 10, 15))
expect_equal(res_mean$value, c(1, 2, 3))
# Sum aggregation
res_sum <- gintervals.liftover(
src_intervals_overlap, chain_file_overlap,
value_col = "value",
multi_target_agg = "sum"
)
expect_equal(res_sum$value, c(1, 4, 3))
# Count aggregation counts contributors
res_count <- gintervals.liftover(
src_intervals_overlap, chain_file_overlap,
value_col = "value",
multi_target_agg = "count"
)
expect_equal(res_count$value, c(1, 2, 1))
# max.coverage_len chooses the contributor with largest overlap (ties → higher value)
res_covlen <- gintervals.liftover(
src_intervals_overlap, chain_file_overlap,
value_col = "value",
multi_target_agg = "max.coverage_len"
)
expect_equal(res_covlen$value, c(1, 3, 3))
# NA handling: drop NA when possible, preserve when all are NA
res_na <- gintervals.liftover(
src_intervals_overlap_na, chain_file_overlap,
value_col = "value",
multi_target_agg = "mean",
na.rm = TRUE
)
expect_true(is.nan(res_na$value[1]))
expect_equal(res_na$value[2], 3)
expect_equal(res_na$value[3], 3)
res_na_prop <- gintervals.liftover(
src_intervals_overlap_na, chain_file_overlap,
value_col = "value",
multi_target_agg = "mean",
na.rm = FALSE
)
expect_true(is.nan(res_na_prop$value[2]))
# NA handling
result <- liftover_with(intervals = src_intervals_na, agg = "mean", na.rm = TRUE)
expect_true("value" %in% names(result))
# With NA in middle, we have values 1, NaN, 3
expect_true(any(is.nan(result$value)) || all(!is.nan(result$value)))
result <- liftover_with(intervals = src_intervals_na, agg = "mean", na.rm = FALSE)
expect_true("value" %in% names(result))
# min_n gating
result <- liftover_with(intervals = src_intervals_na, agg = "mean", na.rm = TRUE, min_n = 3)
expect_true("value" %in% names(result))
result <- liftover_with(intervals = src_intervals_na, agg = "mean", na.rm = TRUE, min_n = 2)
expect_true("value" %in% names(result))
})
test_that("gintervals.liftover aggregation preserves intervalID and chain_id", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 200), collapse = ""), "\n")))
# Source intervals with values
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = rep("chrsource1", 3),
start = c(0, 10, 20),
end = c(10, 20, 30),
score = c(10, 20, 30),
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrB\n", paste(rep("G", 200), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Map each interval to different target locations (no overlap)
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 200, "+", 0, 10, "chrB", 200, "+", 100, 110, 1)
write_chain_entry(chain_file, "chrsource1", 200, "+", 10, 20, "chrB", 200, "+", 110, 120, 2)
write_chain_entry(chain_file, "chrsource1", 200, "+", 20, 30, "chrB", 200, "+", 120, 130, 3)
result <- gintervals.liftover(
src_intervals, chain_file,
value_col = "score",
multi_target_agg = "sum"
)
# Should have intervalID and chain_id columns
expect_true("intervalID" %in% names(result))
expect_true("chain_id" %in% names(result))
expect_true("score" %in% names(result))
# Should have 3 rows (one per source interval)
expect_equal(nrow(result), 3)
# intervalID should be 1, 2, 3
expect_equal(sort(result$intervalID), c(1, 2, 3))
# chain_id should be 1, 2, 3
expect_equal(sort(result$chain_id), c(1, 2, 3))
# scores should be preserved
expect_equal(sort(result$score), c(10, 20, 30))
})
test_that("gintervals.liftover aggregation with integer values", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# Source intervals with integer values
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = rep("chrsource1", 3),
start = c(0, 10, 20),
end = c(10, 20, 30),
count = c(5L, 10L, 15L),
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrC\n", paste(rep("C", 100), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 100, "+", 0, 10, "chrC", 100, "+", 0, 10, 1)
write_chain_entry(chain_file, "chrsource1", 100, "+", 10, 20, "chrC", 100, "+", 0, 10, 2)
write_chain_entry(chain_file, "chrsource1", 100, "+", 20, 30, "chrC", 100, "+", 0, 10, 3)
result <- gintervals.liftover(
src_intervals, chain_file,
value_col = "count",
multi_target_agg = "mean"
)
# Should handle integer values
expect_true("count" %in% names(result))
expect_true(all(result$count %in% c(5, 10, 15)))
})
test_that("gintervals.liftover without value_col works as before", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# Source intervals without specifying value_col
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = rep("chrsource1", 2),
start = c(0, 10),
end = c(10, 20),
extra_col = c("a", "b"),
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrD\n", paste(rep("T", 100), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 100, "+", 0, 10, "chrD", 100, "+", 0, 10, 1)
write_chain_entry(chain_file, "chrsource1", 100, "+", 10, 20, "chrD", 100, "+", 5, 15, 2)
# Without value_col, should work as before (no aggregation)
result <- gintervals.liftover(src_intervals, chain_file)
expect_true("intervalID" %in% names(result))
expect_true("chain_id" %in% names(result))
expect_false("value" %in% names(result)) # no value column added
expect_equal(nrow(result), 2)
})
test_that("gintervals.liftover validates value_col parameter", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = "chrsource1",
start = 0,
end = 10,
score = 5,
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrE\n", paste(rep("C", 100), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 100, "+", 0, 10, "chrE", 100, "+", 0, 10, 1)
# Invalid value_col name
expect_error(
gintervals.liftover(src_intervals, chain_file, value_col = "nonexistent"),
"value_col 'nonexistent' not found in intervals"
)
# Non-string value_col
expect_error(
gintervals.liftover(src_intervals, chain_file, value_col = 123),
"value_col must be a single character string"
)
# Multiple value_col names
expect_error(
gintervals.liftover(src_intervals, chain_file, value_col = c("score", "other")),
"value_col must be a single character string"
)
})
test_that("gintervals.liftover nth aggregator validates params", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = "chrsource1",
start = 0,
end = 10,
value = 1,
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrF\n", paste(rep("G", 100), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 100, "+", 0, 10, "chrF", 100, "+", 0, 10, 1)
expect_error(
gintervals.liftover(
src_intervals, chain_file,
value_col = "value",
multi_target_agg = "nth"
),
"params must be supplied for 'nth' aggregation"
)
expect_error(
gintervals.liftover(
src_intervals, chain_file,
value_col = "value",
multi_target_agg = "nth",
params = list()
),
"params list must contain an element 'n' for 'nth'",
fixed = TRUE
)
})
test_that("gintervals.liftover aggregation with all NA values", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# All NA values
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = rep("chrsource1", 3),
start = c(0, 10, 20),
end = c(10, 20, 30),
value = c(NaN, NaN, NaN),
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrG\n", paste(rep("T", 100), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 100, "+", 0, 10, "chrG", 100, "+", 0, 10, 1)
write_chain_entry(chain_file, "chrsource1", 100, "+", 10, 20, "chrG", 100, "+", 0, 10, 2)
write_chain_entry(chain_file, "chrsource1", 100, "+", 20, 30, "chrG", 100, "+", 0, 10, 3)
# All aggregators should preserve NA values (propagate to result)
for (agg in c("mean", "sum", "min", "max", "median", "first", "last", "max.coverage_len")) {
result <- gintervals.liftover(
src_intervals, chain_file,
value_col = "value",
multi_target_agg = agg,
na.rm = TRUE
)
expect_true("value" %in% names(result), info = paste("aggregator:", agg))
expect_true(all(is.nan(result$value) | is.na(result$value)), info = paste("aggregator:", agg))
}
# count with all NAs should still preserve the NaN values (one per source interval)
result <- gintervals.liftover(
src_intervals, chain_file,
value_col = "value",
multi_target_agg = "count",
na.rm = TRUE
)
expect_true(all(is.nan(result$value) | result$value == 0))
})
test_that("gintervals.liftover with canonic mode preserves values", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 200), collapse = ""), "\n")))
# Source intervals that will create adjacent mappings
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = rep("chrsource1", 2),
start = c(0, 10),
end = c(10, 20),
score = c(100, 100),
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrH\n", paste(rep("C", 200), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Map to adjacent target regions (will be merged in canonic mode)
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 200, "+", 0, 10, "chrH", 200, "+", 0, 10, 1)
write_chain_entry(chain_file, "chrsource1", 200, "+", 10, 20, "chrH", 200, "+", 10, 20, 1) # same chain_id
result <- gintervals.liftover(
src_intervals, chain_file,
value_col = "score",
multi_target_agg = "mean",
canonic = TRUE
)
expect_true("score" %in% names(result))
# With canonic=TRUE and same intervalID/chain_id, adjacent intervals should be merged
# Both have same value (100), so merged result should also be 100
expect_true(all(result$score == 100))
})
test_that("gintervals.liftover aggregation handles multiple value types", {
local_db_state()
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# Test with numeric (double) values
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals_double <- data.frame(
chrom = "chrsource1",
start = 0,
end = 10,
val = 3.14,
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrI\n", paste(rep("G", 100), collapse = ""), "\n")))
chain_file <- new_chain_file()
# Chain file uses "chrsource1" to match database chromosome name
write_chain_entry(chain_file, "chrsource1", 100, "+", 0, 10, "chrI", 100, "+", 0, 10, 1)
result <- gintervals.liftover(
src_intervals_double, chain_file,
value_col = "val",
multi_target_agg = "mean"
)
expect_true("val" %in% names(result))
expect_equal(result$val[1], 3.14, tolerance = 1e-6)
})
test_that("gintervals.liftover aggregates across multiple chain_ids mapping to same target", {
local_db_state()
source_db <- setup_source_db(list(
paste0(">source1\n", paste(rep("A", 300), collapse = ""), "\n")
))
# Source intervals from different regions that will map to the same target
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1"),
start = c(0, 100),
end = c(100, 200),
value = c(0.9, 0.85),
stringsAsFactors = FALSE
)
setup_db(list(paste0(">chrTarget\n", paste(rep("G", 200), collapse = ""), "\n")))
# Two chains mapping to the SAME target location but from different source regions
# This is the scenario where we had the bug: aggregation should combine these
chain_file <- new_chain_file()
write_chain_entry(chain_file, "chrsource1", 300, "+", 0, 100, "chrTarget", 200, "+", 50, 150, 1, score = 100)
write_chain_entry(chain_file, "chrsource1", 300, "+", 100, 200, "chrTarget", 200, "+", 50, 150, 2, score = 95)
# Load the chain with tgt_overlap_policy="agg" to enable aggregation across chain_ids
# Test max aggregation - should return 1 row with max value
result_max <- gintervals.liftover(
src_intervals, chain_file,
src_overlap_policy = "keep",
tgt_overlap_policy = "agg",
value_col = "value",
multi_target_agg = "max"
)
expect_equal(nrow(result_max), 1, info = "Should return 1 row when aggregating across chain_ids")
expect_equal(result_max$value[1], 0.9, tolerance = 1e-6, info = "Max of 0.9 and 0.85 should be 0.9")
expect_false("intervalID" %in% names(result_max), info = "intervalID column should not be present when aggregating across different sources")
expect_false("chain_id" %in% names(result_max), info = "chain_id column should not be present when aggregating across different chains")
# Test mean aggregation
result_mean <- gintervals.liftover(
src_intervals, chain_file,
src_overlap_policy = "keep",
tgt_overlap_policy = "agg",
value_col = "value",
multi_target_agg = "mean"
)
expect_equal(nrow(result_mean), 1, info = "Should return 1 row")
expect_equal(result_mean$value[1], 0.875, tolerance = 1e-6, info = "Mean of 0.9 and 0.85 should be 0.875")
# Test sum aggregation
result_sum <- gintervals.liftover(
src_intervals, chain_file,
src_overlap_policy = "keep",
tgt_overlap_policy = "agg",
value_col = "value",
multi_target_agg = "sum"
)
expect_equal(nrow(result_sum), 1, info = "Should return 1 row")
expect_equal(result_sum$value[1], 1.75, tolerance = 1e-6, info = "Sum of 0.9 and 0.85 should be 1.75")
# Test count aggregation
result_count <- gintervals.liftover(
src_intervals, chain_file,
src_overlap_policy = "keep",
tgt_overlap_policy = "agg",
value_col = "value",
multi_target_agg = "count"
)
expect_equal(nrow(result_count), 1, info = "Should return 1 row")
expect_equal(result_count$value[1], 2, info = "Count should be 2 (two contributing sources)")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.