Nothing
create_isolated_test_db()
# Helper function to check if liftOver binary is available
has_liftover_binary <- function() {
result <- tryCatch(
{
system2("which", "liftOver", stdout = TRUE, stderr = FALSE)
TRUE
},
error = function(e) FALSE,
warning = function(w) FALSE
)
if (!isTRUE(result)) {
result <- tryCatch(
{
system2("liftOver", stdout = FALSE, stderr = FALSE)
TRUE
},
error = function(e) FALSE,
warning = function(w) FALSE
)
}
return(isTRUE(result))
}
test_that("gtrack.liftover matches liftOver binary - basic sparse track", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# Create sparse track with specific values
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1"),
start = c(12, 20, 35),
end = c(18, 30, 45),
stringsAsFactors = FALSE
)
src_values <- c(100, 200, 300)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 100), collapse = ""), "\n")))
# Create chain: chrsource1[10-50] -> chr1[5-45]
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
cat("chain 1000 chrsource1 100 + 10 50 chr1 100 + 5 45 1\n", file = chain_file)
cat("40\n\n", file = chain_file, append = TRUE)
# Create BED file with values for liftOver binary
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# BED format: chrom start end name score
# Using score field to store the value
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t12\t18\tint1\t100\n", file = bed_input)
cat("chrsource1\t20\t30\tint2\t200\n", file = bed_input, append = TRUE)
cat("chrsource1\t35\t45\tint3\t300\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$start), ]
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
})
test_that("gtrack.liftover matches liftOver binary - one-to-many mapping with keep policy", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 300), collapse = ""), "\n")))
# Create sparse track
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1"),
start = c(100, 121, 180),
end = c(120, 122, 185),
stringsAsFactors = FALSE
)
src_values <- c(4, 5, 6)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 400), collapse = ""), "\n")))
# Create chain with source overlap: chrsource1[100-200) maps to TWO target locations
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
# First mapping: chrsource1[100-200) -> chr1[0-100)
cat("chain 1000 chrsource1 300 + 100 200 chr1 400 + 0 100 1\n", file = chain_file)
cat("100\n\n", file = chain_file, append = TRUE)
# Second mapping: chrsource1[100-200) -> chr1[200-300) (source overlap)
cat("chain 1000 chrsource1 300 + 100 200 chr1 400 + 200 300 2\n", file = chain_file, append = TRUE)
cat("100\n\n", file = chain_file, append = TRUE)
# Create BED file for liftOver binary
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t100\t120\tint1\t4\n", file = bed_input)
cat("chrsource1\t121\t122\tint2\t5\n", file = bed_input, append = TRUE)
cat("chrsource1\t180\t185\tint3\t6\n", file = bed_input, append = TRUE)
# Run liftOver binary with -multiple and -noSerial flags
# -noSerial prevents serial numbers from being written to the value field
system2("liftOver", args = c("-multiple", "-noSerial", bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
# Run gtrack.liftover with keep policy
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file,
src_overlap_policy = "keep", tgt_overlap_policy = "auto"
)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$start), ]
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
})
test_that("gtrack.liftover matches liftOver binary - multiple chromosomes", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome with multiple chromosomes
source_db <- setup_source_db(list(
paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n"),
paste0(">source2\n", paste(rep("C", 100), collapse = ""), "\n")
))
# Create sparse track with intervals on both chromosomes
# Database chromosome names are "chrsource1" and "chrsource2" (from filenames)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource2", "chrsource2"),
start = c(10, 40, 15, 50),
end = c(20, 50, 25, 60),
stringsAsFactors = FALSE
)
src_values <- c(10, 20, 30, 40)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(
paste0(">chr1\n", paste(rep("T", 200), collapse = ""), "\n"),
paste0(">chr2\n", paste(rep("G", 200), collapse = ""), "\n")
))
# Create chain mapping both source chromosomes
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
# chrsource1[0-60] -> chr1[10-70]
cat("chain 1000 chrsource1 100 + 0 60 chr1 200 + 10 70 1\n", file = chain_file)
cat("60\n\n", file = chain_file, append = TRUE)
# chrsource2[10-70] -> chr2[20-80]
cat("chain 1000 chrsource2 100 + 10 70 chr2 200 + 20 80 2\n", file = chain_file, append = TRUE)
cat("60\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome names must match chain file: "chrsource1" and "chrsource2" (from database)
cat("chrsource1\t10\t20\tint1\t10\n", file = bed_input)
cat("chrsource1\t40\t50\tint2\t20\n", file = bed_input, append = TRUE)
cat("chrsource2\t15\t25\tint3\t30\n", file = bed_input, append = TRUE)
cat("chrsource2\t50\t60\tint4\t40\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$chrom, binary_result$start), ]
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$chrom, misha_result$start), ]
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
})
test_that("gtrack.liftover matches liftOver binary - reverse strand", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 200), collapse = ""), "\n")))
# Create sparse track
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1"),
start = c(10, 50, 100),
end = c(20, 60, 120),
stringsAsFactors = FALSE
)
src_values <- c(111, 222, 333)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 200), collapse = ""), "\n")))
# Create chain with reverse strand
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
# chrsource1[0-150) + -> chr1[50-200) - (reverse strand)
cat("chain 1000 chrsource1 200 + 0 150 chr1 200 - 0 150 1\n", file = chain_file)
cat("150\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t10\t20\tint1\t111\n", file = bed_input)
cat("chrsource1\t50\t60\tint2\t222\n", file = bed_input, append = TRUE)
cat("chrsource1\t100\t120\tint3\t333\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$start), ]
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
# For reverse strand, check that values are preserved
expect_true(all(sort(as.numeric(misha_result[[lifted_track]])) == sort(binary_result$value)))
})
test_that("gtrack.liftover matches liftOver binary - chain with gaps", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome - needs to be at least 250bp for the chain
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 300), collapse = ""), "\n")))
# Create sparse track
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1"),
start = c(10, 100, 200),
end = c(20, 110, 210),
stringsAsFactors = FALSE
)
src_values <- c(111, 222, 333)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 300), collapse = ""), "\n")))
# Create chain with gaps: maps [0-50) and [150-250), but NOT [50-150)
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
cat("chain 1000 chrsource1 300 + 0 50 chr1 300 + 0 50 1\n", file = chain_file)
cat("50\n\n", file = chain_file, append = TRUE)
cat("chain 1000 chrsource1 300 + 150 250 chr1 300 + 100 200 2\n", file = chain_file, append = TRUE)
cat("100\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t10\t20\tint1\t111\n", file = bed_input)
cat("chrsource1\t100\t110\tint2\t222\n", file = bed_input, append = TRUE) # In gap, won't be mapped
cat("chrsource1\t200\t210\tint3\t333\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
if (file.exists(bed_output) && file.info(bed_output)$size > 0) {
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
} else {
binary_result <- data.frame(chrom = character(), start = numeric(), end = numeric(), name = character(), value = numeric())
}
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
if (nrow(misha_result) > 0) {
misha_result <- misha_result[order(misha_result$start), ]
}
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
if (nrow(binary_result) > 0) {
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
}
})
test_that("gtrack.liftover matches liftOver binary - small intervals", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 1000), collapse = ""), "\n")))
# Create sparse track with mix of very small and regular intervals
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1", "chrsource1"),
start = c(100, 101, 200, 500),
end = c(101, 102, 250, 600),
stringsAsFactors = FALSE
)
src_values <- c(1, 2, 50, 100)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 1000), collapse = ""), "\n")))
# Simple 1:1 mapping
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
cat("chain 1000 chrsource1 1000 + 0 700 chr1 1000 + 0 700 1\n", file = chain_file)
cat("700\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t100\t101\tint1\t1\n", file = bed_input)
cat("chrsource1\t101\t102\tint2\t2\n", file = bed_input, append = TRUE)
cat("chrsource1\t200\t250\tint3\t50\n", file = bed_input, append = TRUE)
cat("chrsource1\t500\t600\tint4\t100\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$start), ]
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
})
test_that("gtrack.liftover matches liftOver binary - boundary intervals", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 100), collapse = ""), "\n")))
# Create sparse track with intervals at boundaries
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1"),
start = c(0, 45, 90),
end = c(10, 55, 100),
stringsAsFactors = FALSE
)
src_values <- c(111, 222, 333)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 150), collapse = ""), "\n")))
# Map entire source chromosome to target
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
cat("chain 1000 chrsource1 100 + 0 100 chr1 150 + 0 100 1\n", file = chain_file)
cat("100\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t0\t10\tint1\t111\n", file = bed_input)
cat("chrsource1\t45\t55\tint2\t222\n", file = bed_input, append = TRUE)
cat("chrsource1\t90\t100\tint3\t333\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$start), ]
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
})
test_that("gtrack.liftover matches liftOver binary - special values", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 200), collapse = ""), "\n")))
# Create sparse track with special values (zero, large positive, large negative)
# Note: NaN cannot be represented in BED files easily, so we skip it
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1"),
start = c(10, 50, 70),
end = c(20, 60, 80),
stringsAsFactors = FALSE
)
src_values <- c(0, 1e10, -1e10)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 200), collapse = ""), "\n")))
# Simple 1:1 mapping
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
cat("chain 1000 chrsource1 200 + 0 100 chr1 200 + 0 100 1\n", file = chain_file)
cat("100\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t10\t20\tint1\t0\n", file = bed_input)
cat("chrsource1\t50\t60\tint2\t1e10\n", file = bed_input, append = TRUE)
cat("chrsource1\t70\t80\tint3\t-1e10\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$start), ]
# Compare results
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
# Check special values
expect_true(any(misha_result[[lifted_track]] == 0))
expect_true(any(misha_result[[lifted_track]] == 1e10))
expect_true(any(misha_result[[lifted_track]] == -1e10))
})
test_that("gtrack.liftover matches liftOver binary - unmapped intervals", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 300), collapse = ""), "\n")))
# Create sparse track with intervals in both mapped and unmapped regions
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1"),
start = c(10, 100, 200), # 10 is mapped, 100 is in gap, 200 is mapped
end = c(20, 110, 210),
stringsAsFactors = FALSE
)
src_values <- c(111, 222, 333)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 300), collapse = ""), "\n")))
# Create chain with gaps: maps [0-50) and [150-250), but NOT [50-150)
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
cat("chain 1000 chrsource1 300 + 0 50 chr1 300 + 0 50 1\n", file = chain_file)
cat("50\n\n", file = chain_file, append = TRUE)
cat("chain 1000 chrsource1 300 + 150 250 chr1 300 + 100 200 2\n", file = chain_file, append = TRUE)
cat("100\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t10\t20\tint1\t111\n", file = bed_input)
cat("chrsource1\t100\t110\tint2\t222\n", file = bed_input, append = TRUE) # In gap
cat("chrsource1\t200\t210\tint3\t333\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
if (file.exists(bed_output) && file.info(bed_output)$size > 0) {
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
} else {
binary_result <- data.frame(chrom = character(), start = numeric(), end = numeric(), name = character(), value = numeric())
}
# Check unmapped - should contain int2 (value 222)
unmapped_lines <- readLines(bed_unmapped)
unmapped_intervals <- unmapped_lines[!grepl("^#", unmapped_lines)]
expect_true(length(unmapped_intervals) > 0)
expect_true(any(grepl("int2", unmapped_intervals)))
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
if (nrow(misha_result) > 0) {
misha_result <- misha_result[order(misha_result$start), ]
}
# Compare results - should only have the 2 mapped intervals (111 and 333)
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(nrow(misha_result), 2) # Only 2 intervals mapped
if (nrow(binary_result) > 0) {
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
# Verify the unmapped interval (222) is NOT in the results
expect_false(any(misha_result[[lifted_track]] == 222))
}
})
test_that("gtrack.liftover matches liftOver binary - consecutive intervals", {
skip_if_not(has_liftover_binary(), "liftOver binary not found")
local_db_state()
# Create source genome
source_db <- setup_source_db(list(paste0(">source1\n", paste(rep("A", 300), collapse = ""), "\n")))
# Create sparse track with consecutive non-overlapping intervals
# Database chromosome name is "chrsource1" (from filename chrsource1.fasta)
src_intervals <- data.frame(
chrom = c("chrsource1", "chrsource1", "chrsource1", "chrsource1"),
start = c(100, 110, 120, 130),
end = c(110, 120, 130, 140),
stringsAsFactors = FALSE
)
src_values <- c(10, 20, 30, 40)
gtrack.create_sparse("src_track", "Source track", src_intervals, src_values)
src_track_dir <- file.path(source_db, "tracks", "src_track.track")
# Create target genome
setup_db(list(paste0(">chr1\n", paste(rep("T", 300), collapse = ""), "\n")))
# Simple 1:1 mapping with offset
chain_file <- tempfile(fileext = ".chain")
withr::defer(unlink(chain_file))
cat("chain 1000 chrsource1 300 + 100 150 chr1 300 + 50 100 1\n", file = chain_file)
cat("50\n\n", file = chain_file, append = TRUE)
# Create BED file
bed_input <- tempfile(fileext = ".bed")
bed_output <- tempfile(fileext = ".bed")
bed_unmapped <- tempfile(fileext = ".unmapped")
withr::defer({
unlink(bed_input)
unlink(bed_output)
unlink(bed_unmapped)
})
# Chromosome name must match chain file: "chrsource1" (from database)
cat("chrsource1\t100\t110\tint1\t10\n", file = bed_input)
cat("chrsource1\t110\t120\tint2\t20\n", file = bed_input, append = TRUE)
cat("chrsource1\t120\t130\tint3\t30\n", file = bed_input, append = TRUE)
cat("chrsource1\t130\t140\tint4\t40\n", file = bed_input, append = TRUE)
# Run liftOver binary
system2("liftOver", args = c(bed_input, chain_file, bed_output, bed_unmapped), stdout = FALSE, stderr = FALSE)
# Read binary output
binary_result <- read.table(bed_output, header = FALSE, stringsAsFactors = FALSE)
colnames(binary_result) <- c("chrom", "start", "end", "name", "value")
binary_result <- binary_result[order(binary_result$start), ]
# Run gtrack.liftover
lifted_track <- "lifted_track"
withr::defer({
if (gtrack.exists(lifted_track)) gtrack.rm(lifted_track, force = TRUE)
})
gtrack.liftover(lifted_track, "Lifted track", src_track_dir, chain_file)
# Extract results
misha_result <- gextract(lifted_track, gintervals.all())
misha_result <- misha_result[order(misha_result$start), ]
# Compare results - all 4 consecutive intervals should be preserved
expect_equal(nrow(misha_result), nrow(binary_result))
expect_equal(as.character(misha_result$chrom), binary_result$chrom)
expect_equal(as.numeric(misha_result$start), binary_result$start)
expect_equal(as.numeric(misha_result$end), binary_result$end)
expect_equal(as.numeric(misha_result[[lifted_track]]), binary_result$value)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.