Nothing
write_chain_entry <- function(con, srcName, srcSize, srcStrand, srcStart, srcEnd,
tgtName, tgtSize, tgtStrand, tgtStart, tgtEnd, id, score = 1000, append = NULL) {
# "chain 1000 source1 100 + 10 30 chr1 44 + 0 20 1"
# where "source*" is the source (query) side and "chr*" is the target side.
# Use %.0f for large integers to avoid %d overflow
# Auto-detect append mode: if file exists and has content, append
if (is.null(append)) {
append <- file.exists(con) && file.info(con)$size > 0
}
cat(
sprintf(
"chain %.0f %s %.0f %s %.0f %.0f %s %.0f %s %.0f %.0f %d\n",
as.numeric(score),
srcName, as.numeric(srcSize), srcStrand, as.numeric(srcStart), as.numeric(srcEnd),
tgtName, as.numeric(tgtSize), tgtStrand, as.numeric(tgtStart), as.numeric(tgtEnd), id
),
file = con, append = append
)
# single block size line (srcEnd - srcStart), then blank line
cat(sprintf("%.0f\n\n", as.numeric(srcEnd - srcStart)), file = con, append = TRUE)
}
setup_db <- function(chrom_defs, return_db = FALSE) {
# chrom_defs = list(">chr1\nAAAA...", ... as strings to write)
# OR list(">chr1\n", "AAAA...", "\n", ">chr2\n", "CCCC...", "\n") - header and sequence can be separate
# .gseq.import() extracts chromosome name from filename: chr(\\w+)
# We need separate FASTA files for each chromosome with names like chr1.fasta, chr2.fasta
# If return_db is TRUE, returns the database path; otherwise returns the first FASTA file path
# Combine all elements into a single string first
combined <- paste(chrom_defs, collapse = "")
# Split by FASTA headers to get individual chromosomes
# Handle any chromosome name (not just "chr" prefixed)
lines <- strsplit(combined, "\n")[[1]]
chrom_entries <- list()
current_entry <- NULL
for (line in lines) {
if (grepl("^>", line)) {
# Start a new chromosome entry (any header starting with ">")
if (!is.null(current_entry)) {
chrom_entries[[length(chrom_entries) + 1]] <- paste(current_entry, collapse = "\n")
}
current_entry <- c(line)
} else if (!is.null(current_entry)) {
# Add to current entry
current_entry <- c(current_entry, line)
}
}
# Add the last entry
if (!is.null(current_entry)) {
chrom_entries[[length(chrom_entries) + 1]] <- paste(current_entry, collapse = "\n")
}
if (length(chrom_entries) == 0) {
stop("No valid FASTA entries found")
}
# Extract chromosome names and create files
fastas <- character(length(chrom_entries))
for (i in seq_along(chrom_entries)) {
entry <- chrom_entries[[i]]
# Extract header line (first line)
entry_lines <- strsplit(entry, "\n")[[1]]
header_line <- entry_lines[1]
# Extract chromosome name from header (e.g., "chr1" from ">chr1" or "AncRef" from ">AncRef")
chrom_name <- gsub("^>(\\S+).*$", "\\1", header_line, perl = TRUE)
if (chrom_name == header_line || chrom_name == "") {
stop(sprintf("Invalid FASTA header in entry %d: %s", i, header_line))
}
# .gseq.import() requires filename to start with "chr" and match pattern ^chr(\\w+)
# The database chromosome name is extracted from filename: chr(\\w+) -> adds "chr" prefix
# So if filename is "chrX.fasta", it extracts "X" and database has "chrX"
# If filename is "chrAncRef.fasta", it extracts "AncRef" and database has "chrAncRef"
# We need to update the FASTA header to match what will be in the database
if (grepl("^chr", chrom_name)) {
# Already starts with "chr", database will have same name
filename_chrom <- chrom_name
db_chrom_name <- chrom_name
} else {
# Doesn't start with "chr", filename will be "chr<name>.fasta"
# Database will extract "<name>" and add "chr" -> "chr<name>"
filename_chrom <- paste0("chr", chrom_name)
db_chrom_name <- paste0("chr", chrom_name)
}
fastas[i] <- file.path(tempdir(), paste0(filename_chrom, ".fasta"))
# Update FASTA header to match database chromosome name
entry_lines[1] <- paste0(">", db_chrom_name)
entry <- paste(entry_lines, collapse = "\n")
# Ensure entry ends with newline for proper FASTA format
if (!grepl("\n$", entry)) {
entry <- paste0(entry, "\n")
}
cat(entry, file = fastas[i])
}
db <- tempfile()
suppressMessages(gdb.create(groot = db, fasta = fastas))
gdb.init(db)
withr::defer(
{
unlink(db, recursive = TRUE)
unlink(fastas)
},
testthat::teardown_env()
)
if (return_db) {
db # Return database path
} else {
fastas[1] # Return first file path for compatibility
}
}
setup_source_db <- function(chrom_defs) {
# Similar to setup_db but for source databases - returns the database path
# chrom_defs = list(">source1\nAAAA...", ... as strings to write)
# OR list(">source1\n", "AAAA...", "\n", ">source2\n", "CCCC...", "\n") - header and sequence can be separate
setup_db(chrom_defs, return_db = TRUE)
}
new_chain_file <- function() {
f <- tempfile(fileext = ".chain")
withr::defer(unlink(f), testthat::teardown_env())
f
}
has_liftover_binary <- function() {
# Helper function to check if liftOver binary is available
result <- tryCatch(
{
system2("which", "liftOver", stdout = TRUE, stderr = FALSE)
TRUE
},
error = function(e) FALSE,
warning = function(w) FALSE
)
if (!isTRUE(result)) {
result <- tryCatch(
{
system2("liftOver", stdout = FALSE, stderr = FALSE)
TRUE
},
error = function(e) FALSE,
warning = function(w) FALSE
)
}
return(isTRUE(result))
}
run_kent_liftover <- function(src_bed, chain_file, multiple = TRUE, minMatch = 1e-7, ret_cmd = FALSE) {
withr::local_options(list(scipen = 9999))
# Returns a list with 'mapped' (data.frame) and 'unmapped' (data.frame) results
kent_result <- tempfile(fileext = ".bed")
kent_unmapped <- tempfile(fileext = ".unmapped")
# Run liftOver with -multiple flag if multiple is TRUE
system2("liftOver", c(if (multiple) "-multiple" else NULL, if (!is.null(minMatch)) paste0("-minMatch=", minMatch) else NULL, src_bed, chain_file, kent_result, kent_unmapped),
stdout = FALSE, stderr = FALSE
)
if (ret_cmd) {
return(paste("liftOver ", if (multiple) "-multiple" else NULL, if (!is.null(minMatch)) paste0("-minMatch=", minMatch) else NULL, src_bed, chain_file, kent_result, kent_unmapped))
}
# Read mapped results (with name and value columns from -multiple)
kent_data <- if (file.exists(kent_result) && file.info(kent_result)$size > 0) {
if (multiple) {
read.table(kent_result,
header = FALSE,
col.names = c("chrom", "start", "end", "name", "value"),
stringsAsFactors = FALSE
)
} else {
read.table(kent_result,
header = FALSE,
col.names = c("chrom", "start", "end"),
stringsAsFactors = FALSE
)
}
} else {
data.frame(
chrom = character(), start = integer(), end = integer(),
name = character(), value = integer(), stringsAsFactors = FALSE
)
}
# Read unmapped results
kent_unmapped_data <- if (file.exists(kent_unmapped) && file.info(kent_unmapped)$size > 0) {
if (multiple) {
read.table(kent_unmapped,
header = FALSE,
col.names = c("chrom", "start", "end"),
stringsAsFactors = FALSE
)
} else {
read.table(kent_unmapped,
header = FALSE,
col.names = c("chrom", "start", "end"),
stringsAsFactors = FALSE
)
}
} else {
data.frame(
chrom = character(), start = integer(), end = integer(),
stringsAsFactors = FALSE
)
}
# Clean up temp files
unlink(c(kent_result, kent_unmapped))
return(list(mapped = kent_data, unmapped = kent_unmapped_data))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.