In the BED12 format,
the position of each box in the object is encoded in the blockCount
,
blockSize
and blockStarts
columns.
In UCSCData
track objects of the rtracklayer package, the information is
encoded as an IRangesList
metadata column, where each element has one
IRanges
object representing the blocks.
In CAGEr's TagClusters
objects, the position of the central blocks is
represented by the quantile information columns.
The problem here is to create this IRanges
list in a computationally efficient
way in CAGEr's exportToTrack
function.
The best approach, f.mat.direct
is not exactly fast, but is 5 × faster than my
original attempt (f.grline
).
options(width=120) knitr::opts_chunk$set(cache = TRUE, cache.lazy = FALSE) knitr::opts_knit$set(verbose = TRUE)
suppressPackageStartupMessages(library("GenomicRanges")) suppressPackageStartupMessages(library("CAGEr")) tc <- CAGEr::exampleCAGEexp |> CAGEr::tagClustersGR(1) tcl <- split(tc, seq_along(tc)) decoded.mat <- rbind(width(tc), decode(tc$q_0.1), decode(tc$q_0.9))
# Operate directly on each line f.grline <- function(grline) { # A GRanges line is a GRanges object of length 1 # This function computes a blocks value for each line qLow_value <- decode(grline$q_0.1) qUp_value <- decode(grline$q_0.9) ir <- IRanges() |> c( if(qLow_value != 1) IRanges(1)) |> c( IRanges(qLow_value, qUp_value)) |> c( if(qUp_value != width(grline)) IRanges(width(grline))) ir } # Decode the Rle objects once for all and loop on each entry f.mat.direct <- function(x) { width_value <- x[1] qLow_value <- x[2] qUp_value <- x[3] ir <- IRanges() |> c( if(qLow_value != 1) IRanges(1)) |> c( IRanges(qLow_value, qUp_value)) |> c( if(qUp_value != width_value) IRanges(width_value)) } # Same but try to call the IRanges constructor only once. f.mat.viaString <- function(x) { width_value <- x[1] qLow_value <- x[2] qUp_value <- x[3] str <- c( c( if(qLow_value != 1) "1"), c( paste0(qLow_value, "-", qUp_value)), c( if(qUp_value != width_value) width_value) ) IRanges(str) }
tc[1] microbenchmark::microbenchmark(f.grline(tcl[[1]]), f.mat.direct(decoded.mat[,1]), f.mat.viaString(decoded.mat[,1]), times = 100) tc[5] microbenchmark::microbenchmark(f.grline(tcl[[5]]), f.mat.direct(decoded.mat[,5]), f.mat.viaString(decoded.mat[,5]), times = 100)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.