Nothing
setClass("DeBruijn",
slots = c("G" = "igraph",
"adjmat" = "matrix",
"motifs" = "table"))
## rawseq are the raw sequences, typically separated by hyphens since
## they are represented as multicharacter symbols (often chromosome number
## plus an arbitrary alphabetic character).
##
## M = length of motifs to be used in the DeBruijn graph.
##
deBruijn <- function(rawseq, M) {
## Start by re-encoding in a single character alphabet.
bethe <- Cipher(rawseq)
rs <- encode(bethe, rawseq)
## Get a list of motifs (character strings) of length M
## present in the data. Note that the `countWordss` function
## wants to convert the element names back to the original alphabnet
motifs <- countWords(rs, M, bethe)
## decompose each long-read breakpoint sequences (LRBPS) as a
## sequence of overlapping M-mers. Uses this function:
nb <- bethe@bytes
## Don't mess with really small sequences
rs6 <- rs[nchar(rs) > M*nb]
## actually decompose everything that is decomposable
layers <- sapply(rs6, function(sqn) {
dek <- decode(bethe, dekomp(sqn, M, nb))
sapply(dek, function(src) {
which(names(motifs) == src)
})
})
## Create a (directed) adjacency matrix from the decompositions
adjmat <- matrix(0, nrow = length(motifs), ncol = length(motifs))
rownames(adjmat) <- colnames(adjmat) <- names(motifs)
for (LRval in names(layers)) {
LYR <- layers[[LRval]]
for (J in 1:(length(LYR) - 1)) {
a <- names(LYR)[J]
b <- names(LYR)[J + 1]
DBG <- adjmat[a, b]
adjmat[a, b] <- DBG + 1
# cat(a, "\t", b, "\t", DBG, "\t", adjmat[a, b], "\n")
if (all(adjmat == 0)) stop("fuck")
}
}
## Create a graph from the adjacency matrix
G <- graph_from_adjacency_matrix(adjmat, mode = "directed", weighted = TRUE)
G <- set_edge_attr(G, "width", value = 1 + log(edge_attr(G, "weight")))
G <- set_vertex_attr(G, "size", value = 3)
new("DeBruijn",
G = G,
adjmat = adjmat,
motifs = motifs)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.