library(GenomicRanges)
library(plyr)
forbidden <- c("seqnames", "ranges", "strand", "seqlevels", "seqlengths",
"isCircular", "start", "end", "width", "element")
roll.haplotypes <- function(haps, seqlengths = multiparental:::seqlengths, seal = FALSE, ...) {
dlply(haps, .(id), function(d) {
rez <- dlply(d, .(origin), function(p) {
seg <- makeGRangesFromDataFrame(p, keep.extra.columns = TRUE, seqinfo = seqlengths)
seg <- sort(seg)
if (seal)
return( seal.segments(seg) )
else
return(seg)
})
class(rez) <- c("haplotypes", class(rez))
return(rez)
}, .progress = "text")
}
roll.recombinations <- function(haps, seqlengths = multiparental:::seqlengths, ...) {
dlply(haps, .(id), function(d) {
rez <- dlply(d, .(origin), function(p) {
gr <- makeGRangesFromDataFrame(p, keep.extra.columns = TRUE, seqinfo = seqlengths)
gr <- sort(gr)
return(gr)
})
# class(rez) <- c("haplotypes", class(rez))
return(rez)
})
}
invert.recombinations <- function(recombs, seal = FALSE, ...) {
lapply(recombs, function(r.all) {
segments <- GRanges()
for (c in unique(runValue(seqnames(r.all)))) {
r <- r.all[ seqnames(r.all) == c ]
if (seal) {
# 'seal' haplotype segments together -- eg. get rid of uncertainty around recomb events
g <- GRanges(seqnames = c, seqinfo = seqinfo(r),
ranges = IRanges(start = c(1, pmin( mid(ranges(r))+1, seqlengths(r)[c] )),
end = c(mid(ranges(r)), seqlengths(r)[c])),
strain = c( as.character(values(r)$from), rev(as.character(values(r)$to))[1] )
)
segments <- c(segments, g)
}
else {
if (length(r)) {
if (length(r) > 1) {
i <- 1:(length(r)-1)
# un-overlap possibly overlapping recomb events
end(r)[i] <- pmin( end(r)[i], start(r)[i+1]-1 )
start(r)[i+1] <- pmax( end(r)[i]+2, start(r)[i+1] )
}
r <- sort(r)
g <- gaps(r)
g <- g[ strand(g) == "*" & (as(seqnames(g), "character") %in% as(seqnames(r), "character")) ]
values(g)$strain <- c( as.character(values(r)$from), rev(as.character(values(r)$to))[1] )
segments <- c(segments, g)
}
}
}
# print(segments)
return(segments)
})
}
# overlap.with.meta <- function(query, subject, metavars = colnames(values(query)), ...) {
#
# flag <- (query %over% subject)
# meta.flag <- matrix(ncol = 0, nrow = length(query))
#
# if (length(subject) > 1 & !(length(subject) == length(query))) {
# warning("Result will probably not be well-defined due to shape of query vs subject ranges.")
# }
#
# for (i in colnames(values(query))) {
# if (i %in% colnames(values(subject))) {
# # hits.this.subject <- sapply(1:length(subject), function(j) flag & (as.character(values(query)[,i]) == as.character(values(subject)[j,i])) )
# # print(hits.this.subject)
# hits.this.subject <- flag & (values(query)[,i] == values(subject)[,i])
# meta.flag <- cbind(meta.flag, hits.this.subject)
# }
# }
#
# return(meta.flag)
#
# }
#
# grep.haplotypes <- function(haps, target, strain.col = "strain", ...) {
#
# ## how many haplotype segments in <haps> overlap <target>?
#
# stopifnot(length(target) > 0)
#
# llply(haps, function(h) {
# rez <- lapply(h, function(hh) {
# sapply(1:length(target), function(t) overlap.with.meta(hh, target[t], metavars = strain.col))
# })
# return( sum(sapply(rez, any)) )
# }, .progress = "text")
#
# }
#
# grep.recombinations <- function(recombs, target, from = NULL, to = NULL, ...) {
#
# stopifnot(length(target) > 0)
#
# llply(recombs, function(h) {
# sapply(h, function(hh) {
# i <- rep(TRUE, length(hh))
# if (!is.null(from))
# i <- i & (values(hh)$from == from)
# if (!is.null(to))
# i <- i | (values(hh)$to == to)
# countOverlaps(target, hh[i])
# })
# }, .progress = "text")
#
# }
overlap.with.meta <- function(haps, target, haps.flat = NULL, meta.cols = NULL, by = .(id), ...) {
## given a list-of-lists of rolled haplotypes, find those which overlap <target> AND have same metadata
## (if a flattened haplotypes object is already available, allow it to be passed in to avoid re-flattening)
all.gr <- GRanges()
if (!is.null(haps.flat))
all.gr <- haps.flat
else
all.gr <- flatten.haplotypes(haps, "all")
olap <- all.gr %over% target
if (is.null(meta.cols))
meta.cols <- intersect( colnames(values(target)), colnames(values(all.gr)) )
for (col in meta.cols) {
olap <- olap & (values(all.gr)[ ,col ] == values(target)[ ,col ])
}
all.gr$olap <- olap
all.df <- as.data.frame(all.gr)
rez <- daply(all.df, by, summarize, sum(as.numeric(olap)))
return(rez)
}
grep.haplotypes <- function(haps, target, meta.cols = "strain", ...) {
## how many haplotype segments in <haps> overlap <target>?
stopifnot(length(target) > 0)
overlap.with.meta(haps, target, meta.cols = meta.cols, ...)
}
grep.recombinations <- function(recombs, target, meta.cols = c("to","from"), ...) {
## how many recombination events in <recombs> overlap <target>?
stopifnot(length(target) > 0)
overlap.with.meta(recombs, target, meta.cols = meta.cols, ...)
}
find.recombinants <- function(...) {
rez <- grep.recombinations(...)
sapply(rez, sum) > 0
}
seal.segments <- function(gr, ...) {
## "seal" haplotype segments by joining them at midpoint of uncertainty interval
df <- as.data.frame(sort(gr))
rez <- ddply(df, .(seqnames), function(d) {
mids <- floor((d$start[-1] + d$end[ -nrow(d) ])/2)
d$start[1] <- 1
d$end[-nrow(d)] <- mids
d$start[-1] <- mids + 1
d$end[ nrow(d) ] <- seqlengths(gr)[ d$seqnames[1] ]
d$width <- NULL # not an allowed column name to makeGRangesFromDataFrame()
return(d)
})
makeGRangesFromDataFrame(rez, keep.extra.columns = TRUE, ignore.strand = TRUE)
}
flatten.haplotypes <- function(haps, by = c("parent","all"), ...) {
## flatten the nested-list haplotype structure either into a list of GRanges() (by = "parent") or a single GRanges(by = "all")
if (by == "all") {
rez <- ldply(haps, function(h) ldply(h, as.data.frame),
.progress = "text")
if ("width" %in% colnames(rez))
rez$width <- NULL
makeGRangesFromDataFrame(rez, keep.extra.columns = TRUE, ignore.strand = TRUE)
}
else if (by == "parent") {
rez <- llply(haps, function(h) {
hh <- ldply(h, as.data.frame)
if ("width" %in% colnames(hh))
hh$width <- NULL
makeGRangesFromDataFrame(hh, keep.extra.columns = TRUE, ignore.strand = TRUE)
}, .progress = "text")
return(rez)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.