##' @title Annotate the overlap between SVs using the reciprocal coverage method
##' @description For each SV type present in \code{ol.gr}, flags the queries
##' and the subjects whose overlaps reach \code{min.cov} of their own size,
##' then keeps only the overlaps that involve at least one flagged variant.
##' @details Insertions (type \code{INS}) are compared by size: the coverage of
##' a variant is the summed size of the variants it is paired with, divided by
##' its own size. For every other type, the ranges in \code{ol.gr} are reduced
##' per variant so that no base is counted twice, and the covered width is
##' divided by the variant size. Overlaps with a missing type are never flagged.
##' @param ol.gr overlaps prepared with \code{prepareOl} function. The columns
##' \code{type}, \code{queryHits}, \code{subjectHits}, \code{querySize},
##' \code{subjectSize} and \code{interSize} are read.
##' @param min.cov the minimum coverage to be considered a match. Default is 0.5
##' @return an updated and filtered version of the input ol.gr, with three
##' added columns: \code{queryOl} and \code{subjectOl} (is the query/subject
##' covered enough?) and \code{olScore} (\code{interSize} divided by the larger
##' of \code{querySize} and \code{subjectSize}).
##' @author Jean Monlong
##' @importFrom magrittr %>%
##' @importFrom rlang .data
##' @keywords internal
annotateOl.coverage <- function(ol.gr, min.cov=.5){
  ## rep() instead of a recycled scalar so that an empty input is handled too
  ol.gr$queryOl = rep(FALSE, length(ol.gr))
  ol.gr$subjectOl = rep(FALSE, length(ol.gr))

  ## Ids whose reduced ranges cover at least min.cov of the variant size.
  ## 'ids' and 'sizes' are parallel to the ranges in 'gr'. Shared by the
  ## query side and the subject side of the "ranges" SVs.
  coveredIds = function(gr, ids, sizes){
    ## The intersection ranges need to be "reduced" to avoid double-counting
    ## bases. This is done after grouping the ranges by id.
    gr.l = GenomicRanges::GRangesList(GenomicRanges::split(gr, ids))
    gr.l = GenomicRanges::reduce(gr.l)
    cov.l = lapply(GenomicRanges::width(gr.l), sum)
    cov.df = data.frame(id=as.numeric(names(cov.l)), cov=unlist(cov.l))
    ## One row per (id, size) pair: joining on the raw per-overlap columns
    ## would repeat each id once per overlap for no benefit.
    size.df = unique(data.frame(id=ids, size=sizes))
    cov.df = merge(cov.df, size.df)
    unique(cov.df$id[which(cov.df$cov / cov.df$size >= min.cov)])
  }

  ## A missing type cannot match any overlap and would break the
  ## 'svtype == "INS"' test below, so it is skipped.
  sv.types = unique(ol.gr$type)
  sv.types = sv.types[!is.na(sv.types)]

  qs.ol.l = lapply(sv.types, function(svtype){
    ol.gr = ol.gr[which(ol.gr$type==svtype)]
    ol.df = as.data.frame(ol.gr)
    if(svtype=='INS'){
      ## Insertions: compare the cumulated size of the paired variants to the
      ## size of the variant itself.
      ## Coverage on queries
      query.cov = ol.df %>%
        dplyr::group_by(.data$queryHits, .data$querySize) %>%
        dplyr::summarize(ol.size=sum(.data$subjectSize)) %>%
        dplyr::filter(.data$ol.size / .data$querySize >= min.cov)
      ## Coverage on call set
      subject.cov = ol.df %>%
        dplyr::group_by(.data$subjectHits, .data$subjectSize) %>%
        dplyr::summarize(ol.size=sum(.data$querySize)) %>%
        dplyr::filter(.data$ol.size / .data$subjectSize >= min.cov)
      return(list(query.ol=unique(query.cov$queryHits),
                  subject.ol=unique(subject.cov$subjectHits)))
    } else { ## "ranges" SVs
      ## Overlap coverage on the truth set, then on the call set
      return(list(query.ol=coveredIds(ol.gr, ol.df$queryHits, ol.df$querySize),
                  subject.ol=coveredIds(ol.gr, ol.df$subjectHits,
                                        ol.df$subjectSize)))
    }
  })

  ## merge the query ids to keep and flag them in the input ol.gr
  query.ol = unlist(lapply(qs.ol.l, function(ll) ll$query.ol))
  ol.gr$queryOl[which(ol.gr$queryHits %in% query.ol)] = TRUE
  ## same for the subject ids
  subject.ol = unlist(lapply(qs.ol.l, function(ll) ll$subject.ol))
  ol.gr$subjectOl[which(ol.gr$subjectHits %in% subject.ol)] = TRUE

  ## compute the reciprocal overlap between the variants, for info:
  ## intersection size relative to the larger of the two variants
  ol.gr$olScore = ol.gr$interSize / pmax(ol.gr$subjectSize, ol.gr$querySize)

  ## keep the overlaps where the query or the subject is covered enough
  return(ol.gr[which(ol.gr$queryOl | ol.gr$subjectOl)])
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.