##' Some of the hits might have a to low coverage, the function
##' filters the reads given the parameters by the par_list.
##'
##' To reduce the overall false positive hits, the function filters to
##' low coverage references and references with a low mapping rate.
##' @title Filter the hits by the coverage
##' @param hit_id Genebank IDs of the hits
##' @param map_dna_list Output of \code{\link{map_dna_ref}
##' @param par_list Parameter given by the par_list(), see
##' \code{\link{set_par_list}}
##' @param out_file File path to the results dir
##' @return tibble
##' @author Jochen Kruppa
##' @export
coverage_filter <- function(hit_id, map_dna_list, par_list, out_file){
viral_length <- collect(filter(par_list["species_info"], genebank_id %in% hit_id)) %>%
select(genebank_id, length = Length)
coverage_tbl <- ldply(hit_id, function(x) {
viral_width <- filter(map_dna_list$alignment_df, genebank_id == x) %$%
IRanges::reduce(IRanges(start = .$pos_start, end = .$pos_end)) %>%
width(.)
viral_read_length_mapq <- select(filter(map_dna_list$alignment_df, genebank_id == x),
genebank_id, read_length, mapq)
coverage <- sum(viral_width)/filter(viral_length, genebank_id == x)$length
tibble(genebank_id = x,
coverage = round(coverage, 4),
mean_mapq = mean(viral_read_length_mapq$mapq),
mean_read_length = mean(viral_read_length_mapq$read_length),
hits = length(viral_width))
}, .parallel = TRUE)
png(str_c(out_file, "_coverage.png"), width = 18, height = 18, res = 300, units = "cm")
p <- ggplot(coverage_tbl, aes(coverage)) +
geom_density(fill = par$cbbPalette[3], alpha = 0.6) +
theme_bw() +
xlab("Coverage of the reference genome") + ylab("Density") +
ggtitle(str_c(basename(out_file), " - Coverage")) +
geom_vline(xintercept = 0.05, color = par$cbbPalette[7])
print(p)
dev.off()
png(str_c(out_file, "_read_length_hist.png"), width = 18, height = 18, res = 300, units = "cm")
dens <- density(coverage_tbl$mean_read_length)
max_y <- which.max(dens$y)
x_break <- c(dens$x[max_y], round(seq(from = min(dens$x), to = max(dens$x), length.out = 5)))
p <- ggplot(coverage_tbl, aes(mean_read_length)) +
geom_density(fill = par$cbbPalette[3], alpha = 0.6) +
theme_bw() + xlab("Read length") + ylab("Density") +
geom_vline(xintercept = dens$x[max_y], color = par$cbbPalette[7]) +
ggtitle(str_c(basename(out_file), " - Mean read length")) +
scale_x_continuous(breaks = x_break)
print(p)
dev.off()
return(coverage_tbl)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.