# Report-wide knitr chunk defaults: hide the code, suppress warnings, and
# pass fig.pos = 'H' for figure placement.
opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  fig.pos = 'H'
)

library(MotifBinner2)

# Select the result object this report section describes.
# If no `result` exists yet, or the existing one is not a 'qualTrim' result,
# take the first element of `all_results` whose name matches 'qualTrim'.
# inherits() is used rather than `class(result) != 'qualTrim'` because class()
# can return more than one class name, and a condition of length > 1 inside
# if() is an error since R 4.2.0.
if (!exists('result') || !inherits(result, 'qualTrim'))
{
  result <- all_results[[grep('qualTrim', names(all_results))[1]]]
}

# Directory prefix derived from the operation's full name.
figure_path <- paste0('figure_', result$config$op_full_name, '/')

`r result$config$op_full_name`

Removes sequences that are low quality. A sequence is considered low quality if either its average quality score is below `r result$config$op_args$avg_qual` or the proportion of the read's bases with a quality score below `r result$config$op_args$bad_base_threshold` is higher than `r result$config$op_args$max_bad_bases`.

# Per-read metrics table that the histograms below are drawn from.
per_read_metrics <- result$metrics$per_read_metrics

# Bias number printing strongly towards fixed (non-scientific) notation.
options(scipen = 99)
# Caption text for the histogram figure.
fig.cap1 <- "Figure: Histograms of the average qualities and percentage of bad bases in the reads"

# y-axis breaks: the powers of ten from 10^0 to 10^12 that are smaller than
# the number of rows in per_read_metrics.
ticks <- Filter(function(tick) tick < nrow(per_read_metrics), 10^(0:12))

# Histogram of avg_qual, with a red vertical line at the avg_qual setting.
p1 <- ggplot(data = per_read_metrics, mapping = aes(x = avg_qual)) +
  geom_histogram(bins = 30) +
  scale_y_continuous(trans = "log1p", breaks = ticks) +
  labs(
    x = 'Average quality\n(per read)',
    y = 'Number of Reads'
  ) +
  geom_vline(
    xintercept = result$config$op_args$avg_qual,
    color = 'red'
  )

# Histogram of perc_bad, with a red vertical line at the max_bad_bases
# setting; the axis label quotes the bad_base_threshold setting.
p2 <- ggplot(data = per_read_metrics, mapping = aes(x = perc_bad)) +
  geom_histogram(bins = 30) +
  scale_y_continuous(trans = "log1p", breaks = ticks) +
  labs(
    x = paste0('Percentage of bad bases\n(<',
               result$config$op_args$bad_base_threshold,
               ' quality score)'),
    y = 'Number of Reads'
  ) +
  geom_vline(
    xintercept = result$config$op_args$max_bad_bases,
    color = 'red'
  )

# Draw the two histograms side by side.
grid.arrange(p1, p2, ncol = 2)
# Output the summary stored on the result via kable_summary()
# (helper defined outside this file; presumably renders a table, verify).
kable_summary(result$summary)

# Output timing information for this result via timingTable()
# (helper defined outside this file).
timingTable(result)
# Write a '---' line surrounded by blank lines to the output
# (presumably a markdown horizontal rule closing this section; confirm
# the chunk uses results = 'asis').
cat('\n\n---\n\n')


HIVDiversity/MotifBinner2 documentation built on May 6, 2019, 6:44 p.m.