opts_chunk$set(echo=FALSE)
opts_chunk$set(warning=FALSE)
opts_chunk$set(fig.pos = 'H')

library(MotifBinner2)

if (!exists('result'))
{
  result <- all_results[[grep('trimAffixes', names(all_results))[1]]]
}
if (class(result) != 'trimAffixes')
{
  result <- all_results[[grep('trimAffixes', names(all_results))[1]]]
}
figure_path <- paste('figure_', result$config$op_full_name, '/', sep = '')

r result$config$op_full_name

Trims the prefix (the primers) of the reads away and stored the trimmed bits so that the primers can later be extracted.

The primer is r result$config$op_args$primer_seq which is r nchar(result$config$op_args$primer_seq) in length and contains r nchar(gsub('[^ACGT]', '', result$config$op_args$primer_seq)) non-ACGT characters. The required matching score is r result$config$op_args$min_score.

A note about the alignment score computation: Matches to ambiguous letters also count 1, so each N in the primer guarentees a score of 1 for that base.

print(result$config$op_args$primer_seq)
fig.cap1 <- 'Histograms of the number of missing bases / superflous bases at the front of reads'
fig.cap2 <- 'Histograms of the alignment score between the primer and the sequences'
options(scipen=99)
aln_stats <- result$metrics$per_read_metrics
ticks <- 10^(0:12)
ticks <- ticks[ticks < nrow(aln_stats)]
aln_stats$bases_trimmed_cat <- cut(aln_stats$prefix_front_gaps, c(-Inf, 0:5, Inf),
                                   c(0:5, '>5'))
aln_stats$gaps_at_front_of_read_cat <- cut(aln_stats$read_front_gaps, c(-Inf, 0:5, Inf),
                                   c(0:5, '>5'))

p1 <-
ggplot(aln_stats, aes(x=bases_trimmed_cat)) +
  geom_bar(stat="count", colour = 'black') + 
  scale_y_continuous(trans="log1p", breaks = ticks) +
  ylab('Number of reads') +
  xlab('Number of bases trimmed from start of read')

p2 <- 
ggplot(aln_stats, aes(x=gaps_at_front_of_read_cat)) +
  geom_bar(stat='count', colour = 'black') + 
  scale_y_continuous(trans="log1p", breaks = ticks) +
  ylab('Number of reads') +
  xlab('Missing bases at the start of the read')

p3 <- 
ggplot(aln_stats, aes(x=score)) +
  geom_bar(stat='count', colour = 'black') + 
  scale_y_continuous(trans="log1p", breaks = ticks) +
  ylab('Number of reads') +
  xlab('Score of Alignment to Primer') +
  geom_vline(xintercept = result$config$op_args$min_score-0.5, col = 'red')

grid.arrange(p1, p2, ncol=2)
cat('\n\n')
print(p3)
kable_summary(result$summary)

timingTable(result)
cat('\n\n---\n\n')


HIVDiversity/MotifBinner2 documentation built on May 6, 2019, 6:44 p.m.