inst/doc/example_index.R

params <-
list(barcode_summary = "/tmp/Rtmp6yqJik/Rinst45c356ac7920/amplican/extdata/results/barcode_reads_filters.csv", 
    config_summary = "/tmp/Rtmp6yqJik/Rinst45c356ac7920/amplican/extdata/results/config_summary.csv", 
    links = "1. [Report by id](./example_id_report.html)\n2. [Report by barcode](./example_barcode_report.html)\n3. [Report by group](./example_group_report.html)\n4. [Report by guide](./example_guide_report.html)\n5. [Report by amplicon](./example_amplicon_report.html)\n")

## ----echo = F, results = 'asis'-----------------------------------------------
if (params$links != "") {
  cat("***\n")
  cat("# Other Reports\n")
  cat("***\n")
  cat(params$links, sep = "")
}

## ----echo = F-----------------------------------------------------------------
library(ggplot2)
library(waffle)
summaryDF <- data.frame(data.table::fread(params$barcode_summary))

total_reads <- sum(summaryDF$read_count)
total_good_reads <- sum(summaryDF$filtered_read_count)
read_q <- c(total_good_reads, total_reads - total_good_reads)
read_q_per <- round(read_q*100/total_reads)
read_q_per[is.na(read_q_per)] <- 0 # For some cases 0/0 happens, yelding NaN
names(read_q_per) <- c(paste0('Good Reads\n', read_q[1], ' (', read_q_per[1], '%)'), 
                       paste0('Bad Quality Reads\n', read_q[2], ' (', read_q_per[2], '%)'))
waffle(read_q_per, legend_pos = 'bottom', 
       title = "Quality of all reads", rows = 10, colors = c('#E69F00', '#000000'))

bad_read_q <- c(sum(summaryDF$bad_base_quality), 
                sum(summaryDF$bad_average_quality), 
                sum(summaryDF$bad_alphabet))
bad_read_q_per <- round(bad_read_q*100/sum(bad_read_q))
bad_read_q_per[is.na(bad_read_q_per)] <- 0
names(bad_read_q_per) <- c(
  paste0('Bad Base Quality\n', bad_read_q[1], ' (', bad_read_q_per[1], '%)'), 
  paste0('Bad Average Read Quality\n', bad_read_q[2], ' (', bad_read_q_per[2], '%)'),
  paste0('Bad Read Alphabet\n', bad_read_q[3], ' (', bad_read_q_per[3], '%)'))
waffle(bad_read_q_per, legend_pos = 'bottom', 
       title = "\n\nBad quality reads", rows = 10, colors = c('#D55e00', '#f0e442', '#009e73'))

## ----echo = F-----------------------------------------------------------------
# Assignment of reads from barcodes into experiments (ID)
total_ureads <- sum(summaryDF$unique_reads)
read_a <- c(sum(summaryDF$assigned_reads), sum(summaryDF$unassigned_reads))
read_a_per <- round(read_a*100/total_ureads)
read_a_per[is.na(read_a_per)] <- 0
names(read_a_per) <- c(paste0('Assigned Reads\n', read_a[1], ' (', read_a_per[1], '%)'), 
                       paste0('Unassigned Reads\n', read_a[2], ' (', read_a_per[2], '%)'))
waffle(read_a_per, legend_pos = 'bottom', 
       title = "Succesfull assignment of unique reads", 
       rows = 10, colors = c('#E69F00', '#000000'))

# Filtered reads
configDF <- data.frame(data.table::fread(params$config_summary))
height <- amplican::plot_height(length(unique(configDF$Barcode)))

F_reads <- c(sum(configDF$Reads_Filtered), sum(configDF$PRIMER_DIMER),
              sum(configDF$Low_Score))
F_reads_per <- round(F_reads*100/sum(configDF$Reads))
F_reads_per[is.na(F_reads_per)] <- 0
names(F_reads_per) <- c(paste0('Good Reads\n', F_reads[1], ' (', F_reads_per[1], '%)'), 
                        paste0('PRIMER DIMERs\n', F_reads[2], ' (', 
                               F_reads_per[2], '%)'),
                        paste0('Low Score\n', F_reads[3], ' (', 
                               F_reads_per[3], '%)'))
waffle(F_reads_per, legend_pos = 'bottom', 
       title = "\n\nFiltered Reads", 
       rows = 10, colors = c('#E69F00', '#000000', '#A9A9A9'))

## ----echo = F-----------------------------------------------------------------
total_reads <- sum(configDF$Reads_Filtered)
total_reads_ctr <- sum(configDF$Reads_Filtered[configDF$Control])
total_reads_tmt <- sum(configDF$Reads_Filtered[!configDF$Control])
reads_edited <- c(sum(configDF$Reads_Edited[!configDF$Control]), # Treatment
                  total_reads_tmt - sum(configDF$Reads_Edited[!configDF$Control]),
                  sum(configDF$Reads_Edited[configDF$Control]), # Control
                  total_reads_ctr - sum(configDF$Reads_Edited[configDF$Control]))
reads_edited_per <- round(reads_edited*100/total_reads)
reads_edited_per[is.na(reads_edited_per)] <- 0
names(reads_edited_per) <- c(
  paste0('Edits in Treatment\n', reads_edited[1], ' (', reads_edited_per[1], '%)'),
  paste0('No Edits in Treatment\n', reads_edited[2], ' (', reads_edited_per[2], '%)'),
  paste0('Edits in Control\n', reads_edited[3], ' (', reads_edited_per[3], '%)'), 
  paste0('No Edits in Control\n', reads_edited[4], ' (', reads_edited_per[4], '%)'))
waffle(reads_edited_per, legend_pos = 'bottom', 
       title = "Reads with indels", 
       rows = 10, colors = c('#E69F00', '#000000', '#A9A9A9', '#F0E442'))

frameshift <- c(sum(configDF$Reads_Frameshifted[!configDF$Control]), # Treatment
                total_reads_tmt - sum(configDF$Reads_Frameshifted[!configDF$Control]),
                sum(configDF$Reads_Frameshifted[configDF$Control]), # Control
                total_reads_ctr - sum(configDF$Reads_Frameshifted[configDF$Control]))
frameshift_per <- round(frameshift*100/total_reads)
frameshift_per[is.na(frameshift_per)] <- 0
names(frameshift_per) <- c(
  paste0('Frameshift in Treatment\n', frameshift[1], ' (', frameshift_per[1], '%)'),
  paste0('No Frameshift in Treatment\n', frameshift[2], ' (', frameshift_per[2], '%)'),
  paste0('Frameshift in Control\n', frameshift[3], ' (', frameshift_per[3], '%)'), 
  paste0('No Frameshift in Control\n', frameshift[4], ' (', frameshift_per[4], '%)'))
waffle(frameshift_per, legend_pos = 'bottom', 
       title = "\n\nReads with frameshift", 
       rows = 10, colors = c('#E69F00', '#000000', '#A9A9A9', '#F0E442'))


## ----fig.width=8, fig.height = height + 1, echo = F---------------------------
library(ggthemes)
summaryDF_per <- summaryDF
filters <- c('bad_base_quality', 'bad_average_quality', 'bad_alphabet')
summaryDF_per[, filters] <- 
  round(summaryDF_per[, filters]*100/rowSums(summaryDF_per[, filters]))
summaryDF_per[, c('assigned_reads', 'unassigned_reads')] <-
   round(summaryDF_per[, c('assigned_reads', 'unassigned_reads')]*100/
           summaryDF_per$unique_reads)
summaryDF_per$bad_read_count <- summaryDF_per$read_count - summaryDF_per$filtered_read_count 
summaryDF_per[, c('filtered_read_count', 'bad_read_count')] <- 
  round(
    summaryDF_per[, c('filtered_read_count','bad_read_count')]*100/summaryDF_per$read_count)
summaryDF_per[is.na(summaryDF_per)] <- 0
summaryDF_per <- data.table::as.data.table(summaryDF_per)

quality_melt <- data.table::melt(summaryDF_per, id.vars = c('Barcode'), 
                     measure.vars = c('filtered_read_count', 'bad_read_count'))
quality_det_melt <-  data.table::melt(summaryDF_per,
                     id.vars = c('Barcode'),
                     measure.vars = c('bad_base_quality',
                                      'bad_average_quality',
                                      'bad_alphabet'))
assignment_melt <-  data.table::melt(summaryDF_per,
                        id.vars = c('Barcode'),
                        measure.vars = c('assigned_reads', 'unassigned_reads'))

ggplot(data = quality_melt,
       aes(x = as.factor(Barcode),
           y = value,
           fill = factor(variable, labels = c('Good Reads', 'Bad Quality Reads')))) +
  geom_bar(position ='stack', stat ='identity') +
  ylab('% of reads in barcode') +
  xlab('Barcode') +
  ggtitle('Quality filtering of reads in barcodes') +
  theme(legend.position = 'top',
        legend.direction = 'horizontal',
        legend.title = element_blank()) +
  coord_flip() +
  scale_fill_manual(values = c('#E69F00', '#000000'))


## ----fig.width=8, fig.height=height + 1, echo = F-----------------------------
ggplot(data = quality_det_melt,
       aes(x = as.factor(Barcode),
           y = value,
           fill = factor(variable, labels = c('Bad Read Base Quality', 
                                              'Bad Average Read Quality', 
                                              'Bad Read Alphabet')))) +
  geom_bar(position='stack', stat='identity') +
  ylab('% of low quality reads in barcode') +
  xlab('Barcode') +
  ggtitle('\n\nDistribution of low quality reads in barcodes') +
  theme(legend.position = 'top',
        legend.direction = 'horizontal',
        legend.title = element_blank()) +
  coord_flip() + 
  scale_color_colorblind()+
  scale_fill_manual(values = c('#D55e00', '#f0e442', '#009e73'))

## ----fig.width=8, fig.height=height + 1, echo = F-----------------------------
ggplot(data = assignment_melt,
       aes(x = as.factor(Barcode),
           y = value,
           fill = factor(variable, labels = c('Assigned Reads', 'Unassigned Reads')))) +
  geom_bar(position='stack', stat='identity') +
  ylab('% of unique reads in barcode') +
  xlab('Barcode') +
  ggtitle('\n\nAssignment of reads in barcodes') +
  theme(legend.position = 'top',
        legend.direction = 'horizontal',
        legend.title = element_blank()) +
  coord_flip() +
  scale_fill_manual(values = c('#E69F00', '#000000'))

## ----echo = F-----------------------------------------------------------------
library(knitr)
names(summaryDF) <- c("Barcodes", "Experiment Count", "Read Count",
                      "Bad base quality", "Bad average quality", "Bad alphabet",
                      "Good Reads", "Unique Reads", "Unassigned Reads",
                      "Assigned Reads")
kable(summaryDF)

Try the amplican package in your browser

Any scripts or data that you put into this service are public.

amplican documentation built on Nov. 8, 2020, 11:10 p.m.