In uzh/ezRun: An R meta-package for the analysis of Next Generation Sequencing Data

knitr::opts_chunk$set(echo = TRUE)
require(knitr)
require(kableExtra)
require(plotly)
require(seqLogo)
require(ezRun)

output <- readRDS("output.rds")
param <- readRDS("param.rds")
input <- readRDS("input.rds")
rawScreenResult <- readRDS("rawScreenResult.rds")
procScreenResult <- readRDS("procScreenResult.rds")
virusResult <- readRDS("virusResult.rds")
if(file.exists('refseqResult.rds')){
    refseqResult = readRDS("refseqResult.rds")
} else {
    refseqResult = NULL
}
krakenResult <- readRDS("krakenResult.rds")
rRNAstrandResult <- readRDS("rRNAstrandResult.rds")
PWMresult <- readRDS("PWMs.rds")
dataset = input$meta
samples = input$getNames()

debug <- FALSE

Started on r format(Sys.time(), "%Y-%m-%d %H:%M:%S")

FastqScreen_Result {.tabset}

FastqScreen Mapping Rates to Adapters and rRNA

The raw reads are checked for adapter content. A local alignment using Bowtie2 is performed. Reads occuring from adapter dimers and adapter contamination at the 3' end can be discovered.

m <- list(l = 80, r = 80, b = 200, t = 100, pad = 0) # more margin space

percentMapped = sapply(rawScreenResult, function(x){100 - x$"%Unmapped"})

# MappingRateAdapters
p <- plot_ly(x=samples,
             y=percentMapped,
             type="bar") %>%
  layout(title = "MappingRate to Adapters - Bowtie2, local Alignment",
         yaxis = list(title = "MappedReads in %", range = c(0,100)),
         margin = m)
p

A Bowtie2 end-to-end alignment is performed to estimate the overall rRNA content of the libraries. SILVA is used as the reference database. The read orientation can be used to check if the sequenced libraries preserve the strand information.

# rRNA Mapping
rRnaPercentMapped = rRNAstrandResult[ , c("Sense", "Antisense")] / rRNAstrandResult$`Read Count` * 100
rRnaPercentMapped$sample = rownames(rRnaPercentMapped)
p <- plot_ly(rRnaPercentMapped, x=~sample, y=~Sense, 
             type="bar", name="sense") %>%
    add_trace(y = ~Antisense, name = "antisense") %>%
  layout(title = "rRNA Silva Mapping - Bowtie2, end-to-end Alignment",
         yaxis = list(title = "rRNA-Mapping-Rate in %"),
         xaxis = list(title = ""),
         barmode = 'stack', margin = m)
p

FastqScreen Mapping Per Sample

Each sample is mapped against multiple references using FastqScreen. This includes the most common model organisms, ribosomal RNA references (large and small subunit) based on SILVA on several taxonomic levels and typical biological and technical contaminants in NGS libraries (tRNAs, PhIX, Mycoplasma, UniVec).

for (nm in samples){
  par(mar=c(10.1, 4.1, 4.1, 2.1))
  x = procScreenResult[[nm]][ , c("%One_hit_one_genome", "%Multiple_hits_one_genome", 
                                  "%One_hit_multiple_genomes", "%Multiple_hits_multiple_genomes")]
  if (nrow(x) > 0){
    x = x[ , c("%One_hit_one_genome", "%Multiple_hits_one_genome", 
                                  "%One_hit_multiple_genomes", "%Multiple_hits_multiple_genomes")]
    bplt = barplot(t(x), las=2, ylim=c(0,100), 
                   legend.text=T, ylab="Mapped Reads in %", main=nm, names.arg=rep('', nrow(x)))
      text(x = bplt, y = par("usr")[3] - 2, srt = 45, adj = 1, labels = rownames(x), xpd = TRUE)
  } else {
    plot(1,1, type="n", axes=FALSE, main=nm, xlab="", ylab="", frame=TRUE)
    text(1,1, "no hits found")
  }
}

Mapping to RefSeq mRNA Per Sample

A mapping against all mRNAs in RefSeq can be useful to estimate the fraction reads which is specific for exonic regions. Furthermore contaminations with other species e.g. due to xenograft experiments can be discovered.

refseqSamples <- names(refseqResult)
names(refseqSamples) <- refseqSamples

if(length(refseqSamples) > 1){
  readFractionPerSample <- data.frame(sampleName = rep(refseqSamples[1], nrow(refseqResult[[refseqSamples[1]]])), 
                                           species = rownames(refseqResult[[refseqSamples[1]]]), 
                                           UniqueSpeciesPercent = refseqResult[[refseqSamples[1]]]$UniqueSpeciesPercent,
                                           MultipleSpeciesPercent = refseqResult[[refseqSamples[1]]]$MultipleSpeciesPercent
                                           )
  for (j in 2:length(refseqSamples)){
      tmp <- data.frame(sampleName = rep(refseqSamples[j], nrow(refseqResult[[refseqSamples[j]]])), species = rownames(refseqResult[[refseqSamples[j]]]), 
                        UniqueSpeciesPercent = refseqResult[[refseqSamples[j]]]$UniqueSpeciesPercent,
                        MultipleSpeciesPercent = refseqResult[[refseqSamples[j]]]$MultipleSpeciesPercent)
      readFractionPerSample <- rbind(readFractionPerSample, tmp)
  }
}

1. MultiSample Barplots for top hits

if(length(refseqSamples) > 1){
  m <- list(l = 80, r = 80, b = 200, t = 100, pad = 0)
  topSpecies <- sort(tapply(readFractionPerSample$UniqueSpeciesPercent, readFractionPerSample$species, sum), decreasing = TRUE)

    if (length(topSpecies) > 0){
    i = 1
    data <- readFractionPerSample[readFractionPerSample$species == names(topSpecies)[i],]

    p <- plot_ly(x=data$sampleName,
             y=data$UniqueSpeciesPercent + data$MultipleSpeciesPercent,
             type="bar") %>%
    layout(title = paste0('TopHit', i,':', names(topSpecies)[i], ' (Unique + MultiHits)'),
           yaxis = list(title = "MappedReads in %", range = c(0,100)), margin = m)
    p
  }
}

if(length(refseqSamples) > 1){
  m <- list(l = 80, r = 80, b = 200, t = 100, pad = 0)
  if (length(topSpecies) > 1){
    i = 2
    data <- readFractionPerSample[readFractionPerSample$species == names(topSpecies)[i],]

    p <- plot_ly(x=data$sampleName,
             y=data$UniqueSpeciesPercent + data$MultipleSpeciesPercent,
             type="bar") %>%
    layout(title = paste0('TopHit', i,':', names(topSpecies)[i], ' (Unique + MultiHits)'),
           yaxis = list(title = "MappedReads in %", range = c(0,100)), margin = m)
    p
  }
}

if(length(refseqSamples) > 2){
  m <- list(l = 80, r = 80, b = 200, t = 100, pad = 0)
  if (length(topSpecies) > 2){
    i = 3
   data <- readFractionPerSample[readFractionPerSample$species == names(topSpecies)[i],]

    p <- plot_ly(x=data$sampleName,
             y=data$UniqueSpeciesPercent + data$MultipleSpeciesPercent,
             type="bar") %>%
    layout(title = paste0('TopHit', i,':', names(topSpecies)[i], ' (Unique + MultiHits)'),
           yaxis = list(title = "MappedReads in %", range = c(0,100)), margin = m)
    p
  }
}

2. Per sample barplots

if(length(refseqSamples) > 100){
  message('Skipped: > 100 Samples')
} else {
  for (nm in refseqSamples){
        par(mar=c(10.1, 4.1, 4.1, 2.1))
        x = refseqResult[[nm]]
        if (is.null(x)) x = matrix(0, 2, 1, dimnames=list(c('UniqueSpeciesHits','MultipleSpeciesHits'),'Misc'))
        bplot = barplot(t(x), col=c("royalblue3", "lightblue"), las=2, ylim=c(0,100),
                        legend.text=T, ylab="Mapped Reads in %", main=nm, names.arg=rep('',nrow(x)) )
        text(y=t(x)[ 1,] + 5, x=bplot, font = 2, labels=t(x)[ 1, ], cex=1.1, col='black')
        text(x = bplot, y = par("usr")[3] - 2, srt = 45, adj = 1, 
             labels = rownames(x), xpd = TRUE)
    }
  }

Mapping to Genomes with Kraken2

Kraken2 is a tool to perform a taxonomic classification based on k-mer matches. The reference includes Refseq bacteria, archaea, viral libraries and the GRCh38 human genome. It's especially useful to discover bacterial contamination (e.g. Mycoplasma in cultivated cells.)

1. MultiSample Barplots for top hits

krakenSamples <- names(krakenResult)
names(krakenSamples) <- krakenSamples
if(length(krakenSamples) > 1){
  skipMultiSampleKraken <- FALSE

  readFractionPerSample <- data.frame(sampleName = rep(krakenSamples[1], nrow(krakenResult[[krakenSamples[1]]])), 
                                           species = krakenResult[[krakenSamples[1]]]$name, 
                                           readPercentage = krakenResult[[krakenSamples[1]]]$readPercentage
                                           )
  for (j in 2:length(krakenSamples)){
      tmp <- data.frame(sampleName = rep(names(krakenSamples)[j], nrow(krakenResult[[krakenSamples[j]]])), 
                                           species = krakenResult[[krakenSamples[j]]]$name, 
                                           readPercentage = krakenResult[[krakenSamples[j]]]$readPercentage
                                           )
      readFractionPerSample <- rbind(readFractionPerSample, tmp)
  }
} else {
    skipMultiSampleKraken <- TRUE    
}

if(!skipMultiSampleKraken){
  m <- list(l = 80, r = 80, b = 200, t = 100, pad = 0)
  topSpecies <- sort(tapply(readFractionPerSample$readPercentage, readFractionPerSample$species, sum), decreasing = TRUE)

    if (length(topSpecies) > 0){
    i = 1
    data <- readFractionPerSample[readFractionPerSample$species == names(topSpecies)[i],]

    p <- plot_ly(x=data$sampleName,
             y=data$readPercentage,
             type="bar") %>%
    layout(title = paste0('TopHit', i,':', names(topSpecies)[i]),
           yaxis = list(title = "MappedReads in %", range = c(0,100)), margin = m)
    p
  }
}

if(!skipMultiSampleKraken){
  m <- list(l = 80, r = 80, b = 200, t = 100, pad = 0)
    if (length(topSpecies) > 1){
    i = 2
    data <- readFractionPerSample[readFractionPerSample$species == names(topSpecies)[i],]

    p <- plot_ly(x=data$sampleName,
             y=data$readPercentage,
             type="bar") %>%
    layout(title = paste0('TopHit', i,':', names(topSpecies)[i]),
           yaxis = list(title = "MappedReads in %", range = c(0,100)), margin = m)
    p
  }
}

if(!skipMultiSampleKraken){
  m <- list(l = 80, r = 80, b = 200, t = 100, pad = 0)
    if (length(topSpecies) > 2){
    i = 3
    data <- readFractionPerSample[readFractionPerSample$species == names(topSpecies)[i],]

    p <- plot_ly(x=data$sampleName,
             y=data$readPercentage,
             type="bar") %>%
    layout(title = paste0('TopHit', i,':', names(topSpecies)[i]),
           yaxis = list(title = "MappedReads in %", range = c(0,100)), margin = m)
    p
  }
}

2. Per sample barplots

Abundances above 5% are truncated

if(length(krakenSamples) > 100){
  message('Skipped: > 100 Samples')
} else if(length(krakenSamples) > 1){
  for (nm in krakenSamples){
      x = readFractionPerSample[readFractionPerSample$sampleName == nm, 'readPercentage']
      names(x) = readFractionPerSample[readFractionPerSample$sampleName == nm, 'species']
      par(mar=c(15.1, 6.1, 4.1, 2.1))
      if (length(x[x > 0]) > 0){
          bplot <- barplot(x[x > 0], names.arg = rep('',length(x[x>0])), col = 'royalblue3', 
                           main = nm, ylab = 'Mapped Reads in %', las = 2, ylim=c(0, 5))
          text(x = bplot, y = par("usr")[3] - 0.3, srt = 45, adj = 1, 
               labels = names(x)[x > 0], xpd = TRUE)
      }
  }
}

Human pathogens check

This check includes a Bowtie2 mapping and is currently only performed for human samples. It uses all unmapped reads of the FastqScreen mapping as input and maps these reads against several common human pathogens (Bacteria and Viruses).

if(param[['virusCheck']]){
  for (nm in samples){ 
    par(mar=c(18.1, 7.1, 2.1, 2.1))
    x = virusResult[[nm]]
    if (is.null(x)) x = matrix(0, 2, 1, dimnames=list(c('UniqueSpeciesHits','MultipleSpeciesHits'),'Misc'))
    bplot = barplot(t(x), col=c("royalblue3", "lightblue"), las = 2, ylim = c(0,100),
                    legend.text=T, ylab="Mapped Reads in %", main=nm, names.arg=rep('',nrow(x)) )
    text(y=t(x)[ 1,] + 5, x=bplot, font = 2, labels=t(x)[ 1, ], cex = 1.1, col = 'black')
    text(x = bplot, y = par("usr")[3] - 2, srt = 60, adj = 1, 
         labels = rownames(x), xpd = TRUE)
  }
} else {
  cat("not run")
}

Read Patterns R1

PWMs <- PWMresult[['R1']]
for (nm in names(PWMs)){
        cat(nm, "\n \n")
        seqLogo(PWMs[[nm]])
        cat("\n \n")
}

Read Patterns R2

PWMs <- PWMresult[['R2']]

for (nm in names(PWMs)){
        cat(nm, "\n \n")
        seqLogo(PWMs[[nm]])
        cat("\n \n")
}

Settings

getAppVer <- function(appName) { sub("^.+/([^/]+)$", "\\1", Sys.getenv(appName)) }
settings = character()
settings["Configuration File:"] = param$confFile
settings["RefSeq mRNA Reference:"] = REFSEQ_mRNA_REF
settings["FastqScreen Version:"] = getAppVer("FastQScreen")
settings["Bowtie2 Version:"] = getAppVer("Bowtie2")
settings["Bowtie2 Parameters:"] = param$cmdOptions
settings["Minimum AlignmentScore:"] = param$minAlignmentScore
settings["TopSpecies:"] = param$nTopSpecies
kable(as.data.frame(settings), col.names=NA, row.names=TRUE, format="html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "left")

Input Dataset

ezInteractiveTableRmd(values=dataset)

SessionInfo

ezSessionInfo()

uzh/ezRun documentation built on Sept. 16, 2024, 11:21 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

uzh/ezRun
An R meta-package for the analysis of Next Generation Sequencing Data

In uzh/ezRun: An R meta-package for the analysis of Next Generation Sequencing Data

FastqScreen_Result {.tabset}

FastqScreen Mapping Rates to Adapters and rRNA

FastqScreen Mapping Per Sample

Mapping to RefSeq mRNA Per Sample

1. MultiSample Barplots for top hits

2. Per sample barplots

Mapping to Genomes with Kraken2

1. MultiSample Barplots for top hits

2. Per sample barplots

Human pathogens check

Read Patterns R1

Read Patterns R2

Settings

Input Dataset

SessionInfo

R Package Documentation

Browse R Packages

We want your feedback!

uzh/ezRun An R meta-package for the analysis of Next Generation Sequencing Data

In uzh/ezRun: An R meta-package for the analysis of Next Generation Sequencing Data

FastqScreen_Result {.tabset}

FastqScreen Mapping Rates to Adapters and rRNA

FastqScreen Mapping Per Sample

Mapping to RefSeq mRNA Per Sample

1. MultiSample Barplots for top hits

2. Per sample barplots

Mapping to Genomes with Kraken2

1. MultiSample Barplots for top hits

2. Per sample barplots

Human pathogens check

Read Patterns R1

Read Patterns R2

Settings

Input Dataset

SessionInfo

R Package Documentation

Browse R Packages

We want your feedback!

uzh/ezRun
An R meta-package for the analysis of Next Generation Sequencing Data