In uzh/ezRun: An R meta-package for the analysis of Next Generation Sequencing Data

knitr::opts_chunk$set(echo = FALSE)

## debug
#title: "`r paste('VirDetect:', input$getNames(), 'minimum ', param$minReadCount, 'mapped reads per genome')`"
#wddir <- "/misc/GT/analysis/gtan/debug/p2150-VirDetect/4-2"
wddir <- "."
#params = list(sample="4-2",
#              minReadCount=10)
## end of debug

Sample

#params$sample=input$getNames()
params$sample
#params$minReadCount=param$minReadCount

Contamination and virome mapping rates

Raw reads were trimed and filtered based on quality scores, further filtered by contaminated genomes (human and none-human host), before being mapped to the reference virome databases.

dat.file <- file.path(wddir, "bowtie2.log")
dat <- readLines(dat.file)
counts1 <- data.frame( Count=c(scan(text=dat[1], what="")[1], scan(text=dat[16], what="")[1], scan(text=dat[31], what="")[1]))
row.names(counts1)<-c("QCReads", "HostRemoved", "Host&HumanRemoved")
library(knitr)
kable(counts1, caption = "Filtered read counts")
counts2 <- data.frame( Count=c(scan(text=dat[34], what="")[1], scan(text=dat[35], what="")[1]))
row.names(counts2)<-c("UniqueMapped", "MultiMapped")
kable(counts2, caption = "Virome mapped read counts")
par(mfrow=c(1,2), mar = c(10,4,4,2))
barplot(t(as.matrix(counts1)), main="Host and human contamination rate", cex.main=0.7, cex.names=0.7, las=2)
barplot(t(as.matrix(counts2)), main="virome mapping rate", cex.main=0.7, cex.names=0.7, las=2)

Detected viral genomes in the virome database

list.file <- file.path(wddir, "summary_table.tsv")
if (file.exists(list.file)){
    list<-read.table(list.file, header=TRUE, stringsAsFactors=FALSE,
                 sep="\t")
    sub<-list[list$mappedReads>params$minReadCount, ]
    sub<-na.omit(sub)
    if (nrow(sub)!=0){
    library(knitr)
    kable(sub, row.names=FALSE, caption = "Smmmary table")
    }else{
    txt<-paste("No virus detected with more than", params$minReadCount, "reads.")
    txt
    }
}else{
    txt<-paste("No virus detected with at least one read.")
        txt
}

Bubble plot of all detected viral families

if (exists("sub")){
    if (nrow(sub)!=0){
        agg<-aggregate(cbind(mappedBases, genomeCov_pect, aveDepth) ~ Family, data=sub, mean)
        symbols(agg$genomeCov_pect, agg$aveDepth, circles=sqrt(agg$mappedBases/pi),
                inches=0.35, fg=factor(agg$Family), bg="white", 
                xlab="% genome coverage", ylab="Mean depth",
                xlim=c(min(agg$genomeCov_pect)-max(sqrt(agg$mappedBases/pi))*0.35,
                    max(agg$genomeCov_pect)+max(sqrt(agg$mappedBases/pi))*0.35),
                ylim=c(min(agg$aveDepth)-max(sqrt(agg$mappedBases/pi))*0.35,
                    max(agg$aveDepth)+max(sqrt(agg$mappedBases/pi))*0.35))
        text(agg$genomeCov_pect, agg$aveDepth,agg$Family, cex=0.7, 
            col = 1:length(agg$Family))
    }else{
        txt<-paste("No virus detected with more than", params$minReadCount, "reads.")
        txt
    }
}else{
        txt<-paste("No virus detected with at least one read.")
        txt
}

Bubble plot of viral species, grouped by viral families

if (exists("sub")){
    if (nrow(sub)!=0){
        mylist<-split(sub, factor(sub$Family))
        for (i in length(mylist):1){
                df<-mylist[[i]]
                symbols(df$genomeCov_pect, df$aveDepth, circles=sqrt(df$mappedBases/pi), inches=0.35, fg=i, bg="white", xlab="% genome coverage", ylab="Mean depth", xlim=c(min(df$genomeCov_pect)-max(sqrt(agg$mappedBases/pi))*0.35, max(df$genomeCov_pect)+max(sqrt(agg$mappedBases/pi))*0.35), ylim=c(min(df$aveDepth)-max(sqrt(agg$mappedBases/pi))*0.35, max(df$aveDepth)+max(sqrt(agg$mappedBases/pi))*0.35), main=unique(df$Family))
                text(df$genomeCov_pect, df$aveDepth,df$CommonName, cex=0.7, c=i)
        }
    }else{
        txt<-paste("No virus detected with more than", params$minReadCount, "reads.")
        txt
    }
}else{
        txt<-paste("No virus detected with at least one read.")
        txt
}

Coverage plot of individual viral genome, sorted by abundance

if (exists("sub")){
    if (nrow(sub)!=0){
        for(i in 1:nrow(sub)) {
            chr=sub$ID[i]
            common_name=sub$CommonName[i]
            csv.file<-file.path(wddir, paste0(chr, ".csv"))
            if (file.exists(csv.file)){
                cov<-read.table(csv.file, header=FALSE, sep="\t", quote="", stringsAsFactors=FALSE)
                plot(cov$V5, cov$V6, type="l", col="blue", xlab=common_name, ylab="Depth", cex=0.5)
            }
        }   
    }else{
                txt<-paste("No virus detected with more than", params$minReadCount, "reads.")
                txt
        }
}else{
        txt<-paste("No virus detected with at least one read.")
        txt
}