###################################################################
# Functional Genomics Center Zurich
# This code is distributed under the terms of the GNU General
# Public License Version 3, June 2007.
# The terms are available here: http://www.gnu.org/licenses/gpl.html
# www.fgcz.ch
##' @title exceRpt_smallRNA report app
##' @description Use this reference class to process exceRpt_smallRNA's output after execution and perform a QC.
##' @author Miquel Anglada Girotto
EzAppExceRptReport =
setRefClass( "EzAppExceRptReport",
contains = "EzApp",
methods = list(
initialize = function()
{
"Initializes the application using its specific defaults."
runMethod <<- ezMethodExceRptReport
name <<- "EzAppExceRptReport"
appDefaults <<- rbind(name=ezFrame(Type="character", DefaultValue="Excerpt_Report", Description=""))
}
)
)
##' @title execute exceRpt_smallRNA's output processing.
##' @description create QC plots and count smallRNAs detected after executing ezMethodExceRpt().
##' @param input ezDataFrame()
##' @param output ezDataFrame()
##' @param param list() environment parameters.
##' @author Miquel Anglada Girotto
ezMethodExceRptReport = function(input=NA, output=NA, param=NA){
## set report's working directory
setwdNew(basename(output$getColumn("Report")))
## Process output
processedOutputDir = "processed_output"
samplePathsFilt = input$getFullPaths("excerpt")
plots = processSamples(samplePaths = samplePathsFilt,
outputDir = processedOutputDir)
## list files generated
dataFiles = list.files(processedOutputDir, pattern = '*.txt')
## Create report
makeRmdReport(plots=plots, dataFiles=dataFiles, rmdFile = "excerpt.Rmd",
reportTitle = paste0(param$name))
}
##' @title generate smallRNA counts and QC.
##' @description Obtain counts and QC plots across all samples.
##' @param samplePaths <string> vector of all the files that need to be processed together.
##' @param outputDir <string> directory name where to output generated objects and report after processing.
##' @param getPlotsObjects <bool> TRUE to output plots as an object.
##' @author (Rob Kitchen) Miquel Anglada Girotto
processSamples = function(samplePaths, outputDir){
require(plyr)
require(gplots)
require(marray)
require(reshape2)
require(ggplot2)
require(tools)
require(Rgraphviz)
require(scales)
## delete -e from .stats (they do not appear in the ExampleData, but they do when I ran the program)
#delete_e(samplePaths)
## Get directories containing counts
samplePathList = sapply(samplePaths, list.dirs, recursive=F)
stopifnot(is.character(samplePathList))
stopifnot(file.exists(samplePathList))
## create output dir
dir.create(outputDir)
## reads, normalises, and saves individual sample results
sampleIDs = readData(samplePathList = samplePathList, output.dir = outputDir)
## plot the data
plotsList = PlotData(sampleIDs, outputDir)
return(invisible(plotsList))
}
##' @title Get smallRNA counts and stats
##' @description Get smallRNA counts and stats to return as final output and make QC plots.
##' @author (Rob Kitchen) Miquel Anglada Girotto
readData = function(samplePathList, output.dir){
##
## Create objects to contain the data
##
sample.data = vector(mode="list",length=length(samplePathList))
allIDs.calibrator = NULL
allIDs.miRNA = NULL
allIDs.tRNA = NULL
allIDs.piRNA = NULL
allIDs.gencode = NULL
allIDs.circularRNA = NULL
allIDs.exogenous_miRNA = NULL
allIDs.exogenous_rRNA = NULL
allIDs.exogenous_genomes = NULL
taxonomyInfo.exogenous_rRNA = NULL
taxonomyInfo.exogenous_genomes = NULL
mapping.stats = matrix(0,nrow=length(samplePathList),ncol=31, dimnames=list(1:length(samplePathList), c("input","successfully_clipped","passed_quality_filter","failed_homopolymer_filter","passed_phiX_filter","calibrator","UniVec_contaminants","rRNA","reads_used_for_alignment","genome","miRNA_sense","miRNA_antisense","miRNAprecursor_sense","miRNAprecursor_antisense","tRNA_sense","tRNA_antisense","piRNA_sense","piRNA_antisense","gencode_sense","gencode_antisense","circularRNA_sense","circularRNA_antisense","not_mapped_to_genome_or_libs","repetitiveElements","endogenous_gapped","input_to_exogenous_miRNA","exogenous_miRNA","input_to_exogenous_rRNA","exogenous_rRNA","input_to_exogenous_genomes","exogenous_genomes")))
qc.results = matrix(0,nrow=length(samplePathList),ncol=5, dimnames=list(1:length(samplePathList), c("InputReads","GenomeReads","TranscriptomeReads","TranscriptomeGenomeRatio","TranscriptomeComplexity")))
maxReadLength = 10000
read.lengths = matrix(0,nrow=length(samplePathList),ncol=maxReadLength+1,dimnames=list(1:length(samplePathList), 0:maxReadLength))
##
## Loop through all samples and read the pipeline output
##
printMessage(c("Reading sample data..."))
removeSamples = NULL
for(i in 1:length(samplePathList)){
thisSampleID = names(samplePathList)[i]
## Get timings and check this sample finished successfully
tmp.stats = readLines(paste(samplePathList[i],".stats",sep=""))
tmp.stats = delete_e(tmp.stats)
tmp.stats = do.call(rbind,strsplit(tmp.stats,'\t'))
tmp.stats = data.frame(tmp.stats,stringsAsFactors = FALSE)
#tmp.stats[c(1,nrow(tmp.stats)),2] = ""
x.start = 1 #grep("#STATS",tmp.stats[,1])
x.end = grep("#END OF STATS",tmp.stats[,1])
if(length(x.start) > 0 && length(x.end) > 0){
tmp.start = strptime(unlist(strsplit(tmp.stats[x.start[1],1],"Run started at "))[2],"%Y-%m-%d--%H:%M:%S")
tmp.end = strptime(unlist(strsplit(tmp.stats[x.end[1],1],"Run completed at "))[2],"%Y-%m-%d--%H:%M:%S")
runTiming = data.frame(start=tmp.start, completed=tmp.end, duration=difftime(tmp.end,tmp.start), duration_secs=as.numeric(difftime(tmp.end,tmp.start,units="secs")))
continue = T
}else{
continue = F
removeSamples = c(removeSamples, i)
printMessage(c("[",i,"/",length(samplePathList),"] WARNING: Incomplete run for sample \'",thisSampleID,"\', ignoring"))
}
if(continue == T){
##
## Read sample mapping stats
##
#tmp.stats = tmp.stats[-c(1,2,nrow(tmp.stats)),]
#tmp.stats[tmp.stats[,1] %in% "clipped", 1] = "successfully_clipped"
##tmp.stats = tmp.stats[-nrow(tmp.stats),]
tmp.stats <- tmp.stats[tmp.stats[,1] %in% colnames(mapping.stats),]
mapping.stats[i, match(tmp.stats[,1], colnames(mapping.stats))] = as.numeric(tmp.stats[,2])
rownames(mapping.stats)[i] = thisSampleID
##
## Read the QC result
##
adapterConfidence = NA
qcOutcome = NA
if(file.exists(paste(samplePathList[i],".qcResult",sep=""))){
tmp.qc = read.table(paste(samplePathList[i],".qcResult",sep=""), stringsAsFactors=F, fill=T, header=F, sep=" ",skip=0)
if(tmp.qc[1,1] == "Adapter_confidence:"){
adapterConfidence = tmp.qc[1,2]
tmp.qc = tmp.qc[-1,]
}
qcOutcome = tmp.qc[1,2]
qc.results[i, match(gsub(":$","",tmp.qc[-1,1]), colnames(qc.results))] = as.numeric(tmp.qc[-1,2])
#qc.results[i, ] = as.numeric(tmp.qc[-1,2])
rownames(qc.results)[i] = thisSampleID
}
##
## Read the adapter sequence
##
if(paste(thisSampleID,".adapterSeq",sep="") %in% dir(samplePathList[i])){
tmp.seq = try(read.table(paste(samplePathList[i],"/",thisSampleID,".adapterSeq",sep="")), silent=T)
if(class(tmp.seq) == "try-error"){
adapterSeq = NA
}else{
adapterSeq = as.character(tmp.seq[1,1])
}
} else {
adapterSeq = NA
}
##
## Read the calibrator counts, if available
##
calibratorCounts = NULL
if(paste(thisSampleID,".clipped.trimmed.filtered.calibratormapped.counts",sep="") %in% dir(samplePathList[i])){
calibratorCounts = try(read.table(paste(samplePathList[i],"/",thisSampleID,".clipped.trimmed.filtered.calibratormapped.counts",sep=""), stringsAsFactors=F)[,2:1], silent=T)
if(class(calibratorCounts) == "try-error"){
calibratorCounts = NULL
}else{
colnames(calibratorCounts) = c("calibratorID","readCount")
}
}
##
## Read the clipped read lengths
##
if(length(grep(".readLengths.txt$", dir(samplePathList[i]))) == 1){
tmp = read.table(paste(samplePathList[i], dir(samplePathList[i])[grep(".readLengths.txt$", dir(samplePathList[i]))], sep="/"))
read.lengths[i, 1:ncol(tmp)] = as.numeric(tmp[2,])
rownames(read.lengths)[i] = thisSampleID
}
##
## Read sample data
##
availableFiles = dir(samplePathList[i])
miRNA_sense=miRNA_antisense = tRNA_sense=tRNA_antisense = piRNA_sense=piRNA_antisense = gencode_sense=gencode_antisense = circRNA_sense=circRNA_antisense = NULL
if("readCounts_miRNAmature_sense.txt" %in% availableFiles){
miRNA_sense = read.table(paste(samplePathList[i],"readCounts_miRNAmature_sense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
miRNA_sense = cbind(miRNA_sense, ID=sapply(rownames(miRNA_sense), function(id){ multiID = unlist(strsplit(id,"\\|")); multiIDs = sapply(multiID, function(idPart){unlist(strsplit(idPart,":"))[1]}); if(length(multiIDs) == 1){ multiIDs }else{ paste(sort(multiIDs),collapse="|") } }))
}
if("readCounts_miRNAmature_antisense.txt" %in% availableFiles){
miRNA_antisense = read.table(paste(samplePathList[i],"readCounts_miRNAmature_antisense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
miRNA_antisense = cbind(miRNA_antisense, ID=sapply(rownames(miRNA_antisense), function(id){ multiID = unlist(strsplit(id,"\\|")); multiIDs = sapply(multiID, function(idPart){unlist(strsplit(idPart,":"))[1]}); if(length(multiIDs) == 1){ multiIDs }else{ paste(sort(multiIDs),collapse="|") } }))
}
if("readCounts_tRNA_sense.txt" %in% availableFiles){
tmp = read.table(paste(samplePathList[i],"readCounts_tRNA_sense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
tmp = cbind(tmp, ID=sapply(rownames(tmp), function(id){ unlist(strsplit(id,"-"))[2] }))
tRNA_sense = ddply(tmp, "ID", function(mat){ c(as.numeric(mat[1,1:2]),sum(mat$multimapAdjustedReadCount),sum(mat$multimapAdjustedBarcodeCount)) })
colnames(tRNA_sense)[-1] = colnames(tmp)[1:4]
tRNA_sense = tRNA_sense[order(tRNA_sense$multimapAdjustedReadCount,decreasing=T), ]
}
if("readCounts_tRNA_antisense.txt" %in% availableFiles){
tmp = read.table(paste(samplePathList[i],"readCounts_tRNA_antisense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
tmp = cbind(tmp, ID=sapply(rownames(tmp), function(id){ unlist(strsplit(id,"-"))[2] }))
tRNA_antisense = ddply(tmp, "ID", function(mat){ c(as.numeric(mat[1,1:2]),sum(mat$multimapAdjustedReadCount),sum(mat$multimapAdjustedBarcodeCount)) })
colnames(tRNA_antisense)[-1] = colnames(tmp)[1:4]
tRNA_antisense = tRNA_antisense[order(tRNA_antisense$multimapAdjustedReadCount,decreasing=T), ]
}
if("readCounts_piRNA_sense.txt" %in% availableFiles){
piRNA_sense = read.table(paste(samplePathList[i],"readCounts_piRNA_sense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
}
if("readCounts_piRNA_antisense.txt" %in% availableFiles){
piRNA_antisense = read.table(paste(samplePathList[i],"readCounts_piRNA_antisense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
}
makeGeneID = function(id){
bits=unlist(strsplit(id,":")); geneNameBits=unlist(strsplit(bits[3],"-"));
geneName = geneNameBits[1]
if(length(geneNameBits) > 2){ geneName=paste(geneNameBits[-length(geneNameBits)],collapse="-") }
paste(geneName,bits[2],sep=":")
}
if("readCounts_gencode_sense.txt" %in% availableFiles){
tmp = read.table(paste(samplePathList[i],"readCounts_gencode_sense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
tmp = cbind(tmp, ID=sapply(rownames(tmp), makeGeneID))
gencode_sense = ddply(tmp, "ID", function(mat){ c(as.numeric(mat[1,1:2]),sum(mat$multimapAdjustedReadCount),sum(mat$multimapAdjustedBarcodeCount)) })
colnames(gencode_sense)[-1] = colnames(tmp)[1:4]
gencode_sense = gencode_sense[order(gencode_sense$multimapAdjustedReadCount,decreasing=T), ]
}
if("readCounts_gencode_antisense.txt" %in% availableFiles){
tmp = read.table(paste(samplePathList[i],"readCounts_gencode_antisense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
tmp = cbind(tmp, ID=sapply(rownames(tmp), makeGeneID))
gencode_antisense = ddply(tmp, "ID", function(mat){ c(as.numeric(mat[1,1:2]),sum(mat$multimapAdjustedReadCount),sum(mat$multimapAdjustedBarcodeCount)) })
colnames(gencode_antisense)[-1] = colnames(tmp)[1:4]
gencode_antisense = gencode_antisense[order(gencode_antisense$multimapAdjustedReadCount,decreasing=T), ]
}
if("readCounts_circRNA_sense.txt" %in% availableFiles){
circRNA_sense = read.table(paste(samplePathList[i],"readCounts_circRNA_sense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
}
if("readCounts_circRNA_antisense.txt" %in% availableFiles){
circRNA_antisense = read.table(paste(samplePathList[i],"readCounts_circRNA_antisense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
}
##
## Read exogenous miRNA alignments (if applicable)
##
exogenous_miRNA_sense = NA
exogenous_miRNA_IDs = NULL
if("EXOGENOUS_miRNA" %in% availableFiles){
tmp.dir = paste(samplePathList[i],"EXOGENOUS_miRNA",sep="/")
if("readCounts_miRNAmature_sense.txt" %in% dir(tmp.dir)){
exogenous_miRNA_sense = read.table(paste(tmp.dir,"readCounts_miRNAmature_sense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
exogenous_miRNA_IDs = rownames(exogenous_miRNA_sense)
}
if("readCounts_miRNAmature_antisense.txt" %in% dir(tmp.dir)){
exogenous_miRNA_antisense = read.table(paste(tmp.dir,"readCounts_miRNAmature_antisense.txt",sep="/"), header=T, sep="\t", comment.char="", stringsAsFactors=F,colClasses=c("character","numeric","numeric","numeric","numeric"), row.names=1)
}
}
##
## Read exogenous rRNA alignments (if applicable)
##
exogenous_rRNA = NA
exogenous_rRNA_IDs = NULL
if("EXOGENOUS_rRNA" %in% availableFiles){
tmp.dir = paste(samplePathList[i],"EXOGENOUS_rRNA",sep="/")
if("ExogenousRibosomalAlignments.result.taxaAnnotated.txt" %in% dir(tmp.dir)){
exogenous_rRNA = try(read.table(paste(tmp.dir,"/ExogenousRibosomalAlignments.result.taxaAnnotated.txt",sep=""), sep="\t", stringsAsFactors = F, quote="", comment.char="",header=T), silent=T)
if(class(exogenous_rRNA) == "try-error"){
exogenous_rRNA = NULL
}#else{
# colnames(exogenous_rRNA) = c("indent","distFromRoot","level","name","uniqueReads","allSumReads")
#}
## remove the 'Bacteria' stick insect!
i.toRemove = which(exogenous_rRNA$name == "Bacteria" & exogenous_rRNA$level == "genus")
if(length(i.toRemove) > 0)
exogenous_rRNA = exogenous_rRNA[-i.toRemove, ]
exogenous_rRNA_IDs = exogenous_rRNA$name
taxonomyInfo.exogenous_rRNA = unique(rbind(taxonomyInfo.exogenous_rRNA, exogenous_rRNA[,1:5]))
}
}
##
## Read exogenous genome alignments (if applicable)
##
exogenous_genomes = NA
exogenous_genomes_IDs = NULL
if("EXOGENOUS_genomes" %in% availableFiles){
tmp.dir = paste(samplePathList[i],"EXOGENOUS_genomes",sep="/")
if("ExogenousGenomicAlignments.result.taxaAnnotated.txt" %in% dir(tmp.dir)){
exogenous_genomes = read.table(paste(tmp.dir,"/ExogenousGenomicAlignments.result.taxaAnnotated.txt",sep=""), sep="\t", stringsAsFactors = F, quote="", comment.char="", header=T)
if(class(exogenous_genomes) == "try-error"){
exogenous_genomes = NULL
}#else{
# colnames(exogenous_genomes) = c("indent","distFromRoot","level","name","uniqueReads","allSumReads")
#}
i.toRemove = which(exogenous_genomes$name == "Bacteria" & exogenous_genomes$level == "genus")
if(length(i.toRemove) > 0)
exogenous_genomes = exogenous_genomes[-i.toRemove, ]
exogenous_genomes_IDs = exogenous_genomes$name
taxonomyInfo.exogenous_genomes = unique(rbind(taxonomyInfo.exogenous_genomes, exogenous_genomes[,1:5]))
}
}
# Update list of detected smallRNA IDs
allIDs.calibrator = unique(c(allIDs.calibrator, as.character(calibratorCounts$calibratorID)))
allIDs.miRNA = unique(c(allIDs.miRNA, as.character(miRNA_sense$ID)))
allIDs.tRNA = unique(c(allIDs.tRNA, as.character(tRNA_sense$ID)))
allIDs.piRNA = unique(c(allIDs.piRNA, rownames(piRNA_sense)))
allIDs.gencode = unique(c(allIDs.gencode, as.character(gencode_sense$ID)))
allIDs.circularRNA = unique(c(allIDs.circularRNA, rownames(circRNA_sense)))
allIDs.exogenous_miRNA = unique(c(allIDs.exogenous_miRNA, exogenous_miRNA_IDs))
allIDs.exogenous_rRNA = unique(c(allIDs.exogenous_rRNA, exogenous_rRNA_IDs))
allIDs.exogenous_genomes = unique(c(allIDs.exogenous_genomes, exogenous_genomes_IDs))
sample.data[[i]] = list("miRNA_sense"=miRNA_sense,"miRNA_antisense"=miRNA_antisense, "tRNA_sense"=tRNA_sense,"tRNA_antisense"=tRNA_antisense, "piRNA_sense"=piRNA_sense,"piRNA_antisense"=piRNA_antisense, "gencode_sense"=gencode_sense,"gencode_antisense"=gencode_antisense, "circRNA_sense"=circRNA_sense,"circRNA_antisense"=circRNA_antisense, "exogenous_miRNA_sense"=exogenous_miRNA_sense, "exogenous_rRNA"=exogenous_rRNA, "exogenous_genomes"=exogenous_genomes, "adapterSeq"=adapterSeq, "adapterConfidence"=adapterConfidence, "qcOutcome"=qcOutcome, "runTiming"=runTiming, "calibratorCounts"=calibratorCounts)
names(sample.data)[i] = thisSampleID
printMessage(c("[",i,"/",length(samplePathList),"] Added sample \'",thisSampleID,"\'"))
}
}
##
## Remove failed/incomplete samples
##
stopifnot(length(removeSamples) < length(sample.data))
if(length(removeSamples) > 0){
read.lengths = read.lengths[-removeSamples, ]
sample.data = sample.data[-removeSamples]
mapping.stats = mapping.stats[-removeSamples,]
qc.results = qc.results[-removeSamples,]
}
##
## Trim read-length matrix
##
read.lengths = read.lengths[,0:(max(as.numeric(colnames(read.lengths[, colSums(read.lengths) > 0, drop=F])))+1), drop=F]
#read.lengths = read.lengths[,colSums(read.lengths) > 0, drop=F]
##
## Collect IDs
##
allIDs = list("calibrator"=allIDs.calibrator, "miRNA_sense"=allIDs.miRNA, "tRNA_sense"=allIDs.tRNA, "piRNA_sense"=allIDs.piRNA, "gencode_sense"=allIDs.gencode, "circRNA_sense"=allIDs.circularRNA, "exogenous_miRNA"=allIDs.exogenous_miRNA, "exogenous_rRNA"=allIDs.exogenous_rRNA, "exogenous_genomes"=allIDs.exogenous_genomes)
##
## Convert sample data to large per-smallRNA expression matrices
##
printMessage("Creating raw read-count matrices for available libraries")
#run.duration = data.frame(runDuration_string=rep("",length(sample.data)), runDuration_secs=rep(0,length(sample.data)),stringsAsFactors = F)
run.duration = data.frame(runDuration_secs=rep(0,length(sample.data)),stringsAsFactors = F)
rownames(run.duration) = names(sample.data)
exprs.calibrator = matrix(0,ncol=length(sample.data),nrow=length(allIDs$calibrator), dimnames=list(allIDs$calibrator, names(sample.data)))
exprs.miRNA = matrix(0,ncol=length(sample.data),nrow=length(allIDs$miRNA_sense), dimnames=list(allIDs$miRNA_sense, names(sample.data)))
exprs.tRNA = matrix(0,ncol=length(sample.data),nrow=length(allIDs$tRNA_sense), dimnames=list(allIDs$tRNA_sense, names(sample.data)))
exprs.piRNA = matrix(0,ncol=length(sample.data),nrow=length(allIDs$piRNA_sense), dimnames=list(allIDs$piRNA_sense, names(sample.data)))
exprs.gencode = matrix(0,ncol=length(sample.data),nrow=length(allIDs$gencode_sense), dimnames=list(allIDs$gencode_sense, names(sample.data)))
exprs.circRNA = matrix(0,ncol=length(sample.data),nrow=length(allIDs$circRNA_sense), dimnames=list(allIDs$circRNA_sense, names(sample.data)))
exprs.exogenous_miRNA = matrix(0,ncol=length(sample.data),nrow=length(allIDs$exogenous_miRNA), dimnames=list(allIDs$exogenous_miRNA, names(sample.data)))
if(is.null(taxonomyInfo.exogenous_rRNA))
tmp.nrow = 0
else
tmp.nrow = nrow(taxonomyInfo.exogenous_rRNA)
exprs.exogenousRibosomal_specific = matrix(0,ncol=length(sample.data),nrow=tmp.nrow, dimnames=list(taxonomyInfo.exogenous_rRNA$ID, names(sample.data)))
exprs.exogenousRibosomal_cumulative = matrix(0,ncol=length(sample.data),nrow=tmp.nrow, dimnames=list(taxonomyInfo.exogenous_rRNA$ID, names(sample.data)))
if(is.null(taxonomyInfo.exogenous_genomes))
tmp.nrow = 0
else
tmp.nrow = nrow(taxonomyInfo.exogenous_genomes)
exprs.exogenousGenomes_specific = matrix(0,ncol=length(sample.data),nrow=tmp.nrow, dimnames=list(taxonomyInfo.exogenous_genomes$ID, names(sample.data)))
exprs.exogenousGenomes_cumulative = matrix(0,ncol=length(sample.data),nrow=tmp.nrow, dimnames=list(taxonomyInfo.exogenous_genomes$ID, names(sample.data)))
for(i in 1:length(sample.data)){
run.duration[i,] = sample.data[[i]]$runTiming[1,4,drop=F]
if(!is.null(nrow(sample.data[[i]]$calibratorCounts)))
exprs.calibrator[match(sample.data[[i]]$calibratorCounts$calibratorID, rownames(exprs.calibrator)),i] = as.numeric(sample.data[[i]]$calibratorCounts$readCount)
exprs.miRNA[match(sample.data[[i]]$miRNA_sense$ID, rownames(exprs.miRNA)),i] = as.numeric(sample.data[[i]]$miRNA_sense$multimapAdjustedReadCount)
exprs.tRNA[match(sample.data[[i]]$tRNA_sense$ID, rownames(exprs.tRNA)),i] = as.numeric(sample.data[[i]]$tRNA_sense$multimapAdjustedReadCount)
exprs.piRNA[match(rownames(sample.data[[i]]$piRNA_sense), rownames(exprs.piRNA)),i] = as.numeric(sample.data[[i]]$piRNA_sense$multimapAdjustedReadCount)
exprs.gencode[match(sample.data[[i]]$gencode_sense$ID, rownames(exprs.gencode)),i] = as.numeric(sample.data[[i]]$gencode_sense$multimapAdjustedReadCount)
exprs.circRNA[match(rownames(sample.data[[i]]$circRNA_sense), rownames(exprs.circRNA)),i] = as.numeric(sample.data[[i]]$circRNA_sense$multimapAdjustedReadCount)
## Exogenous miRNA
if(!is.null(nrow(sample.data[[i]]$exogenous_miRNA)))
exprs.exogenous_miRNA[match(rownames(sample.data[[i]]$exogenous_miRNA), rownames(exprs.exogenous_miRNA)),i] = as.numeric(sample.data[[i]]$exogenous_miRNA$multimapAdjustedReadCount)
## Exogenous rRNA
if(!is.null(nrow(sample.data[[i]]$exogenous_rRNA))){
exprs.exogenousRibosomal_specific[match(sample.data[[i]]$exogenous_rRNA$ID, rownames(exprs.exogenousRibosomal_specific)),i] = as.numeric(sample.data[[i]]$exogenous_rRNA$readCount_direct)
exprs.exogenousRibosomal_cumulative[match(sample.data[[i]]$exogenous_rRNA$ID, rownames(exprs.exogenousRibosomal_cumulative)),i] = as.numeric(sample.data[[i]]$exogenous_rRNA$readCount_inherited)
}
## Exogenous Genomes
if(!is.null(nrow(sample.data[[i]]$exogenous_genomes))){
exprs.exogenousGenomes_specific[match(sample.data[[i]]$exogenous_genomes$ID, rownames(exprs.exogenousGenomes_specific)),i] = as.numeric(sample.data[[i]]$exogenous_genomes$readCount_direct)
exprs.exogenousGenomes_cumulative[match(sample.data[[i]]$exogenous_genomes$ID, rownames(exprs.exogenousGenomes_cumulative)),i] = as.numeric(sample.data[[i]]$exogenous_genomes$readCount_inherited)
}
}
##
## Calculate the total number of mapped reads to the rRNA, genome, and exogenous sequences
##
mapping.stats[is.na(mapping.stats)] = 0
mapping.stats = as.data.frame(mapping.stats)
libSizes = list()
libSizes$input = mapping.stats[,colnames(mapping.stats) %in% c("input")]
libSizes$successfully_clipped = mapping.stats[,colnames(mapping.stats) %in% c("successfully_clipped")]
libSizes$reads_used_for_alignment = mapping.stats[,colnames(mapping.stats) %in% c("reads_used_for_alignment")]
libSizes$all = rowSums(mapping.stats[,colnames(mapping.stats) %in% c("rRNA","genome","miRNA_exogenous_sense")])
libSizes$endogenous = rowSums(mapping.stats[,colnames(mapping.stats) %in% c("rRNA","genome")])
libSizes$genome = mapping.stats[,colnames(mapping.stats) %in% "genome"]
libSizes$smRNA = mapping.stats[,grep("sense",colnames(mapping.stats))]
libSizes$miRNA = colSums(exprs.miRNA)
libSizes$exogenous_miRNA = colSums(exprs.exogenous_miRNA)
libSizes$exogenous_rRNA = exprs.exogenousRibosomal_cumulative[rownames(exprs.exogenousRibosomal_cumulative)=="1",]
libSizes$exogenous_genomes = exprs.exogenousGenomes_cumulative[rownames(exprs.exogenousGenomes_cumulative)=="1",]
##
## Save the raw count data
##
printMessage("Saving raw data to disk")
#save(exprs.miRNA, exprs.tRNA, exprs.piRNA, exprs.gencode, exprs.circRNA, exprs.exogenous_miRNA, exprs.exogenous_genomes, mapping.stats, libSizes, read.lengths, file=paste(output.dir, "exceRpt_smallRNAQuants_ReadCounts.RData", sep="/"))
save(exprs.miRNA, exprs.tRNA, exprs.piRNA, exprs.gencode, exprs.circRNA, exprs.exogenous_miRNA, exprs.exogenousRibosomal_specific, exprs.exogenousRibosomal_cumulative, taxonomyInfo.exogenous_rRNA, exprs.exogenousGenomes_specific, exprs.exogenousGenomes_cumulative, taxonomyInfo.exogenous_genomes, mapping.stats, qc.results, libSizes, read.lengths, run.duration, exprs.calibrator, file=paste(output.dir, "exceRpt_smallRNAQuants_ReadCounts.RData", sep="/"))
if(nrow(exprs.calibrator) > 0)
write.table(exprs.calibrator, file=paste(output.dir, "exceRpt_CALIBRATOR_ReadCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.miRNA) > 0)
write.table(exprs.miRNA, file=paste(output.dir, "exceRpt_miRNA_ReadCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.tRNA) > 0)
write.table(exprs.tRNA, file=paste(output.dir, "exceRpt_tRNA_ReadCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.piRNA) > 0)
write.table(exprs.piRNA, file=paste(output.dir, "exceRpt_piRNA_ReadCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.gencode) > 0)
write.table(exprs.gencode, file=paste(output.dir, "exceRpt_gencode_ReadCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.circRNA) > 0)
write.table(exprs.circRNA, file=paste(output.dir, "exceRpt_circularRNA_ReadCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.exogenous_miRNA) > 0)
write.table(exprs.exogenous_miRNA, file=paste(output.dir, "exceRpt_exogenous_miRNA_ReadCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.exogenousRibosomal_specific) > 0){
tmp = cbind(taxonomyInfo.exogenous_rRNA[match(rownames(exprs.exogenousRibosomal_specific), taxonomyInfo.exogenous_rRNA$ID), ], exprs.exogenousRibosomal_specific)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousRibosomal_taxonomySpecific_ReadCounts.txt", sep="/"), sep="\t", row.names=F, quote=F)
tmp = cbind(taxonomyInfo.exogenous_rRNA[match(rownames(exprs.exogenousRibosomal_cumulative), taxonomyInfo.exogenous_rRNA$ID), ], exprs.exogenousRibosomal_cumulative)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousRibosomal_taxonomyCumulative_ReadCounts.txt", sep="/"), sep="\t", row.names=F, quote=F)
}
if(nrow(exprs.exogenousGenomes_specific) > 0){
tmp = cbind(taxonomyInfo.exogenous_genomes[match(rownames(exprs.exogenousGenomes_specific), taxonomyInfo.exogenous_genomes$ID), ], exprs.exogenousGenomes_specific)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousGenomes_taxonomySpecific_ReadCounts.txt", sep="/"), sep="\t", row.names=F, quote=F)
tmp = cbind(taxonomyInfo.exogenous_genomes[match(rownames(exprs.exogenousGenomes_cumulative), taxonomyInfo.exogenous_genomes$ID), ], exprs.exogenousGenomes_cumulative)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousGenomes_taxonomyCumulative_ReadCounts.txt", sep="/"), sep="\t", row.names=F, quote=F)
}
write.table(read.lengths, file=paste(output.dir, "exceRpt_ReadLengths.txt", sep="/"), sep="\t", col.names=NA, quote=F)
##
## Keep a record of the adapter sequences for QC
##
adapterSeq = unlist(lapply(sample.data, function(l){ l$adapterSeq }))
write.table(as.data.frame(adapterSeq), file=paste(output.dir, "exceRpt_adapterSequences.txt", sep="/"), sep="\t", col.names=NA, quote=F)
##
## Write the numbers of reads mapping at each stage and the QC results
##
write.table(mapping.stats, file=paste(output.dir,"exceRpt_readMappingSummary.txt",sep="/"), sep="\t", col.names=NA, quote=F)
write.table(qc.results, file=paste(output.dir,"exceRpt_QCresults.txt",sep="/"), sep="\t", col.names=NA, quote=F)
##
## Calculate reads per million (RPM)
##
printMessage("Normalising to RPM")
#libSize.use = libSizes$all
#libSize.use = libSizes$miRNA
libSize.use = libSizes$genome
exprs.miRNA.rpm = t(10^6 * t(exprs.miRNA) / libSize.use)
exprs.tRNA.rpm = t(10^6 * t(exprs.tRNA) / libSize.use)
exprs.piRNA.rpm = t(10^6 * t(exprs.piRNA) / libSize.use)
exprs.gencode.rpm = t(10^6 * t(exprs.gencode) / libSize.use)
exprs.circRNA.rpm = t(10^6 * t(exprs.circRNA) / libSize.use)
exprs.exogenous_miRNA.rpm = t(10^6 * t(exprs.exogenous_miRNA) / libSizes$exogenous_miRNA)
exprs.exogenousRibosomal_specific.rpm = exprs.exogenousRibosomal_specific
exprs.exogenousRibosomal_cumulative.rpm = exprs.exogenousRibosomal_cumulative
if(nrow(exprs.exogenousRibosomal_specific) > 0){
exprs.exogenousRibosomal_specific.rpm = t(10^6 * t(exprs.exogenousRibosomal_specific) / libSizes$exogenous_rRNA)
exprs.exogenousRibosomal_cumulative.rpm = t(10^6 * t(exprs.exogenousRibosomal_cumulative) / libSizes$exogenous_rRNA)
}
exprs.exogenousGenomes_specific.rpm = exprs.exogenousGenomes_specific
exprs.exogenousGenomes_cumulative.rpm = exprs.exogenousGenomes_cumulative
if(nrow(exprs.exogenousGenomes_specific) > 0){
exprs.exogenousGenomes_specific.rpm = t(10^6 * t(exprs.exogenousGenomes_specific) / libSizes$exogenous_genomes)
exprs.exogenousGenomes_cumulative.rpm = t(10^6 * t(exprs.exogenousGenomes_cumulative) / libSizes$exogenous_genomes)
}
##
## Save the RPM normalised data
##
printMessage("Saving normalised data to disk")
save(exprs.miRNA.rpm, exprs.tRNA.rpm, exprs.piRNA.rpm, exprs.gencode.rpm, exprs.circRNA.rpm, exprs.exogenous_miRNA.rpm, exprs.exogenousRibosomal_specific.rpm, exprs.exogenousRibosomal_cumulative.rpm, exprs.exogenousGenomes_specific.rpm, exprs.exogenousGenomes_cumulative.rpm, file=paste(output.dir, "exceRpt_smallRNAQuants_ReadsPerMillion.RData", sep="/"))
if(nrow(exprs.miRNA.rpm) > 0)
write.table(exprs.miRNA.rpm, file=paste(output.dir, "exceRpt_miRNA_ReadsPerMillion.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.tRNA.rpm) > 0)
write.table(exprs.tRNA.rpm, file=paste(output.dir, "exceRpt_tRNA_ReadsPerMillion.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.piRNA.rpm) > 0)
write.table(exprs.piRNA.rpm, file=paste(output.dir, "exceRpt_piRNA_ReadsPerMillion.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.gencode.rpm) > 0)
write.table(exprs.gencode.rpm, file=paste(output.dir, "exceRpt_gencode_ReadsPerMillion.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.circRNA.rpm) > 0)
write.table(exprs.circRNA.rpm, file=paste(output.dir, "exceRpt_circularRNA_ReadsPerMillion.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.exogenous_miRNA.rpm) > 0)
write.table(exprs.exogenous_miRNA.rpm, file=paste(output.dir, "exceRpt_exogenous_miRNA_ReadsPerMillion.txt", sep="/"), sep="\t", col.names=NA, quote=F)
if(nrow(exprs.exogenousRibosomal_specific) > 0){
tmp = cbind(taxonomyInfo.exogenous_rRNA[match(rownames(exprs.exogenousRibosomal_specific.rpm), taxonomyInfo.exogenous_rRNA$ID), ], exprs.exogenousRibosomal_specific.rpm)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousRibosomal_taxonomySpecific_ReadsPerMillion.txt", sep="/"), sep="\t", row.names=F, quote=F)
tmp = cbind(taxonomyInfo.exogenous_rRNA[match(rownames(exprs.exogenousRibosomal_cumulative.rpm), taxonomyInfo.exogenous_rRNA$ID), ], exprs.exogenousRibosomal_cumulative.rpm)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousRibosomal_taxonomyCumulative_ReadsPerMillion.txt", sep="/"), sep="\t", row.names=F, quote=F)
}
if(nrow(exprs.exogenousGenomes_specific) > 0){
tmp = cbind(taxonomyInfo.exogenous_genomes[match(rownames(exprs.exogenousGenomes_specific.rpm), taxonomyInfo.exogenous_genomes$ID), ], exprs.exogenousGenomes_specific.rpm)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousGenomes_taxonomySpecific_ReadsPerMillion.txt", sep="/"), sep="\t", row.names=F, quote=F)
tmp = cbind(taxonomyInfo.exogenous_genomes[match(rownames(exprs.exogenousGenomes_cumulative.rpm), taxonomyInfo.exogenous_genomes$ID), ], exprs.exogenousGenomes_cumulative.rpm)
write.table(tmp, file=paste(output.dir, "exceRpt_exogenousGenomes_taxonomyCumulative_ReadsPerMillion.txt", sep="/"), sep="\t", row.names=F, quote=F)
}
return(rownames(mapping.stats))
}
##' @title delete "-e " strings
##' @description delete "-e " strings in *.stats processed output document to create the required dataframe
##' @description for downstream processing
##' @param linesRead <string> vector output from readLines()
##' @author Miquel Anglada Girotto
delete_e = function(linesRead){
f = gsub('-e ','',linesRead)
return(f)
}
##' @title print info messages
##' @description to print formatted info messages.
##' @param message <string>
##' @author (Rob Kitchen)
printMessage = function(message=""){
cat(as.character(Sys.time()),": ",paste(message,sep=""),"\n",sep="")
}
##' @title Make QC plots
##' @description Create QC plots from data obtained through readData()
##' @param sampleIDs <string> names of the samples obtained through readData()
##' @param output.dir <string> directory to which output the files to create the report.
##' @param sampleGroups <string> if any sample groups were defined. Deprecated.
##' @param minPercent_exogenousRibosomal <double> Not used.
##' @param minPercent_exogenousGenomes <double> Not used.
##' @author (Rob Kitchen) Miquel Anglada Girotto
PlotData = function(sampleIDs, output.dir, sampleGroups=NA, minPercent_exogenousRibosomal=0.5, minPercent_exogenousGenomes=0.5){
load(paste(output.dir, "exceRpt_smallRNAQuants_ReadCounts.RData", sep="/"))
load(paste(output.dir, "exceRpt_smallRNAQuants_ReadsPerMillion.RData", sep="/"))
##
## Order samples based on similarity of mapping statistics
##
sampleOrder = 1
if(nrow(mapping.stats) > 1){
h = hclust(dist(1-cor(t(mapping.stats))))
sampleOrder = h$order
}
##
## Create list where to save plots that can be returned
##
plotsList = list()
##
## Open PDF for diagnostic plots
##
printMessage("Creating QC plots")
pdf(paste(output.dir,"exceRpt_DiagnosticPlots.pdf",sep="/"), height=10, width=20)
if(ncol(read.lengths) > 1){
printMessage("Plotting read-length distributions")
##
## plot distribution of clipped read lengths - read count
##
tmp = melt(read.lengths); colnames(tmp) = c("sample","length","count")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sample, sampleGroups$sampleID), 2] }
tmp$sample <- factor(tmp$sample)
maxX = min(c(100,max(tmp$length)))
p = ggplot(tmp, aes(x=length, y=count, color=sample)) + geom_line(alpha=1) +
xlab("read length (nt)") + ylab("# reads") +
ggtitle("read-length distributions:\nraw read count")+
scale_x_continuous(limits=c(15,maxX), minor_breaks=1:maxX, breaks=seq(15,maxX,by=5))+
scale_colour_manual(values=getSampleColors(tmp$sample))
if(nrow(read.lengths) > 30){ p = p +guides(colour=FALSE) }
if(is.data.frame(sampleGroups)){ p = p +facet_wrap(~sampleGroup,ncol=1)}
print(p)
# save
plotsList[["read-length distributions: raw read count"]] = p
##
## plot distribution of clipped read lengths - fraction
##
tmp = melt(t(apply(read.lengths, 1, function(row){ row/sum(row) }))); colnames(tmp) = c("sample","length","fraction")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sample, sampleGroups$sampleID), 2] }
tmp$sample <- factor(tmp$sample)
maxX = min(c(100,max(tmp$length)))
p = ggplot(tmp, aes(x=length, y=fraction, colour=sample)) +geom_line(alpha=1) +xlab("read length (nt)") +ylab("fraction of reads") +ggtitle("read-length distributions:\nnormalised read fraction") +scale_x_continuous(limits=c(15,maxX), minor_breaks=1:maxX, breaks=seq(15,maxX,by=5))+
scale_colour_manual(values=getSampleColors(tmp$sample))
if(nrow(read.lengths) > 30){ p = p +guides(colour=FALSE) }
if(is.data.frame(sampleGroups)){ p = p +facet_wrap(~sampleGroup,ncol=1)}
print(p)
# save
plotsList[["read-length distributions: normalised read fraction"]] = p
}
##
## plot distribution of # mapped reads per sample
##
printMessage("Plotting # mapped reads")
tmp = log10(libSizes$all)
## ORIGINAL: hist(tmp, breaks=seq(0,ceiling(max(tmp)), by=0.1), col="grey", border="white", xlab="log10(# mapped reads)", main="Library size (all mapped reads)", ylab="# samples")
p = ggplot(as.data.frame(tmp),aes(x=tmp))+geom_histogram(breaks=seq(0,ceiling(max(tmp)), by=0.1),fill='lightblue')+xlab("log10(# mapped reads)")+ylab("# samples")+ggtitle("Library size (all mapped reads)")
# save
plotsList[["Library size (all mapped reads)"]] = p
##
## Plot the rRNA contamination
##
mapping.stats.orig = mapping.stats
mapping.stats = mapping.stats[,-grep("input_to_",colnames(mapping.stats))]
## remove the exogenous stuff from the stats if this wasn't used in the run
if(sum(mapping.stats[,23:27]) == 0)
mapping.stats = mapping.stats[, -c(23:27)]
##
## Plot heatmap of mapping percentages through the pipeline
##
printMessage("Plotting mapping stats heatmap (1/3)")
toplot = melt(as.matrix(mapping.stats / mapping.stats$input)); colnames(toplot) = c("Sample","Stage","ReadFraction")
toplot$Stage = with(toplot, factor(Stage, levels = rev(levels(Stage))))
toplot$Sample = factor(as.character(toplot$Sample), levels=rownames(mapping.stats)[sampleOrder])
if(is.data.frame(sampleGroups)){ toplot$sampleGroup = sampleGroups[match(toplot$Sample, sampleGroups$sampleID), 2] }
p = ggplot(toplot, aes(x=Sample, y=Stage, group=Sample, fill=ReadFraction, label=sprintf("%1.1f%%",ReadFraction*100))) +geom_tile() +scale_fill_gradient2(low="white",high="yellow",mid="steelblue", midpoint=0.5) +theme(axis.text.x=element_text(angle=40, hjust=1.0, vjust=1)) +ggtitle("fraction aligned reads\n(normalised by # input reads)")
if(nrow(mapping.stats) < 50){ p = p +geom_text(size=3) }
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x",space="free_x")}
print(p)
# save
plotsList[["fraction aligned reads (normalised by # input reads)"]] = p
##
## Plot heatmap of mapping percentages through the pipeline
##
printMessage("Plotting mapping stats heatmap (2/3)")
if(max(mapping.stats$successfully_clipped) > 0){
tmp = mapping.stats
i.toFix = which(tmp$successfully_clipped == 0)
if(length(i.toFix) > 0)
tmp$successfully_clipped[i.toFix] = tmp$input[i.toFix]
toplot = melt(as.matrix(tmp / tmp$successfully_clipped)[,-1,drop=F]); colnames(toplot) = c("Sample","Stage","ReadFraction")
toplot$Stage = with(toplot, factor(Stage, levels = rev(levels(Stage))))
toplot$Sample = factor(as.character(toplot$Sample), levels=rownames(mapping.stats)[sampleOrder])
if(is.data.frame(sampleGroups)){ toplot$sampleGroup = sampleGroups[match(toplot$Sample, sampleGroups$sampleID), 2] }
p = ggplot(toplot, aes(x=Sample, y=Stage, group=Sample, fill=ReadFraction, label=sprintf("%1.1f%%",ReadFraction*100))) +geom_tile() +scale_fill_gradient2(low="white",high="yellow",mid="steelblue", midpoint=0.5) +theme(axis.text.x=element_text(angle=40, hjust=1.0, vjust=1)) +ggtitle("fraction aligned reads\n(normalised by # adapter-clipped reads)")
if(nrow(mapping.stats) < 50){ p = p +geom_text(size=3) }
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x",space="free_x")}
print(p)
# save
plotsList[["fraction aligned reads (normalised by # adapter-clipped reads)"]] = p
}
##
## Plot heatmap of mapping percentages through the pipeline
##
printMessage("Plotting mapping stats heatmap (3/3)")
toplot = melt(as.matrix(mapping.stats / mapping.stats$reads_used_for_alignment)[,-c(1:7),drop=F]); colnames(toplot) = c("Sample","Stage","ReadFraction")
toplot$Stage = with(toplot, factor(Stage, levels = rev(levels(Stage))))
toplot$Sample = factor(as.character(toplot$Sample), levels=rownames(mapping.stats)[sampleOrder])
if(is.data.frame(sampleGroups)){ toplot$sampleGroup = sampleGroups[match(toplot$Sample, sampleGroups$sampleID), 2] }
p = ggplot(toplot, aes(x=Sample, y=Stage, group=Sample, fill=ReadFraction, label=sprintf("%1.1f%%",ReadFraction*100))) +geom_tile() +scale_fill_gradient2(low="white",high="yellow",mid="steelblue", midpoint=0.5) +theme(axis.text.x=element_text(angle=40, hjust=1.0, vjust=1)) +ggtitle("fraction aligned reads\n(normalised by # non-contaminant reads)")
if(nrow(mapping.stats) < 50){ p = p +geom_text(size=3) }
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x",space="free_x")}
print(p)
# save
plotsList[["fraction aligned reads (normalised by # non-contaminant reads)"]] = p
##
## Plot QC results
##
printMessage("Plotting QC result")
toplot = as.data.frame(qc.results)
toplot$Sample = factor(rownames(toplot), levels=rownames(mapping.stats)[sampleOrder])
p = ggplot(toplot, aes(x=TranscriptomeReads, y=TranscriptomeGenomeRatio)) +
geom_text(label=toplot$Sample,hjust = 0,nudge_x = 0.02, nudge_y = 0.02, colour='black') +
scale_colour_manual(values=getSampleColors(toplot$Sample))
if(is.data.frame(sampleGroups)){
toplot$sampleGroup = sampleGroups[match(toplot$Sample, sampleGroups$sampleID), 2]
p = ggplot(toplot, aes(x=TranscriptomeReads, y=TranscriptomeGenomeRatio, colour=sampleGroup)) +
geom_text(label=toplot$Sample,hjust = 0, nudge_x = 0.02, nudge_y = 0.02, colour='black') +
scale_colour_manual(values=getSampleColors(toplot$sampleGroup))
}
minX = floor(log10(min(toplot$TranscriptomeReads)+0.001))
maxX = ceiling(log10(max(toplot$TranscriptomeReads)+0.001))
p = p + scale_x_log10(breaks=10^c(minX:maxX)) +coord_cartesian(xlim=c(10^(minX),10^(maxX)),ylim=c(0,1)) +geom_vline(xintercept=100000,col="red",alpha=0.5) +geom_hline(yintercept=0.5,col="red",alpha=0.5) +annotate("rect",xmin=0,xmax=Inf,ymin=-1,ymax=0.5,alpha=0.2,fill="red") +annotate("rect",xmin=0,xmax=100000,ymin=-1,ymax=1.1,alpha=0.2,fill="red") +ylab("# transcriptome reads / # genome reads") +xlab("# transcriptome reads (log10)") +ggtitle("QC result: overall")
print(p+geom_point(size=4))
# save
plotsList[["QC result: overall"]] = p + geom_point(size=4)
if(is.data.frame(sampleGroups)){
print(p + facet_wrap(~sampleGroup) + theme(legend.position="none") +geom_point(size=2))
# save
plotsList[["QC result: overall"]] = p +facet_wrap(~sampleGroup) +theme(legend.position="none") +
geom_point(size=2)
}
##
## Heatmap
##
qc.results[,4] = round(qc.results[,4]*100)/100
qc.results[,5] = round(qc.results[,5]*1000)/1000
tmp.mat = qc.results
tmp.mat[,1] = rep(1,nrow(tmp.mat))
tmp.mat[,2] = rep(1,nrow(tmp.mat))
tmp.mat[,5] = rep(1,nrow(tmp.mat))
tmp.pass=tmp.mat[,3] >= 100000; tmp.mat[tmp.pass,3] = "pass"; tmp.mat[!tmp.pass,3] = "fail"
tmp.pass=tmp.mat[,4] >= 0.5; tmp.mat[tmp.pass,4] = "pass"; tmp.mat[!tmp.pass,4] = "fail"
toplot=cbind(melt(tmp.mat), Actual=melt(qc.results)[,3]); colnames(toplot)[1:3]=c("Sample","Stage","Value")
toplot$Sample = factor(as.character(toplot$Sample), levels=rownames(mapping.stats)[sampleOrder])
if(is.data.frame(sampleGroups)){ toplot$sampleGroup = sampleGroups[match(toplot$Sample, sampleGroups$sampleID), 2] }
p = ggplot(toplot, aes(y=Sample, x=Stage, fill=Value, label=Actual)) +
scale_fill_manual(values=c("fail"="red","pass"="palegreen","1"="lightgrey")) +
geom_label() +theme(plot.background=element_rect(fill="white"),panel.background=element_rect(fill=rgb(0.97,0.97,0.97)), axis.text.x=element_text(angle=20, hjust=1, vjust=1)) +
ggtitle("QC result: per-sample results") +xlab("") +ylab("")
print(p)
# save
plotsList[["QC result: per-sample results"]] = p
##
## Plot breakdown of counts in each biotype
##
printMessage("Plotting biotype counts")
require(plyr)
sampleTotals=matrix(NA,ncol=nrow(mapping.stats),nrow=0); colnames(sampleTotals) = rownames(mapping.stats)
if(nrow(exprs.miRNA) > 0){
sampleTotals = rbind(sampleTotals, colSums(exprs.miRNA))
rownames(sampleTotals)[nrow(sampleTotals)] = "miRNA"
}
if(nrow(exprs.tRNA) > 0){
sampleTotals = rbind(sampleTotals, colSums(exprs.tRNA))
rownames(sampleTotals)[nrow(sampleTotals)] = "tRNA"
}
if(nrow(exprs.piRNA) > 0){
sampleTotals = rbind(sampleTotals, colSums(exprs.piRNA))
rownames(sampleTotals)[nrow(sampleTotals)] = "piRNA"
}
if(nrow(exprs.gencode) > 0){
tmp = data.frame(biotype=sapply(rownames(exprs.gencode), function(id){bits=unlist(strsplit(id,":")); bits[length(bits)]}), exprs.gencode)
tmp = ddply(tmp, "biotype", function(mat){ colSums(mat[,-1,drop=F]) })
rownames(tmp) = tmp[,1]; tmp = tmp[,-1,drop=F]
colnames(tmp) = colnames(sampleTotals)
## add gencode miRNA to the existing count...
if("miRNA" %in% rownames(tmp) && "miRNA" %in% rownames(sampleTotals)){
i = which(rownames(tmp) %in% "miRNA")
j = which(rownames(sampleTotals) %in% "miRNA")
for(x in 1:ncol(sampleTotals)){
sampleTotals[j,x] = sampleTotals[j,x] + tmp[i,x]
}
tmp = tmp[-i,,drop=F]
}
sampleTotals = rbind(sampleTotals, tmp)
}
if(nrow(exprs.circRNA) > 0){
sampleTotals = rbind(sampleTotals, colSums(exprs.circRNA))
rownames(sampleTotals)[nrow(sampleTotals)] = "circularRNA"
}
sampleTotals = rbind(sampleTotals, mapping.stats$exogenous_miRNA)
rownames(sampleTotals)[nrow(sampleTotals)] = "exogenous_miRNA"
sampleTotals = rbind(sampleTotals, mapping.stats$exogenous_rRNA)
rownames(sampleTotals)[nrow(sampleTotals)] = "exogenous_rRNA"
sampleTotals = rbind(sampleTotals, mapping.stats$exogenous_genomes)
rownames(sampleTotals)[nrow(sampleTotals)] = "exogenous_genomes"
sampleTotals = sampleTotals[order(apply(sampleTotals, 1, median, na.rm=T), decreasing=F), ,drop=F]
tmp = melt(as.matrix(sampleTotals))
colnames(tmp) = c("biotype","sampleID","readCount")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sampleID, sampleGroups$sampleID), 2] }
p = ggplot(na.omit(tmp), aes(y=readCount,x=biotype, colour=biotype)) +geom_hline(aes(yintercept=1),linetype="dashed") +
geom_boxplot() +scale_y_log10(breaks=c(0.01,0.1,1,10,100,1000,10000,100000,1000000,10000000,100000000)) +
guides(colour=FALSE) +coord_flip() +ggtitle("Biotypes: distributions, raw read-counts")+
scale_color_manual(values = getSampleColors(tmp$biotype))
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x")}
print(p)
# save
plotsList[["Biotypes: distributions, raw read-counts"]] = p
## save the biotype counts
write.table(sampleTotals[order(apply(sampleTotals, 1, median, na.rm=T), decreasing=T), ,drop=F], file=paste(output.dir, "exceRpt_biotypeCounts.txt", sep="/"), sep="\t", col.names=NA, quote=F)
## plot biotype breakdown as RPM:
tmp = melt(as.matrix(apply(sampleTotals, 2, function(col){ col*1000000/sum(col) })))
colnames(tmp) = c("biotype","sampleID","readPerMillion")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sampleID, sampleGroups$sampleID), 2] }
p = ggplot(na.omit(tmp), aes(y=readPerMillion,x=biotype, colour=biotype)) +geom_hline(aes(yintercept=1),linetype="dashed") +geom_boxplot() +scale_y_log10(breaks=c(0.01,0.1,1,10,100,1000,10000,100000,1000000,10000000,100000000)) +guides(colour=FALSE) +coord_flip() +ggtitle("Biotypes: distributions, normalised")+
scale_color_manual(values = getSampleColors(tmp$biotype))
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x")}
print(p)
# save
plotsList[["Biotypes: distributions, normalised"]] = p
## plot top N biotypes for each sample as a barplot - normalised to INPUT reads
tmp = sampleTotals
for(i in 1:ncol(sampleTotals))
tmp[,i] = sampleTotals[,i]*1000000/libSizes$reads_used_for_alignment[i]
biotypeOrder = order(apply(tmp, 1, mean), decreasing=T)
tmp = tmp[biotypeOrder, , drop=F]
N = min(7, length(biotypeOrder)) # number of biotypes detected
tmp = as.matrix(rbind(tmp[1:N, , drop=F], other=colSums(tmp[-c(1:N), , drop=F])))
tmp = melt(rbind(tmp, unmapped=1000000-colSums(tmp)))
colnames(tmp) = c("biotype","sampleID","readsPerMillion")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sampleID, sampleGroups$sampleID), 2] }
p = ggplot(na.omit(tmp), aes(y=readsPerMillion,x=sampleID,fill=biotype)) +geom_bar(stat="identity") +scale_fill_brewer(palette = "Paired") +theme(axis.text.x=element_text(angle=50, hjust=1.0, vjust=1)) +ggtitle("Biotypes: per-sample, normalised") +ylab("reads per million reads used for alignment") +xlab("") +ylim(limits=c(0,1E6)) #+scale_fill_discrete(rich.colors(10))
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x",space="free_x")}
suppressWarnings(print(p))
# save
plotsList[["Biotypes: per-sample, normalised"]] = p
## plot top N biotypes for each sample as a barplot - normalised to MAPPED reads
tmp = as.matrix(apply(sampleTotals, 2, function(col){ col*1000000/sum(col) }))
tmp = tmp[biotypeOrder, , drop=F]
tmp = as.matrix(rbind(tmp[1:N, , drop=F], other=colSums(tmp[-c(1:N), , drop=F])))
tmp = melt(tmp)
colnames(tmp) = c("biotype","sampleID","readsPerMillion")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sampleID, sampleGroups$sampleID), 2] }
p = ggplot(na.omit(tmp), aes(y=readsPerMillion,x=sampleID,fill=biotype)) +geom_bar(stat="identity") +theme(axis.text.x=element_text(angle=50, hjust=1.0, vjust=1)) +ggtitle("Biotypes: per-sample, normalised") +ylab("reads per million mapped reads") +xlab("") +scale_fill_brewer(palette = "Paired") #+scale_fill_manual( values = c(colorRampPalette( brewer.pal( 6 , "Paired" ) )(8), "grey") )
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x",space="free_x")}
suppressWarnings(print(p))
# save
plotsList[["Biotypes: per-sample, normalised"]] = p
## Plot miRNA expression distributions
if(nrow(exprs.miRNA) > 0){
printMessage("Plotting miRNA expression distributions")
tmp = melt(exprs.miRNA)
colnames(tmp) = c("miRNA","sample","abundance")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sample, sampleGroups$sampleID), 2] }
p = ggplot(tmp, aes(y=abundance, x=sample, colour=sample)) +
geom_violin() +
geom_boxplot(alpha=0.2) +ylab("Read count") +ggtitle("miRNA abundance distributions (raw counts)") +
scale_y_log10() +guides(colour=FALSE) +
scale_color_manual(values = set_names(getSampleColors(tmp$sample),tmp$sample))
if(ncol(exprs.miRNA) < 30){
p = p +theme(axis.text.x=element_text(angle=50, hjust=1.0, vjust=1))
}else{
p = p+theme(axis.ticks = element_blank(), axis.text.x = element_blank())
}
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x",space="free_x")}
print(p)
# save
plotsList[["miRNA abundance distributions (raw counts)"]] = p
p = ggplot(tmp, aes(x=abundance, colour=sample)) +geom_density() +xlab("Read count") +ggtitle("miRNA abundance distributions (raw counts)") +scale_x_log10()+
scale_color_manual(values = set_names(getSampleColors(tmp$sample),tmp$sample))
if(ncol(exprs.miRNA.rpm) > 30){ p = p +guides(colour=FALSE) }
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup)}
print(p)
# save
plotsList[["miRNA abundance distributions (raw counts)"]] = p
tmp = melt(exprs.miRNA.rpm)
colnames(tmp) = c("miRNA","sample","abundance")
if(is.data.frame(sampleGroups)){ tmp$sampleGroup = sampleGroups[match(tmp$sample, sampleGroups$sampleID), 2] }
p = ggplot(tmp, aes(y=abundance, x=sample, colour=sample)) +geom_violin() +
geom_boxplot(alpha=0.2) +ylab("Reads per million (RPM)") +
ggtitle("miRNA abundance distributions (RPM)") +
theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
scale_y_log10() +guides(colour=FALSE)+
scale_color_manual(values = set_names(getSampleColors(tmp$sample),tmp$sample))
if(ncol(exprs.miRNA.rpm) < 30){
p = p +theme(axis.text.x=element_text(angle=50, hjust=1.0, vjust=1))
}else{
p = p+theme(axis.ticks = element_blank(), axis.text.x = element_blank())
}
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup, scales="free_x",space="free_x")}
print(p)
# save
plotsList[["miRNA abundance distributions (RPM)"]] = p
p = ggplot(tmp, aes(x=abundance, colour=sample)) +geom_density() +
xlab("Reads per million (RPM)") +ggtitle("miRNA abundance distributions (RPM)") +scale_x_log10()+
scale_color_manual(values = set_names(getSampleColors(tmp$sample),tmp$sample))
if(ncol(exprs.miRNA.rpm) > 30){ p = p +guides(colour=FALSE) }
if(is.data.frame(sampleGroups)){ p = p +facet_grid(~sampleGroup)}
print(p)
# save
plotsList[["miRNA abundance distributions (RPM)"]] = p
}
##
## Finally, plot exogenous if there are any (not used by default)
##
if(nrow(exprs.exogenousGenomes_specific) > 0){
printMessage("Plotting exogenous counts")
par(oma=c(20,2,0,0))
barplot(exprs.exogenousGenomes_cumulative[1,,drop=F], las=2, main="Total # reads mapped to NCBI taxonomy")
## if we have more than one sample, plot some heatmaps
if(ncol(exprs.exogenousGenomes_specific) > 1){
par(oma=c(8,0,0,20))
maxRow = 50; if(nrow(exprs.exogenousGenomes_specific) < maxRow){ maxRow = nrow(exprs.exogenousGenomes_specific) }
tmp.order = order(apply(t(t(exprs.exogenousGenomes_specific)/colSums(exprs.exogenousGenomes_specific)), 1, median), decreasing=T)
tmp = t(log10(t(t(exprs.exogenousGenomes_specific)*1000000/colSums(exprs.exogenousGenomes_specific))[tmp.order, ][1:maxRow,]+0.1))
colnames(tmp) = taxonomyInfo.exogenous_genomes[match(colnames(tmp), taxonomyInfo.exogenous_genomes$ID), ]$name
heatmap.2(tmp,trace="none",main="top taxa nodes: specific normalised read count", symbreaks=F,col=rich.colors(50))
tmp.order = order(apply(t(t(exprs.exogenousGenomes_specific)), 1, median), decreasing=T)
tmp = t(log10(exprs.exogenousGenomes_specific[tmp.order, ][1:maxRow,]+0.1))
colnames(tmp) = taxonomyInfo.exogenous_genomes[match(colnames(tmp), taxonomyInfo.exogenous_genomes$ID), ]$name
heatmap.2(tmp,trace="none",main="top taxa nodes: specific absolute read count", symbreaks=F,col=rich.colors(50))
maxRow = 50; if(nrow(exprs.exogenousGenomes_cumulative) < maxRow){ maxRow = nrow(exprs.exogenousGenomes_cumulative) }
tmp.order = order(apply(t(t(exprs.exogenousGenomes_cumulative)/libSizes$exogenous_genomes), 1, median), decreasing=T)
tmp = t(log10(t(t(exprs.exogenousGenomes_cumulative)*1000000/libSizes$exogenous_genomes)[tmp.order, ][1:maxRow,]+0.1))
colnames(tmp) = taxonomyInfo.exogenous_genomes[match(colnames(tmp), taxonomyInfo.exogenous_genomes$ID), ]$name
heatmap.2(tmp,trace="none",main="top taxa nodes: cumulative normalised read count", symbreaks=F,col=rich.colors(50))
tmp.order = order(apply(t(t(exprs.exogenousGenomes_cumulative)), 1, median), decreasing=T)
tmp = t(log10(exprs.exogenousGenomes_cumulative[tmp.order, ][1:maxRow,]+0.1))
colnames(tmp) = taxonomyInfo.exogenous_genomes[match(colnames(tmp), taxonomyInfo.exogenous_genomes$ID), ]$name
heatmap.2(tmp,trace="none",main="top taxa nodes: cumulative absolute read count", symbreaks=F,col=rich.colors(50))
}
}
dev.off()
printMessage("All done!")
return(plotsList)
}
## Deprecated below:
##
## Plots a taxonomy tree with a given set of weights
## (Rob Kitchen)
plotTree = function(rEG, taxonomyInfo, counts_uniq, counts_cum, title="", what){
## node parameters
nNodes = length(nodes(rEG))
nA <- list()
nA$shape = rep("circle",nNodes)
nA$fixedSize<-rep(FALSE, nNodes)
nA$height <- nA$width <- rescale(sqrt(counts_cum/10), to=c(0.25,7))
nA$color <- rep(rgb(0,0,0,0.25),nNodes)
nA$style <- rep("bold", nNodes)
if(what == "exogenousRibosomal"){
nA$fillcolor <- sapply(counts_uniq*10, function(val){ if(val>100){val=100}; rgb(100-val,100,100-val,maxColorValue=100)})
}else{
nA$fillcolor <- sapply(counts_uniq*10, function(val){ if(val>100){val=100}; rgb(100-val,100-val,100,maxColorValue=100)})
}
newNodeIDs = sapply(taxonomyInfo[match(as.numeric(nodes(rEG)), taxonomyInfo$ID), ]$name, function(id){ newID=unlist(strsplit(id," ")); if(length(newID) == 1){id}else{paste(newID[1], "\n", paste(newID[-1],collapse=" "), sep="") }})
nA$label <- paste(newNodeIDs,"\n",round(counts_cum*10)/10,"%",sep="")
nA <- lapply(nA, function(x) { names(x) <- nodes(rEG); x})
## edge parameters
eA <- list(arrowsize=rep(0.1,length(names(rEG@edgeData))), arrowhead=rep("none",length(names(rEG@edgeData))))
eA <- lapply(eA, function(x) { names(x) <- names(rEG@edgeData); x})
## layout the graph
tmp = layoutGraph(rEG, nodeAttrs=nA, edgeAttrs=eA)
## hack to make sure the node labels are visible!
sizes = rescale(tmp@renderInfo@nodes$rWidth, to=c(0.2,1.5))
names(sizes) = nodes(rEG)
nodeRenderInfo(tmp) <- list(cex=sizes)
graphRenderInfo(tmp) <- list(main=title)
## plot the graph
renderGraph(tmp)
}
##
## Plot exogenous genomes
## (Rob Kitchen)
plotExogenousTaxonomyTrees = function(counts, cumcounts, what, output.dir, taxonomyInfo, fontScale=2, sampleGroups=NA, minPercent=0.5){
# counts = exprs.exogenousGenomes_specific
# cumcounts = exprs.exogenousGenomes_cumulative
# taxonomyInfo = taxonomyInfo.exogenous_genomes
#
# counts = exprs.exogenousRibosomal_specific
# cumcounts = exprs.exogenousRibosomal_cumulative
# taxonomyInfo = taxonomyInfo.exogenous_rRNA
## add direct count to the cumulative counts matrix
cumcounts = cumcounts+counts
#counts.norm = t(t(counts*100)/colSums(counts))
counts.norm = apply(counts, 2, function(col){ col*100/sum(col) })
cumcounts.norm = apply(cumcounts, 2, function(col){ col*100/col[1] })
dim(counts)
## remove nodes with < 0.1% of all reads
#minPercent = 1
keepRows = which(apply(counts.norm, 1, max) >= minPercent)
keepRows = sort(unique(c(keepRows, which(apply(cumcounts.norm, 1, max) >= minPercent))))
# use only paths through the tree that capture above a certain fraction of reads
counts = counts[keepRows, , drop=F]
cumcounts = cumcounts[keepRows, , drop=F]
nrow(counts)
#data_uniq = counts.norm[keepRows, , drop=F]
#data_cum = cumcounts.norm[keepRows, , drop=F]
#nrow(data_cum)
## Re-scale the node percentages after trimming branches to make the numbers make more sense - shouldn't make much diff to the cumcounts
data_uniq = apply(counts, 2, function(col){ col*100/sum(col) })
data_cum = apply(cumcounts, 2, function(col){ col*100/col[1] })
#if("significantDEX" %in% names(combinedSamples)){
# significant = combinedSamples$significantDEX[keepRows]
# foldChange = combinedSamples$foldChange[keepRows]
#}
## remove edges with no useable counts (based on minPercent threshold)
taxonomyInfo = taxonomyInfo[taxonomyInfo$ID %in% rownames(data_cum), ]
## Build the graph object
rEG <<- new("graphNEL", nodes=as.character(taxonomyInfo$ID), edgemode="directed")
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
apply(taxonomyInfo[-1,], 1, function(row){
from = trim(as.character(row[4]));
if(from %in% taxonomyInfo$ID){ rEG <<- addEdge(trim(as.character(row[4])), trim(as.character(row[3])), rEG, 1) }
NULL })
data_uniq = data_uniq[match(taxonomyInfo$ID, rownames(data_uniq)), , drop=F]
data_cum = data_cum[match(taxonomyInfo$ID, rownames(data_cum)), , drop=F]
data_uniq[is.na(data_uniq)] = 0
data_cum[is.na(data_cum)] = 0
##
## Write to PDF
##
## plot an average tree over all samples
printMessage(c("Plotting a taxonomy tree based on the average of all samples "))
pdf(file=paste(output.dir,"/exceRpt_",what,"_TaxonomyTrees_aggregateSamples.pdf",sep=""),height=7,width=15)
plotTree(rEG, taxonomyInfo, apply(data_uniq, 1, max), rowMeans(data_cum), what=what)
dev.off()
## plot samples individually
printMessage(c("Plotting a separate taxonomy tree for each sample"))
pdf(file=paste(output.dir,"/exceRpt_",what,"_TaxonomyTrees_perSample.pdf",sep=""), height=7, width=15)
for(i in 1:ncol(data_uniq))
plotTree(rEG, taxonomyInfo, data_uniq[,i], data_cum[,i], title=paste(colnames(data_uniq)[i]," (total reads: ",cumcounts[1,i],")", sep=""), what=what)
dev.off()
## if there are groups of samples
if(is.data.frame(sampleGroups)){
printMessage(c("Plotting a separate taxonomy tree for each sample-group"))
pdf(file=paste(output.dir,"/exceRpt_",what,"_TaxonomyTrees_perGroup.pdf",sep=""), height=7, width=15)
for(thisgroup in levels(as.factor(sampleGroups$sampleGroup))){
tmpDat_uniq = rowMeans(data_uniq[, match(sampleGroups[sampleGroups$sampleGroup %in% thisgroup, ]$sampleID, colnames(data_uniq)), drop=F])
tmpDat_cum = rowMeans(data_cum[, match(sampleGroups[sampleGroups$sampleGroup %in% thisgroup, ]$sampleID, colnames(data_cum)), drop=F])
plotTree(rEG, taxonomyInfo, tmpDat_uniq, tmpDat_cum, title=paste(thisgroup,sep=""), what=what)
}
dev.off()
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.