##' Plot FastQC
##'
##' Creates a bar plot with the number of sequences per entry in a set of zip archives generated by FastQC.
##' To be used after extractFastqcNreads().
##' @param x return value from extractFastqcNreads()
##' @param main an overall title for the plot
##' @param cex numeric character expansion factor for x-axis labels
##' @return nothing
##' @author Timothee Flutre [cre,aut]
##' @export
plotFastqcNreads <- function(x, main="", cex=1){
stopifnot(is.vector(x), is.numeric(x), ! is.null(names(x)))
par(mar=c(10, 7, 4, 1))
bp <- barplot(sort(x), xaxt="n", xlab="", ylab="Number of sequences",
main=main)
axis(1, at=bp, labels=FALSE)
text(bp, par("usr")[3], srt=45, adj=1.1, labels=names(sort(x)),
xpd=TRUE, cex=cex)
}
##' Plot FastQC
##'
##' Plot the number, or percentage, of sequences per quality score with one curve per dataset.
##' To be used after extractFastqcQuals().
##' @param qual return value from extractFastqcQuals()
##' @param perc value of perc used when qual was generated by extractFastqcQuals()
##' @param ylim left and right limits of the y-axis (will be min and max of qual by default)
##' @param max.datasets.per.plot max number of datasets on the same plot
##' @param main an overall title for the plot
##' @param legend.x x coordinate to position the legend (no legend if NULL)
##' @param legend.y y coordinate to position the legend
##' @param legend.cex numeric character expansion factor for legend labels
##' @param add.2nd.yaxis add a 2nd y-axis on the right side of the plot
##' @return nothing
##' @author Timothee Flutre [cre,aut], Nicolas Rode [ctb]
##' @export
plotFastqcNbseqQual <- function(qual,
perc=FALSE,
ylim=NULL,
max.datasets.per.plot=25,
main="Quality control",
legend.x="topleft",
legend.y=NULL,
legend.cex=1,
add.2nd.yaxis=TRUE){
stopifnot(is.matrix(qual),
! is.null(rownames(qual)))
xlab <- "Phred quality"
ylab <- "Number of sequences"
if(perc)
ylab <- "Percentage of sequences"
## determine the lowest and highest qualities for the x-axis
lowest.qual <- NA
for(j in 1:ncol(qual)){
if(any(! is.na(qual[,j]))){
lowest.qual <- j
break
}
}
highest.qual <- NA
for(j in ncol(qual):1){
if(any(! is.na(qual[,j]))){
highest.qual <- j
break
}
}
xlim <- c(lowest.qual, highest.qual)
## determine the lowest and highest counts for the y-axis
lowest.count <- min(qual[,lowest.qual])
highest.count <- max(qual[,lowest.qual])
for(j in lowest.qual:highest.qual){
lowest.count <- min(lowest.count, qual[,j], na.rm=TRUE)
highest.count <- max(highest.count, qual[,j], na.rm=TRUE)
}
if(is.null(ylim))
ylim <- c(lowest.count, highest.count)
## plot the data
if(nrow(qual) <= max.datasets.per.plot){ # show all datasets on a single plot
plot(x=0, y=0, xlim=xlim, ylim=ylim,
xlab=xlab, ylab=ylab, main=main,
type="n", bty="n")
for(i in 1:nrow(qual)){
idx <- which(! is.na(qual[i,]))
points(x=idx, y=qual[i, idx], col=i, pch=(1:25)[i %% 25], type="b")
}
if(add.2nd.yaxis)
axis(side=4)
if(! is.null(legend.x))
legend(x=legend.x, y=legend.y, cex=legend.cex, bty="n",
legend=rownames(qual),
col=1:nrow(qual),
pch=1:min(25, nrow(qual)))
} else{ # show all datasets on several plots
nb.plots <- ceiling(nrow(qual) / max.datasets.per.plot)
for(plot.id in 1:nb.plots){
plot(x=0, y=0, xlim=xlim, ylim=ylim,
xlab=xlab, ylab=ylab, main=main,
type="n", bty="n")
subset.idx.rows <- ((plot.id-1)*max.datasets.per.plot+1):(plot.id*max.datasets.per.plot)
subset.idx.rows <- subset.idx.rows[subset.idx.rows %in% 1:nrow(qual)]
for(i in subset.idx.rows){
j <- i - (plot.id-1) * max.datasets.per.plot
idx <- which(! is.na(qual[i,]))
points(x=idx, y=qual[i, idx], col=j, pch=(1:25)[j %% 25], type="b")
}
if(add.2nd.yaxis)
axis(side=4)
if(! is.null(legend.x))
legend(x=legend.x, y=legend.y, cex=legend.cex, bty="n",
legend=rownames(qual)[subset.idx.rows],
col=(1:max.datasets.per.plot)[1:length(subset.idx.rows)],
pch=1:min(25, max.datasets.per.plot)[1:length(subset.idx.rows)])
}
}
}
##' Plot FastQC
##'
##' Plot a variable content (adapter or N) as percentage along the sequences per entry in a set of zip archives generated by FastQC.
##' To be used after extractFastqcAdpContents() or extractFastqcBaseNs().
##' @param content return value from extractFastqcAdpContents() or extractFastqcBaseNs()
##' @param max.datasets.per.plot max number of datasets on the same plot
##' @param lowest.perc lowest percentage of content for the y-axis
##' @param highest.perc highest percentage of content for the y-axis
##' @param ylab a title for the y axis
##' @param main an overall title for the plot
##' @param legend.x x coordinate to position the legend (no legend if NULL)
##' @param legend.y y coordinate to position the legend
##' @param legend.cex numeric character expansion factor for legend labels
##' @param add.2nd.yaxis add a 2nd y-axis on the right side of the plot
##' @return nothing
##' @author Timothee Flutre [cre,aut], Nicolas Rode [ctb]
##' @export
plotFastqcContent <- function(content,
max.datasets.per.plot=25,
lowest.perc=NULL,
highest.perc=NULL,
ylab="Content (%)",
main="Quality control",
legend.x="topleft",
legend.y=NULL,
legend.cex=1,
add.2nd.yaxis=TRUE){
stopifnot(is.matrix(content),
! is.null(rownames(content)),
! is.null(colnames(content)))
xlab <- "Positions (bp)"
## determine the range of positions for the x-axis
positions <- sapply(strsplit(colnames(content), "-"),
function(x){as.numeric(x[1])})
xlim <- c(positions[1], positions[length(positions)])
## determine the lowest and highest content percentage for the y-axis
if(is.null(lowest.perc))
lowest.perc <- min(c(content), na.rm=TRUE)
if(is.null(highest.perc))
highest.perc <- max(c(content), na.rm=TRUE)
ylim <- c(lowest.perc, highest.perc)
## plot the data
if(nrow(content) <= max.datasets.per.plot){ # show all datasets on a single plot
plot(x=0, y=0, xlim=xlim, ylim=ylim,
xlab=xlab, ylab=ylab, main=main,
type="n", bty="n")
for(i in 1:nrow(content)){
idx <- which(! is.na(content[i,]))
points(x=positions[idx], y=content[i, idx],
col=i, pch=(1:25)[i %% 25], type="b")
}
if(add.2nd.yaxis)
axis(side=4)
if(! is.null(legend.x))
legend(x=legend.x, y=legend.y, cex=legend.cex, bty="n",
legend=rownames(content),
col=1:nrow(content),
pch=1:min(25, nrow(content)))
} else{ # show all datasets on several plots
nb.plots <- ceiling(nrow(content) / max.datasets.per.plot)
for(plot.id in 1:nb.plots){
plot(x=0, y=0, xlim=xlim, ylim=ylim,
xlab=xlab, ylab=ylab, main=main,
type="n", bty="n")
subset.idx.rows <- ((plot.id-1)*max.datasets.per.plot+1):(plot.id*max.datasets.per.plot)
subset.idx.rows <- subset.idx.rows[subset.idx.rows %in% 1:nrow(content)]
for(i in subset.idx.rows){
j <- i - (plot.id-1) * max.datasets.per.plot
points(x=positions, y=content[i,], col=j, pch=(1:25)[j %% 25], type="b")
}
if(add.2nd.yaxis)
axis(side=4)
if(! is.null(legend.x))
legend(x=legend.x, y=legend.y, cex=legend.cex, bty="n",
legend=rownames(content)[subset.idx.rows],
col=(1:max.datasets.per.plot)[1:length(subset.idx.rows)],
pch=1:min(25, max.datasets.per.plot)[1:length(subset.idx.rows)])
}
}
}
##' Plot FastQC
##'
##' Plot the distribution of sequence lengths per entry in a set of zip archives generated by FastQC.
##' To be used after extractFastqcSeqLengths().
##' @param seq.length return value from extractFastqcSeqLengths()
##' @param max.datasets.per.plot max number of datasets on the same plot
##' @param lowest.len lowest sequence length for the y-axis
##' @param highest.len highest sequence length for the y-axis
##' @param main an overall title for the plot
##' @param ylab label for the y-axis
##' @param legend.x x coordinate to position the legend (no legend if NULL)
##' @param legend.y y coordinate to position the legend
##' @param legend.cex numeric character expansion factor for legend labels
##' @param add.2nd.yaxis add a 2nd y-axis on the right side of the plot
##' @return nothing
##' @author Timothee Flutre [cre,aut], Nicolas Rode [ctb]
##' @export
plotFastqcSeqLengths <- function(seq.length,
max.datasets.per.plot=25,
lowest.len=NULL,
highest.len=NULL,
main="Quality control",
ylab="Number of sequences",
legend.x="topleft",
legend.y=NULL,
legend.cex=1,
add.2nd.yaxis=TRUE){
stopifnot(is.matrix(seq.length),
! is.null(rownames(seq.length)),
! is.null(colnames(seq.length)))
xlab <- "Sequence lengths (bp)"
## determine the range of lengths for the x-axis
lengths <- sapply(strsplit(colnames(seq.length), "-"),
function(x){as.numeric(x[1])})
xlim <- c(lengths[1], lengths[length(lengths)])
## determine the lowest and highest counts of seq lengths for the y-axis
if(is.null(lowest.len)){
lowest.len <- min(c(seq.length))
if(is.infinite(lowest.len))
stop("did you give log10(seq.length)? maybe use also lowest.len=0")
}
if(is.null(highest.len))
highest.len <- max(c(seq.length))
ylim <- c(lowest.len, highest.len)
## plot the data
if(nrow(seq.length) <= max.datasets.per.plot){ # show all datasets on a single plot
plot(x=0, y=0, xlim=xlim, ylim=ylim,
xlab=xlab, ylab=ylab, main=main,
type="n", bty="n")
for(i in 1:nrow(seq.length))
points(x=lengths, y=seq.length[i,], col=i, pch=(1:25)[i %% 25], type="b")
if(add.2nd.yaxis)
axis(side=4)
if(! is.null(legend.x))
legend(x=legend.x, y=legend.y, cex=legend.cex, bty="n",
legend=rownames(seq.length),
col=1:nrow(seq.length),
pch=1:min(25, nrow(seq.length)))
} else{ # show all datasets on several plots
nb.plots <- ceiling(nrow(seq.length) / max.datasets.per.plot)
for(plot.id in 1:nb.plots){
plot(x=0, y=0, xlim=xlim, ylim=ylim,
xlab=xlab, ylab=ylab, main=main,
type="n", bty="n")
subset.idx.rows <- ((plot.id-1)*max.datasets.per.plot+1):(plot.id*max.datasets.per.plot)
subset.idx.rows <- subset.idx.rows[subset.idx.rows %in% 1:nrow(seq.length)]
for(i in subset.idx.rows){
j <- i - (plot.id-1) * max.datasets.per.plot
points(x=lengths, y=seq.length[i,], col=j, pch=(1:25)[j %% 25], type="b")
}
if(add.2nd.yaxis)
axis(side=4)
if(! is.null(legend.x))
legend(x=legend.x, y=legend.y, cex=legend.cex, bty="n",
legend=rownames(seq.length)[subset.idx.rows],
col=(1:max.datasets.per.plot)[1:length(subset.idx.rows)],
pch=1:min(25, max.datasets.per.plot)[1:length(subset.idx.rows)])
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.