##' Extract FastQC
##'
##' Return the number of sequences per entry in a set of zip archives generated by FastQC.
##' To be used after readFastqcZips().
##' @param all.qc return value from readFastqcZips()
##' @return numeric vector
##' @author Timothee Flutre [cre,aut]
##' @export
extractFastqcNreads <- function(all.qc){
stopifnot(is.list(all.qc), ! is.null(names(all.qc)))
sapply(all.qc, function(qc){
as.numeric(qc[["Basic_Statistics"]]$Value[qc[["Basic_Statistics"]]$Measure
== "Total Sequences"])
})
}
##' Extract FastQC
##'
##' Return the number, or percentage, of sequences per quality score per entry in a set of zip archives generated by FastQC.
##' To be used after readFastqcZips().
##' @param all.qc return value from readFastqcZips()
##' @param perc return percentage of sequences if TRUE, number of sequences otherwise
##' @param nreads return value from nreads.fastqc(), required if perc=TRUE
##' @return numeric matrix with entries in rows and number (percentage) of sequences per quality in columns
##' @author Timothee Flutre [cre,aut]
##' @export
extractFastqcQuals <- function(all.qc, perc=FALSE, nreads=NULL){
stopifnot(is.list(all.qc), ! is.null(names(all.qc)),
ifelse(perc, ! is.null(nreads), TRUE))
N <- length(all.qc)
qual <- matrix(NA, nrow=N, ncol=50,
dimnames=list(names(all.qc), paste0("Q=", 1:50)))
for(i in 1:N)
qual[i, all.qc[[i]][["Per_sequence_quality_scores"]][,"Quality"]] <-
all.qc[[i]][["Per_sequence_quality_scores"]][,"Count"]
if(perc)
for(i in 1:nrow(qual))
qual[i,] <- (qual[i,] / nreads[i]) * 100
return(qual)
}
##' Extract FastQC
##'
##' Returns the adapter content along the sequences per entry in a set of zip archives generated by FastQC.
##' To be used after readFastqcZips().
##' When reads are long, FastQC can average adapter content over a given range, e.g. over two successive bases, such as "11-12".
##' In such a case, because the ranges can be different for different fastq files in \code{all.qc}, this function will set adapter content to the first base of the range in the ouput, here "11", the other, here "12", will get NA.
##' @param all.qc return value from readFastqcZips()
##' @param adp name of the adapter to plot (default="Illumina Universal Adapter")
##' @return numeric matrix with entries in rows and positions along sequences in columns
##' @author Timothee Flutre [cre,aut], Nicolas Rode [ctb]
##' @export
extractFastqcAdpContents <- function(all.qc, adp="Illumina Universal Adapter"){
stopifnot(is.list(all.qc), ! is.null(names(all.qc)))
## set up the structure of the output
N <- length(all.qc)
L <- -1
for(i in 1:N){
nb.pos <- length(all.qc[[i]]$Adapter_Content[["Position"]])
L.i <- as.numeric(
strsplit(
as.character(all.qc[[i]]$Adapter_Content[["Position"]][nb.pos]),
"-")[[1]][1])
L <- ifelse(L.i > L, L.i, L)
}
adp.content <- matrix(data=NA, nrow=N, ncol=L,
dimnames=list(names(all.qc), as.character(1:L)))
## fill the output
for(i in 1:N){
stopifnot(adp %in% names(all.qc[[i]]$Adapter_Content))
first.positions <- sapply(
strsplit(as.character(all.qc[[i]]$Adapter_Content[["Position"]]),
"-"), function(x){x[[1]][1]})
adp.content[i, first.positions] <- all.qc[[i]]$Adapter_Content[[adp]]
}
return(adp.content)
}
##' Extract FastQC
##'
##' Returns the N count along the sequences per entry in a set of zip archives generated by FastQC.
##' To be used after readFastqcZips().
##' When reads are long, FastQC can average N count over a given range, e.g. over two successive bases, such as "11-12".
##' In such a case, because the ranges can be different for different fastq files in \code{all.qc}, this function will set N count to the first base of the range in the ouput, here "11", the other, here "12", will get NA.
##' @param all.qc return value from readFastqcZips()
##' @return numeric matrix with entries in rows and positions along sequences in columns
##' @author Timothee Flutre [cre,aut]
##' @export
extractFastqcBaseNs <- function(all.qc){
stopifnot(is.list(all.qc), ! is.null(names(all.qc)))
## set up the structure of the output
N <- length(all.qc)
L <- -1
for(i in 1:N){
nb.pos <- length(all.qc[[i]]$Per_base_N_content[,"Base"])
L.i <- as.numeric(
strsplit(
as.character(all.qc[[i]]$Per_base_N_content[,"Base"][nb.pos]),
"-")[[1]][1])
L <- ifelse(L.i > L, L.i, L)
}
baseN <- matrix(data=NA, nrow=N, ncol=L,
dimnames=list(names(all.qc), as.character(1:L)))
## fill the output
for(i in 1:N){
first.positions <- as.numeric(sapply(
strsplit(as.character(all.qc[[i]]$Per_base_N_content[,"Base"]), "-"),
function(x){x[[1]][1]}))
baseN[i, first.positions] <- as.numeric(all.qc[[i]]$Per_base_N_content[,"N-Count"])
}
return(baseN)
}
##' Extract FastQC
##'
##' Returns the sequence length distribution per entry in a set of zip archives generated by FastQC.
##' To be used after readFastqcZips().
##' @param all.qc return value from readFastqcZips()
##' @return numeric matrix with entries in rows and positions along sequences in columns
##' @author Timothee Flutre [cre,aut], Nicolas Rode [ctb]
##' @export
extractFastqcSeqLengths <- function(all.qc){
stopifnot(is.list(all.qc), ! is.null(names(all.qc)))
N <- length(all.qc)
positions <- lapply(1:N, function(i){
as.character(all.qc[[i]]$Sequence_Length_Distribution[["Length"]])
})
positions <- unique(sort(sapply(
strsplit(do.call(c, positions), "-"), function(x){
as.numeric(x[1])
})))
seq.lengths <- matrix(0, nrow=N, ncol=length(positions),
dimnames=list(names(all.qc), positions))
for(i in 1:N){
tmp <- all.qc[[i]]$Sequence_Length_Distribution
if (length(grep("-",tmp$Length)) != 0)
tmp$Length <- sapply(strsplit(tmp$Length, "-"), function(x){x[1]})
seq.lengths[i,as.character(tmp$Length)] <- tmp$Count
}
return(seq.lengths)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.