Nothing
#' Process total coverage statistics
#'
#' @description
#' Process reports generated by flagstat.
#' Assumes reports for before and after off-target filtering have been written to the same file, with separating headers
#'
#' @inheritParams get.coverage.by.sample.statistics
#'
#' @return data frame with extracted statistics
process.total.coverage.statistics <- function(project.directory) {
flagstat.report.paths <- system.ls(pattern = "*/*stats", directory = project.directory, error = TRUE);
sample.ids <- extract.sample.ids(flagstat.report.paths, from.filename = TRUE);
total.coverage.statistics <- list();
for(i in 1:length(flagstat.report.paths)) {
### READ DATA
filepath <- flagstat.report.paths[i];
sample.id <- sample.ids[i];
input.lines <- scan(
filepath,
what = character(),
sep = "\n",
quiet = TRUE
);
### INPUT TESTS
if(28 != length(input.lines)) {
error.message <- paste("Read", length(input.lines), "from Flagstat report. Expected 28 lines.");
stop(error.message);
}
if("Flagstat before filtering off-target reads" != input.lines[1]) {
stop('Expected line 1 to be "Flagstat before filtering off-target reads"');
}
if("Flagstat after filtering off-target reads" != input.lines[15]) {
stop('Expected line 15 to be "Flagstat after filtering off-target reads"');
}
### EXTRACT NUMBERS
#
# this is based on order of flagstat report => not very robust
# have tried to add lots of input tests to compensate
ontarget.reads <- stringr::str_extract(input.lines[16], "\\d+") %>% as.numeric;
total.reads <- stringr::str_extract(input.lines[2], "\\d+") %>% as.numeric;
mapped.reads <- stringr::str_extract(input.lines[6], "\\d+") %>% as.numeric;
paired.reads <- stringr::str_extract(input.lines[7], "\\d+") %>% as.numeric;
sample.coverage.statistics <- data.frame(
"sample.id" = sample.id,
"total.reads" = total.reads,
"mapped.reads" = mapped.reads,
"ontarget.reads" = ontarget.reads,
"ontarget.percent" = ontarget.reads/total.reads,
"mapped.percent" = mapped.reads/total.reads,
"paired.reads" = paired.reads,
"paired.percent" = paired.reads/total.reads
);
total.coverage.statistics[[ sample.id ]] <- sample.coverage.statistics;
}
total.coverage.statistics <- do.call(rbind, total.coverage.statistics);
### SANITY CHECKS
if( any(total.coverage.statistics$mapped.reads > total.coverage.statistics$total.reads) ) {
stop("One or more samples has more mapped reads than total reads. That can't be right.");
}
if( any(total.coverage.statistics$paired.reads > total.coverage.statistics$total.reads) ) {
stop("One or more samples has more paired reads than total reads. That can't be right.");
}
if( any(total.coverage.statistics$ontarget.percent > 1) ) {
stop("One or more samples has on-target percent greater than 100.");
}
return(total.coverage.statistics);
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.