## R/read.R

##' Read FastQC
##'
##' Read a "fastqc_data.txt" file generated by FastQC.
##' Heavily inspired from readFastQC() in the Repitools package.
##'
##' The file is split into modules at each line starting with ">>"; the
##' ">>END_MODULE" lines are dropped and every "#" character is removed.
##' Within a module, the first line after the module title provides the column
##' names and the following tab-separated lines provide the rows.
##' A column is converted to numeric only if all its values can be converted.
##' @param file the name of the file which the data are to be read from (its first line must contain "##FastQC")
##' @return named list with one data frame per module; names are the module titles (text before the first tab) with spaces replaced by underscores; a module with no line after its title gives an empty data frame, and a module with column names but no rows gives a zero-row data frame with these column names
##' @author Timothee Flutre [cre,aut]
##' @export
readFastqcTxt <- function(file){
  stopifnot(file.exists(file),
            grepl(pattern="##FastQC", x=readLines(file, n=1)))
  temp <- readLines(file)
  temp <- gsub("#", "", temp)
  temp <- temp[!grepl(">>END_MODULE", temp)]
  ## group "0" gathers the lines before the first module (file header): drop it
  temp <- split(temp, cumsum(grepl("^>>", temp)))[-1]
  names(temp) <- vapply(temp, function(x) {
    gsub("^>>", "", gsub("\t.*", "", gsub(" ", "_", x[1])))
  }, character(1))
  temp <- lapply(temp, function(x) {
    ## module made of its title line only
    if(length(x) == 1)
      return(data.frame())
    fields <- strsplit(x[-1], split="\t")
    header <- fields[[1]]
    rows <- fields[-1]
    ## do.call(rbind, list()) returns NULL, hence build explicitly a zero-row
    ## matrix when the module has column names but no data
    if(length(rows) > 0){
      mat <- do.call(rbind, rows)
    } else
      mat <- matrix(character(0), nrow=0, ncol=length(header))
    tab <- as.data.frame(mat, stringsAsFactors=FALSE)
    ## seq_len() instead of 1:ncol() so that nothing is done with zero column
    for(i in seq_len(ncol(tab)))
      if(!any(is.na(suppressWarnings(as.numeric(tab[[i]])))))
        tab[[i]] <- as.numeric(tab[[i]])
    colnames(tab) <- header
    tab
  })
  return(temp)
}

##' Read FastQC zip archives
##'
##' Read several zip archives generated by FastQC.
##' Each zip archive is decompressed in its own temporary directory which is removed afterwards.
##' An archive which cannot be unzipped, or whose "fastqc_data.txt" cannot be read without warning or error, is reported via messages (whatever the verbosity level) and omitted from the output.
##' @param path character vector of the path to the directory containing the zip archives (will be combined with \code{glob})
##' @param glob character vector with wildcard(s) to find zip archives
##' @param verbose verbosity level (0/1/2): above 0, the number of detected archives is reported; above 1, each step is reported
##' @return named list of lists (one per zip archive successfully read, as returned by \code{readFastqcTxt}); names are the archive file names without the "_fastqc.zip" suffix
##' @author Timothee Flutre [cre,aut], Nicolas Rode [ctb]
##' @export
readFastqcZips <- function(path=".", glob="*_fastqc.zip", verbose=1){
  stopifnot(dir.exists(path))

  zip.archives <- Sys.glob(paste(path, glob, sep="/"))

  if(length(zip.archives) == 0)
    stop("not a single zip archive was found", call.=FALSE)

  if(verbose > 0)
    message(paste("nb of zip archives detected:", length(zip.archives)))

  all.qc <- lapply(zip.archives, function(zip.archive){
    qc <- NULL

    ## name of the folder inside the archive; the pattern is escaped and
    ## anchored so that only the final ".zip" extension is removed
    f.base <- sub("\\.zip$", "", basename(zip.archive))

    zipdir <- tempfile()
    dir.create(zipdir)
    ## cleanup is registered right away so that it happens on any exit
    on.exit(unlink(zipdir, recursive=TRUE), add=TRUE)
    if(verbose > 1)
      message("create tmpdir ", zipdir)

    if(verbose > 1)
      message(paste0("try to unzip ", zip.archive, " ..."))
    ## retval is NULL whenever unzipping raised a warning or an error
    retval <- tryCatch(
    {
      unzip(zip.archive, exdir=zipdir)
    },
    warning = function(w){
      message(paste(basename(zip.archive), "could not be unzipped."))
      message("Original warning message:")
      message(paste0(w, ""))
      NULL
    },
    error = function(e){
      message(paste(basename(zip.archive), "could not be unzipped."))
      message("Original error message:")
      message(paste0(e, ""))
      NULL
    })

    if(! is.null(retval)){
      if(verbose > 1)
        message(paste0("try to read fastqc_data.txt ..."))
      ## qc is NULL whenever reading raised a warning or an error, so that
      ## the archive is discarded from the output
      qc <- tryCatch(
      {
        readFastqcTxt(paste0(zipdir, "/", f.base, "/fastqc_data.txt"))
      },
      warning = function(w){
        message(paste0(f.base, ".txt could not be found."))
        message("Original warning message:")
        message(paste0(w, ""))
        NULL
      },
      error = function(e){
        message(paste0(f.base, ".txt could not be found."))
        message("Original error message:")
        message(paste0(e, ""))
        NULL
      })
    }

    return(qc)
  })

  names(all.qc) <- sub("_fastqc\\.zip$", "", basename(zip.archives))

  ## discard the archives which could not be unzipped or read
  return(all.qc[! vapply(all.qc, is.null, logical(1))])
}
## timflutre/rutilsfastqc documentation built on May 31, 2019, 2:16 p.m.