#* Copyright (C) 2018 the DEUS contributors.
#* Website: https://github.com/timjeske/DEUS
#*
#* This file is part of the DEUS R package.
#*
#* The DEUS R package is free software: you can redistribute it and/or modify
#* it under the terms of the GNU General Public License as published by
#* the Free Software Foundation, either version 3 of the License, or
#* (at your option) any later version.
#*
#* This program is distributed in the hope that it will be useful,
#* but WITHOUT ANY WARRANTY; without even the implied warranty of
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#* GNU General Public License for more details.
#*
#* You should have received a copy of the GNU General Public License
#* along with this program. If not, see <http://www.gnu.org/licenses/>.
#' Create sequence-based count tables
#'
#' This function creates count tables summarizing counts for each observed sequence in a set of fastQ files.
#' Input files are taken from the given input directory and are selected based on the given fastQ suffix.
#' @param in_dir Input directory containing fastQ files to be analyzed
#' @param fq_suffix Suffix of your fastq files, default is ".fq.gz"
#' @param pheno_info A data frame with sample names as row names and file names in column 'sample'.
#' The sample names will be used as headers in the count table.
#' If not given the file names of the fastQ files count table will be the headers of the count table.
#' @return A data frame with unique sequences as row names and the correspoding number of occurrences for each sample
#' @export
createCountTableFromFastQs <- function(in_dir,fq_suffix = ".fq.gz",pheno_info) {
file.list <- list.files(in_dir)
fastqs <- file.list[endsWith(file.list,fq_suffix)]
fastqs <- paste(in_dir,fastqs,sep="/")
if(!missing(pheno_info) && "sample" %in% colnames(pheno_info)) {
pheno_info$sampleName = rownames(pheno_info)
pheno_info$baseName = basename(as.character(pheno_info$sample))
fastqs <- paste(in_dir, pheno_info$baseName, sep="/")
}
message(paste("Counting all reads in",fastqs[1]))
content <- ShortRead::readFastq(fastqs[1])
counts <- ShortRead::tables(content,n=length(content))
countTable <- as.data.frame(counts$top)
names(countTable) <- c(basename(fastqs[1]))
if(!missing(pheno_info) && "sample" %in% colnames(pheno_info)) {
names(countTable) <- c(as.character(pheno_info$sampleName[basename(fastqs[1])==pheno_info$baseName]))
}
countTable$Read <- row.names(countTable)
fastqs <- fastqs[-1]
for(f in fastqs) {
message(paste("Counting all reads in",f))
content <- ShortRead::readFastq(f)
counts <- ShortRead::tables(content,n=length(content))
counts <- as.data.frame(counts$top)
names(counts) <- c(basename(f))
if(!missing(pheno_info) && "sample" %in% colnames(pheno_info)) {
names(counts) <- c(as.character(pheno_info$sampleName[basename(f)==pheno_info$baseName]))
}
counts$Read <- row.names(counts)
countTable <- plyr::join(countTable, counts, type="full")
}
row.names(countTable) <- countTable$Read
countTable$Read <- NULL
countTable[is.na(countTable)] <- 0
return(countTable)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.