#' Collects raw counts of gene (or transcript) data generated by kallisto.
#' @description Extracts raw counts from each of the "abundance.tsv" files generated by kallisto (\url{https://pachterlab.github.io/kallisto/about}) expression analysis. The counts are put into a matrix (genes by sample), and rounded. This step collects the raw count data ready for further expression analysis, or for differential expression, using \code{\link{DESeq2}}.
#' @param baseDir A character string of the directory where kallisto results are stored. For example, baseDir = "kallistoResults" will search "kallistoResults/*/abundance.tsv".
#' @return matrix. A matrix of (rounded) raw counts of genes (or transcripts) for each sample.
#' @seealso kallisto (\url{https://pachterlab.github.io/kallisto/about})
#' @export
collectKallistoCounts <- function(baseDir = "./kallistoResults"){
sample_id <- dir(baseDir)
consolData <- lapply(sample_id, consolidateData, baseDir)
kallistoRes <- do.call(cbind, consolData)
return(as.matrix(kallistoRes))
}
#' Collects raw counts of genes, based on transcript data generated by kallisto.
#' @description Extracts raw counts of transcripts from each of the "abundance.tsv" files generated by kallisto (\url{https://pachterlab.github.io/kallisto/about}) expression analysis and converts this to gene counts. The counts are put into a matrix (genes by sample). This step collects the raw count data ready for further expression analysis, or for differential expression, using \code{\link{DESeq2}}.
#' @param baseDir A character string of the directory where kallisto results are stored. For example, baseDir = "kallistoResults" will search "kallistoResults/*/abundance.tsv".
#' @return matrix. A matrix of raw counts of genes for each sample.
#' @import tximport
#' @import readr
#' @import tximportData
#' @export
collectKallistoCountsTXItoGene <- function(baseDir = "./kallistoResults"){
importDir <- system.file("extdata", package="RNASeqAnalysis")
tx2gene <- read.csv(file.path(importDir, "tx2gene.csv"))
sample_id <- dir(baseDir)
files <- file.path(baseDir, sample_id, "abundance.tsv")
names(files) <- sample_id
txi <- tximport(files, type="kallisto", tx2gene=tx2gene, reader=read_tsv)
return(txi)
}
#' Adds the output kallisto directory to the original datafile.
#' @description GOES HERE
#' @param dataFile MORE
#' @param countDat MORE
#' @return dataframe MORE
#' @seealso \code{\link{collectKallistoCounts}}
#' @export
addKallistoResToTable <- function(dataFile, countDat){
sample_id <- colnames(countDat)
datKal <- lapply(sample_id, function(x) {
datKallisto <- dataFile[grepl(paste("^", x, "(_1.fastq.gz|_2.fastq.gz|.fastq.gz)$", sep=""), dataFile$FILTEREDFILE), ]
datKallisto$kallistoRes <- x
return(datKallisto)
}
)
datKal <- do.call(rbind, datKal)
datKal <- datKal[!duplicated(datKal$kallistoRes), ]
}
# private function
consolidateData <- function(sample_id, baseDir){
baseDir <- "./kallistoResults"
abundanceFile <- paste(baseDir, sample_id, "abundance.tsv", sep="/")
abundance <- read.csv(abundanceFile, sep = "\t", row.names = "target_id")
abundance <- round(abundance["est_counts"])
colnames(abundance) <- sample_id
return (abundance)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.