#' Count and summarize long-reads RNA into a table
#'
#' A function that ouputs two tsv files containing counts of exon combinations
#'
#' @param alnsFile The file that store the long-read RNA alignments generated by Minimap2.
#' @param referencesFile A file that store reference coordinates of human genes.
#'
#' @return A list of two data frames. Fisrt data frame contains new exon coordinates
#' created and second data frame contains the exon combinations of each read.
#'
#' @examples
#' alignments <- system.file("extdata", "mlx_reads.sorted.bam",
#' package = "LSplicing")
#' referenceCoord <- system.file("extdata", "example_refCoord.gff3",
#' package = "LSplicing")
#'
#' \dontrun{
#' countReads(alns = alignments, refCoord = referenceCoord)
#' }
#' @export
#' @import GenomicRanges
#' @import GenomicAlignments
#' @import SummarizedExperiment
#' @import rtracklayer
#' @importFrom plyr rbind.fill
#' @importFrom utils write.table
countReads <- function(alnsFile, referencesFile) {
if (! (file.exists(alnsFile) && file.exists(referencesFile))) {
stop("No such file, please check the path to files")
}
# read a BAM file into a GAlignment object
alns <- GenomicAlignments::readGAlignments(alnsFile)
# read a BED file into a GRanges Object
refCoord <- rtracklayer::import(referencesFile)
# store results of exon combinations
results <- data.frame()
# store results of new exon coordinates
gList <- data.frame()
# Extract gene in refCoord
idx <- refCoord@elementMetadata@listData[["type"]] == "gene"
genes <- refCoord[idx]
# loop through every alignment and map them to according gene exons
for (k in seq_along(alns)) {
gene <- data.frame(union = SummarizedExperiment::assay(GenomicAlignments::summarizeOverlaps(genes, alns[k], inter.feature=FALSE)))
# If this read maps to more than one gene, then skip this read
if (sum(gene$reads) == 1) {
(matchIdx <- which(gene[,1] > 0))
genes[matchIdx]
# find ensembl gene ID of this read
geneId <- genes[matchIdx]@elementMetadata@listData[["gene_id"]]
# filter refCoord
dup <- sort(refCoord[refCoord@elementMetadata@listData[["gene_id"]] == geneId &
refCoord@elementMetadata@listData[["type"]] == "exon" &
refCoord@elementMetadata@listData[["transcript_type"]] == "protein_coding"])
# generate new exon coordinates
combined <- combineTranscripts(gList, dup, geneId)
newGranges <- combined[[1]]
gList <- combined[[2]]
# map read to new exon coordinates
read <- data.frame(union = SummarizedExperiment::assay(GenomicAlignments::summarizeOverlaps(newGranges,
alns[k], inter.feature=FALSE), byrow = TRUE))
sum(read$reads)
# Transpose the data frame
t <- t(read)
colnames(t) <- paste(rep("exon", ncol(t)), sep = "", 1:ncol(t))
newDf <- cbind(data.frame("index" = k, geneId), t)
# Assign row name
## use plyr::rbind.fill(x,y) to bind data frames
results <- plyr::rbind.fill(results, newDf)
}
}
r <- list(gList, results)
return(r)
}
# [END]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.