R/countMatrixFunctions.R

Defines functions countFinalRegions

Documented in countFinalRegions

#' countFinalRegions
#' @description count reads falling within the final regions.
#'
#' @param regionsGRanges a GRanges objects representing the peaks to compute
#' the coverage, with a "k-carriers" mcols.
#' (tipically generated by finalRegions function).
#' @param readsFilePath the filepath of bam or bed files necessary to compute
#' the coverage.
#' @param fileType the file type of the input files.
#' @param minCarriers minimum number of carriers (samples).
#' @param genomeName code name of the genome of reads files (i.e. "mm9").
#' @param onlyStdChrs a flag indicating if to keep only the standard chromosomes
#' @param ignStrandSO a flag indicating if to ignore the reads strand.
#' (see GenomicAlignments::summarizeOverlaps).
#' @param modeSO the mode to use, default is "Union".
#' (see GenomicAlignments::summarizeOverlaps).
#' @param saveFlag a flag indicating if to save the results.
#' @param savePath the path where to store the results.
#' @param verbose verbose output.
#' @param carrierscolname character describing the name of the column within the
#' carriers number (default is "k-carriers").
#'
#' @return A SummarizedExperiment object containing as assays the read counts
#' matrix with regions as rows and samples as columns, and as rowRanges
#' the GRanges object representing the peaks used as rows in the matrix.
#' @export
#' @importFrom GenomicAlignments summarizeOverlaps
#' @importFrom SummarizedExperiment assay SummarizedExperiment
#' @importFrom BiocGenerics start end
#' @importFrom GenomeInfoDb seqnames
#' @importFrom utils write.table
#' @examples
#' filename <- system.file("extdata/regions/regions.rds", package="DEScan2")
#' regionsGR <- readRDS(file=filename)
#' reads.path <- system.file("extdata/bam", package="DEScan2")
#' finalRegionsSE <- countFinalRegions(regionsGRanges=regionsGR,
#'     readsFilePath=reads.path, fileType="bam", minCarriers=1,
#'     genomeName="mm9", onlyStdChrs=TRUE, ignStrandSO=TRUE, saveFlag=FALSE,
#'     verbose=TRUE)
#' library("SummarizedExperiment")
#' assay(finalRegionsSE) ## matrix of counts
#' rowRanges(finalRegionsSE) ## the GRanges of the input regions
countFinalRegions <- function(regionsGRanges, readsFilePath=NULL,
            fileType=c("bam", "bed"), minCarriers=2, genomeName=NULL,
            onlyStdChrs=FALSE, carrierscolname="k-carriers", ignStrandSO=TRUE, modeSO="Union", saveFlag=FALSE,
            savePath="finalRegions", verbose=TRUE)
{
    match.arg(fileType)
    stopifnot(is(regionsGRanges, "GRanges"))
    if(is.null(readsFilePath)) {stop("readsFilePath cannot be NULL!")}

    idxK <- which(regionsGRanges@elementMetadata[[carrierscolname]] >= minCarriers)
    regionsGRanges <- regionsGRanges[idxK,]

    if(!is.null(genomeName))
    {
        regionsGRanges <- setGRGenomeInfo(GRanges=regionsGRanges,
                                          genomeName=genomeName)
    }


    if(fileType == "bam")
    {
        readsFiles <- list.files(path=readsFilePath, full.names=TRUE,
                                pattern=".bam$")
    }
    else
    {
        readsFiles <- list.files(path=readsFilePath, full.names=TRUE,
                                pattern=".bed$")
    }
    if(length(readsFiles) == 0 ) stop("No reads files found!")
    if(verbose) message("Final regions on ", length(readsFiles), " files.")

    fileReadsList <- lapply(readsFiles, function(file)
    {
        fileReads <- constructBedRanges(filename=as.character(file),
                                        filetype=fileType,
                                        genomeName=genomeName,
                                        onlyStdChrs=onlyStdChrs)
        return(fileReads)
    })
    names(fileReadsList) <- readsFiles

    summRegMat <- vapply(fileReadsList, function(fileReads)
    {
        summReg <- GenomicAlignments::summarizeOverlaps(
                                    features=regionsGRanges,
                                    reads=fileReads,
                                    ignore.strand=ignStrandSO,
                                    mode=modeSO)
        return(SummarizedExperiment::assay(summReg))
    }, integer(length(regionsGRanges)))
    if(!is.matrix(summRegMat)) {
        if(length(summRegMat) == length(fileReadsList))
        {
            ## only one peak found
            summRegMat <- as.matrix(summRegMat)
            summRegMat <- t(summRegMat)
        }
    }

    regionsRN <- paste0(GenomeInfoDb::seqnames(regionsGRanges), ":",
                        BiocGenerics::start(regionsGRanges), "-",
                        BiocGenerics::end(regionsGRanges))

    rownames(summRegMat) <- regionsRN
    if(saveFlag)
    {
        dir.create(path=savePath, recursive=TRUE, showWarnings=FALSE)
        datename <- paste0(strsplit(gsub(pattern=":", replacement=" ",
                                        date()), " ")[[1]], collapse="_")
        filename <- paste0("regions_", datename, "_minK", minCarriers,
                            "_mso", modeSO, ".tsv")
        utils::write.table(x=summRegMat, file=file.path(savePath, filename),
                            quote=FALSE, sep="\t")
        if(verbose) message("file ", filename, " saved on disk!")
    }
    rownames(summRegMat) <- names(regionsGRanges)
    summExp <- SummarizedExperiment::SummarizedExperiment(assays=summRegMat,
                                                rowRanges=regionsGRanges,
                                                colData=data.frame(readsFiles))
    return(summExp)
}
drighelli/DEScan2 documentation built on March 21, 2023, 3:06 p.m.