R/methods-SRFilter.R

Defines functions compose alignDataFilter alignQualityFilter dustyFilter srdistanceFilter polynFilter nFilter strandFilter occurrenceFilter .occurrenceName uniqueFilter positionFilter chromosomeFilter idFilter .getAlphabetFrequency

Documented in alignDataFilter alignQualityFilter chromosomeFilter compose dustyFilter idFilter nFilter occurrenceFilter polynFilter positionFilter srdistanceFilter strandFilter uniqueFilter

setMethod(.srValidity, "SRFilter", function (object) {
    msg <- NULL
    fmls <- formals(object)
    if (length(fmls) != 1 || names(fmls)[[1]] != "x")
        msg <- c(msg, paste("'filter' must have one argument, 'x'"))
    if (is.null(msg)) TRUE else msg
})

setMethod(srFilter, "missing", function(fun, name, ...) {
    srFilter(function(x) !logical(length(x)), name=name, ...)
})
         
setMethod(srFilter, "function",
          function(fun, name, ...) 
{
    name <- mkScalar(as.character(name))
    fmls <- formals(fun)
    if (length(fmls) != 1 || names(fmls)[[1]] != "x")
        .throw(SRError("UserArgumentMismatch",
                       "'filter' must have one argument, 'x'"))

    env <- new.env(parent=environment(fun))
    env[[".stats"]] <- NULL
    fun <- eval(substitute(function(x) {
        res <- FUN(x)
        SRFilterResult(res, NAME)
    }, list(FUN=fun, NAME=name)))
    environment(fun) <- env

    new("SRFilter", fun, name=name, ...)
})

setMethod(srFilter, "SRFilter", function(fun, name, ...) {
    slot(fun, ".Data")
})

setMethod(name, "SRFilter", function(x, ...) slot(x, "name"))

.getAlphabetFrequency <- function(x, ...)
{
    if (is(x, "ShortRead"))
        alphabetFrequency(sread(x), ...)
    else
        alphabetFrequency(x, ...)
}

idFilter <-
    function(regex=character(0), fixed=FALSE, exclude=FALSE,
             .name="idFilter")
{
    .check_type_and_length(regex, "character", 0:1)
    srFilter(function(x) {
        .idx <- logical(length(x))
        .idx[grep(regex, as.character(id(x)), fixed=fixed)] <- TRUE
        if (exclude) .idx <- !.idx
        .idx
    }, name = .name)
}

chromosomeFilter <-
    function(regex=character(0), fixed=FALSE, exclude=FALSE,
             .name="ChromosomeFilter")
{
    .check_type_and_length(regex, "character", 0:1)
    srFilter(function(x) {
        .idx <- logical(length(x))
        .idx[grep(regex, chromosome(x), fixed=fixed)] <- TRUE
        if (exclude) .idx <- !.idx
        .idx
    }, name=.name)
}

positionFilter <-
    function(min=-Inf, max=Inf, .name="PositionFilter")
{
    .check_type_and_length(min, "numeric", 1)
    .check_type_and_length(max, "numeric", 1)
    srFilter(function(x) {
        !is.na(position(x)) & position(x) >= min &
            position(x) <= max
    }, name=.name)
}

uniqueFilter <-
    function(withSread=TRUE, .name="UniqueFilter")
{
    msg <-
        if (withSread) "occurrenceFilter(withSread=TRUE)"
        else "occurrenceFilter"
    .Defunct(msg, package="ShortRead")
}

## withSread
##   TRUE: sread, chromosome, position, strand
##   FALSE: chromosome, position, strand
##   NA: sread
.occurrenceName <-
    function(min, max, withSread, duplicates)
{
    if (!is.character(duplicates))
    {
        duplicates <- deparse(substitute(duplicates, env=parent.frame()))
        if (length(duplicates) > 1)
            duplicates <- "custom"
    }
    sprintf("%s\n  min=%d max=%d withSread='%s'\n  duplicates='%s'",
            "OccurrenceFilter", min, max, withSread, duplicates)
}

occurrenceFilter <-
    function(min=1L, max=1L, withSread=c(NA, TRUE, FALSE),
             duplicates=c("head", "tail", "sample", "none"),
             .name=.occurrenceName(min, max, withSread,
                 duplicates))
{
    .check_type_and_length(min, "numeric", 1L)
    .check_type_and_length(max, "numeric", 1L)
    if (missing(withSread))
        withSread <- withSread[1]
    .check_type_and_length(withSread, "logical", 1L)
    if (is.character(duplicates))
        duplicates <- match.arg(duplicates)
    if (max < min)
        .throw(SRError("UserArgumentMismatch",
                       "'min' must be <= 'max'"))
    srFilter(function(x) {
        rnk <- 
            if (is(x, "AlignedRead")) {
                if (is.na(withSread)) srrank(sread(x))
                else srrank(x, withSread=withSread)
            } else srrank(x)
        t <- tabulate(rnk)
        result <- rnk %in% which(t >= min & t <= max)
        if (!(is.character(duplicates) && "none" == duplicates)) {
            q <- which(rnk %in% which(t > max))
            if(length(q) != 0L) {
                x <- tapply(q, rnk[q], duplicates, max, simplify=FALSE)
                result[unlist(x, use.names=FALSE)] <- TRUE
            }
        }
        result
    }, name=.name)
}

strandFilter <- function(strandLevels=character(0),
                         .name="StrandFilter")
{
    .check_type_and_length(strandLevels, "character", NA)
    srFilter(function(x) strand(x) %in% strandLevels,
             name=.name)
}

nFilter <- function(threshold=0L, .name="CleanNFilter") 
{
    .check_type_and_length(threshold, "numeric", 1)
    srFilter(function(x) {
        .getAlphabetFrequency(x, baseOnly=TRUE)[,"other"] <= threshold
    }, name=.name)
}

polynFilter <- function(threshold=0L,
                        nuc=c("A", "C", "T", "G", "other"),
                        .name="PolyNFilter")
{
    .check_type_and_length(threshold, "numeric", 1)
    .check_type_and_length(nuc, "character", NA)
    ok <- eval(formals()[["nuc"]])
    if (!all(nuc %in% ok))
        .arg_mismatch_value_err("nuc",
                                paste(nuc, collapse=", "),
                                ok)
    srFilter(function(x) {
        alf <- .getAlphabetFrequency(x, baseOnly=TRUE)
        apply(alf[,nuc,drop=FALSE], 1, max) <= threshold
    }, name=.name)
}

srdistanceFilter <- function(subject=character(0), threshold=0L,
                             .name="SRDistanceFilter")
{
    .check_type_and_length(subject, "character", NA)
    .check_type_and_length(threshold, "numeric", 1)
    srFilter(function(x) {
        .idx <- !logical(length(x))
        dist <- srdistance(x, subject)
        for (i in seq_along(dist))
            .idx <- .idx & dist[[i]] >= threshold
        .idx
    }, name=.name)
}

dustyFilter <-
    function(threshold=Inf, batchSize=NA, .name="DustyFilter")
{
    .check_type_and_length(threshold, "numeric", 1)
    srFilter(function(x) dustyScore(x, batchSize) <= threshold,
             name=.name)
}

alignQualityFilter <- function(threshold=0L,
                               .name="AlignQualityFilter")
{
    .check_type_and_length(threshold, "numeric", 1)
    srFilter(function(x) quality(alignQuality(x)) >= threshold,
             name=.name)
}

alignDataFilter <- function(expr=expression(),
                            .name="AlignDataFilter")
{
    .check_type_and_length(expr, "expression", NA)
    srFilter(function(x) eval(expr, pData(alignData(x))),
             name=.name)
}

compose <-
    function(filt, ..., .name)
{
    lst <- if (missing(filt)) list(...) else list(filt, ...)
    for (`filt, ...` in lst)
        .check_type_and_length(`filt, ...`, "SRFilter", NA)
    if (missing(.name))
        .name <- paste(sapply(lst, name), collapse=" o ")
    srFilter(function(x) {
        .idx <- SRFilterResult(!logical(length(x)))
        for (elt in rev(lst))
            .idx <- .idx & elt(x)
        .idx
    }, name =.name)
}

setMethod(show, "SRFilter", function(object) {
    cat("class:", class(object), "\n")
    cat("name:", name(object), "\n")
    cat("use srFilter(object) to see filter\n")
})

setAs("SRFilter", "FilterRules", function(from) {
    exprs <- list(from)
    names(exprs) <- name(from)
    FilterRules(exprs)
})

setMethod(c, "SRFilter", function (x, ..., recursive = FALSE) {
    if (missing(x))
        args <- unname(list(...))
    else
        args <- unname(list(x, ...))
    args <- list(x, ...)
    rules <- lapply(args, as, "FilterRules")
    do.call(c, c(rules, recursive = recursive))
})

Try the ShortRead package in your browser

Any scripts or data that you put into this service are public.

ShortRead documentation built on Nov. 8, 2020, 8:02 p.m.