Nothing
### =========================================================================
### bam_tally program
### -------------------------------------------------------------------------
###
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Raw tally result
###
setClass("TallyIIT", representation(genome = "GmapGenome",
bam = "BamFile",
xs = "logical",
read_pos = "logical",
nm = "logical",
codon = "logical"),
contains = "IIT")
TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm, codon) {
new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs,
read_pos = read_pos, nm = nm, codon = codon)
}
setMethod("genome", "TallyIIT", function(x) x@genome)
bamFile <- function(x) x@bam
setMethod("show", "TallyIIT", function(object) {
cat("Tally IIT object for '", path(bamFile(object)), "' on '",
genome(genome(object)), "'\n", sep = "")
})
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### High-level wrapper
###
setGeneric("bam_tally",
function(x, param, ...)
standardGeneric("bam_tally"),
signature = "x")
setMethod("bam_tally", "BamFile",
function(x, param, ...)
{
x <- GmapBamReader(x)
callGeneric()
})
setMethod("bam_tally", "character",
function(x, param, ...)
{
x <- BamFile(x)
callGeneric()
})
setMethod("bam_tally", "GmapBamReader",
function(x, param, ...)
{
param_list <- as.list(param)
args <- list(...)
param_list[names(args)] <- args
genome <- param_list$genome
##verify genome has been created
param_list$db <- genome(genome)
param_list$genome_dir <- path(directory(genome))
if (!.gmapGenomeCreated(genome)) {
stop("The GmapGenome object has not yet been created. ",
"One solution is to run the GmapGenome constructor ",
"with create=TRUE")
}
param_list$genome <- NULL
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome,
as(x, "BamFile"), xs=param_list$xs,
read_pos=param_list$read_pos, nm=param_list$nm,
codon=!is.null(param_list$exon_iit))
})
variantSummary <- function(x, read_pos_breaks = NULL,
keep_ref_rows = FALSE, read_length = NA_integer_,
high_nm_score = NA_integer_)
{
read_length <- as.integer(read_length)
if (length(read_length) != 1L) {
stop("'read_length' must be a single integer")
}
high_nm_score <- as.integer(high_nm_score)
stopifnot(length(high_nm_score) == 1L)
if (!is.na(high_nm_score) && !x@nm) {
stop("'high_nm_score is not NA but NM scores were not tallied")
}
if (length(read_pos_breaks) > 0L) {
if (!x@read_pos) {
stop("'read_pos_breaks' non-empty, but read positions were not tallied")
}
read_pos_breaks <- as.integer(read_pos_breaks)
if (any(is.na(read_pos_breaks)))
stop("'read_pos_breaks' should not contain missing values")
if (length(read_pos_breaks) < 2)
stop("'read_pos_breaks' needs at least two elements to define a bin")
if (is.unsorted(read_pos_breaks))
stop("'read_pos_breaks' must be sorted")
}
tally <- .Call(R_tally_iit_parse, x@ptr,
read_pos_breaks,
NULL, read_length, x@xs, high_nm_score)
tally_names <- c("seqnames", "pos", "ref", "alt",
"n.read.pos", "n.read.pos.ref",
"count", "count.ref", "raw.count.total", "count.total",
"count.plus", "count.plus.ref",
"count.minus", "count.minus.ref",
"count.del.plus", "count.del.minus",
"read.pos.mean", "read.pos.mean.ref",
"read.pos.var", "read.pos.var.ref",
"mdfne", "mdfne.ref", "codon.strand",
"count.xs.plus", "count.xs.plus.ref",
"count.xs.minus", "count.xs.minus.ref",
"count.high.nm", "count.high.nm.ref")
break_names <- character()
if (length(read_pos_breaks) > 0L) {
read_pos_breaks <- as.integer(read_pos_breaks)
break_names <- paste("readPosCount", head(read_pos_breaks, -1),
tail(read_pos_breaks, -1), sep = ".")
tally_names <- c(tally_names, break_names)
}
names(tally) <- tally_names
tally$codon.strand <- if (x@codon) {
structure(tally$codon.strand, class="factor", levels=levels(strand()))
}
tally <- Filter(Negate(is.null), tally)
if (!keep_ref_rows) {
variant_rows <- !is.na(tally$alt)
if (!all(variant_rows))
tally <- lapply(tally, `[`, variant_rows)
}
meta_names <- setdiff(names(tally),
c("seqnames", "pos", "ref", "alt", "count",
"count.ref", "count.total"))
genome <- genome(x)
indel <- nchar(tally$ref) == 0L | nchar(tally$alt) == 0L
metacols <- DataFrame(tally[meta_names])
mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks, x@xs,
high_nm_score, x@codon)
gr <- with(tally,
VRanges(seqnames,
IRanges(pos,
width = ifelse(nchar(alt) == 0 & !is.na(alt),
nchar(ref), 1L)),
ref, alt,
count.total, count.ref, count,
seqlengths = seqlengths(genome)))
mcols(gr) <- metacols
checkTallyConsistency(gr)
## important to preserve seqlevel ordering compatible with 'genome'
seqinfo(gr) <- merge(seqinfo(genome), seqinfo(bamFile(x)))
gr <- keepSeqlevels(gr, intersect(seqlevels(gr), seqlevels(bamFile(x))))
gr <- normalizeIndelAlleles(gr, genome)
gr
}
checkTallyConsistency <- function(x) {
with(mcols(x), {
stopifnot(all(count.plus + count.minus == altDepth(x), na.rm=TRUE))
stopifnot(all(count.plus.ref + count.minus.ref == refDepth(x)))
})
}
normalizeIndelAlleles <- function(x, genome) {
is.indel <- nchar(ref(x)) == 0L | (nchar(alt(x)) == 0L & !is.na(alt(x)))
if (any(is.indel)) {
indels <- x[is.indel]
flanks <- flank(indels, 1)
anchor <- getSeq(genome, flanks)
ref(x)[is.indel] <- paste0(anchor, ref(indels))
alt(x)[is.indel] <- paste0(anchor, alt(indels))
ranges(x)[is.indel] <- resize(ranges(flanks), nchar(ref(x)[is.indel]))
}
x
}
guessReadLengthFromBam <- function(x, n=100L) {
ga <- readGAlignments(BamFile(x, yieldSize=n))
readlen <- unique(qwidth(ga))
if (length(readlen) != 1L)
NA
else readlen
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Low-level wrappers
###
normArgSingleInteger <- function(x) {
name <- deparse(substitute(x))
x <- as.integer(x)
if (!isSingleInteger(x))
stop("'", name, "' should be a single, non-NA integer")
x
}
normArgSingleNumber <- function(x) {
name <- deparse(substitute(x))
x <- as.numeric(x)
if (!isSingleNumber(x))
stop("'", name, "' should be a single, non-NA number")
x
}
normArgTRUEorFALSE <- function(x) {
name <- deparse(substitute(x))
if (!isTRUEorFALSE(x))
stop("'", name, "' should be TRUE or FALSE")
x
}
normArgSingleCharacterOrNULL <- function(x) {
name <- deparse(substitute(x))
if (!is.null(x) && (!is(x, "character") || length(x) != 1))
stop("'", name, "' should be a single character value")
x
}
.bam_tally_C <- function(bamreader, genome_dir = NULL, db = NULL,
which = NULL,
high_base_quality = 0L, desired_read_group = NULL,
alloclength = 200000L,
minimum_mapq = 0L, good_unique_mapq = 35L,
maximum_nhits = 1000000L,
concordant_only = FALSE, unique_only = FALSE,
primary_only = FALSE, ignore_duplicates = FALSE,
min_depth = 0L, variant_strand = 0L, variant_pct = 0,
ignore_query_Ns = FALSE,
indels = FALSE,
blocksize = 1000L, verbosep = FALSE,
min_softclip = 0L, max_softclip = 0L,
exon_iit = NULL, xs = FALSE, read_pos = FALSE,
min_base_quality = 0L, noncovered = FALSE, nm = FALSE)
{
if (!is(bamreader, "GmapBamReader"))
stop("'bamreader' must be a GmapBamReader")
if (length(which) > 0L) {
which <- list(as.character(seqnames(which)), start(which), end(which))
} else which <- NULL
if (!is.null(genome_dir) && !isSingleString(genome_dir))
stop("'genome_dir' must be NULL or a single, non-NA string")
if (!is.null(db) && !isSingleString(db))
stop("'db' must be NULL or a single, non-NA string")
if (!is.null(desired_read_group) && !isSingleString(desired_read_group))
stop("'desired_read_group' must be NULL or a single, non-NA string")
.Call(R_Bamtally_iit, bamreader@.extptr, genome_dir, db, which,
desired_read_group,
normArgSingleInteger(alloclength),
normArgSingleInteger(minimum_mapq),
normArgSingleInteger(good_unique_mapq),
normArgSingleInteger(maximum_nhits),
normArgTRUEorFALSE(concordant_only),
normArgTRUEorFALSE(unique_only),
normArgTRUEorFALSE(primary_only),
normArgTRUEorFALSE(ignore_duplicates),
normArgSingleInteger(min_depth),
normArgSingleInteger(variant_strand),
normArgSingleNumber(variant_pct),
normArgTRUEorFALSE(ignore_query_Ns),
normArgTRUEorFALSE(indels),
normArgSingleInteger(blocksize),
normArgTRUEorFALSE(verbosep),
normArgSingleInteger(min_softclip),
normArgSingleInteger(max_softclip),
normArgSingleCharacterOrNULL(exon_iit),
normArgTRUEorFALSE(xs),
normArgTRUEorFALSE(read_pos),
normArgSingleInteger(min_base_quality),
normArgTRUEorFALSE(noncovered),
normArgTRUEorFALSE(nm))
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Column metadata
###
variantSummaryColumnDescriptions <-
function(read_pos_breaks, xs, high_nm, codon)
{
desc <- c(
n.read.pos = "Number of unique read positions for the ALT",
n.read.pos.ref = "Number of unique read positions for the REF",
raw.count.total = "Total depth, before quality filtering",
count.plus = "Positive strand ALT count",
count.plus.ref = "Positive strand REF count",
count.minus = "Negative strand ALT count",
count.minus.ref = "Negative strand REF count",
count.del.plus = "Plus strand deletion count",
count.del.minus = "Count strand deletion count",
read.pos.mean = "Average read position for the ALT",
read.pos.mean.ref = "Average read position for the ALT",
read.pos.var = "Variance in read position for the ALT",
read.pos.var.ref = "Variance in read position for the REF",
mdfne = "Median distance from nearest end of read for the ALT",
mdfne.ref = "Median distance from nearest end of read for the REF",
if (codon) {
c(codon.strand = "Strand of transcription for the codon")
},
if (xs) {
c(count.xs.plus = "Plus strand XS counts",
count.xs.plus.ref = "Plus strand reference XS counts",
count.xs.minus = "Minus strand XS counts",
count.xs.minus.ref = "Minus strand reference XS counts")
},
count.high.nm = paste("Count of reads with >=", high_nm,
"NM score for the ALT"),
count.high.nm.ref = paste("Count of reads with >=", high_nm,
"NM score for the REF"))
if (length(read_pos_breaks) > 0L) {
break_desc <- paste0("Raw ALT count in read position range [",
head(read_pos_breaks, -1), ",",
tail(read_pos_breaks, -1), ")")
desc <- c(desc, break_desc)
}
DataFrame(Description = desc)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.