#' @importFrom GenomicRanges GRanges
#' @importMethodsFrom GenomicRanges resize seqnames
.to_0_start_halfopen <-
function(gr)
{
resize(gr, width(gr) + 1L, fix = "end")
}
.from_0_start_halfopen <-
function(gr)
{
resize(gr, width(gr) - 1L, fix = "end")
}
#' @importFrom base64enc base64decode
.as_file <-
function(urls, output)
{
bam <- file(output, "wb")
on.exit(close(bam))
for (url in urls) {
if (startsWith(url$url, "data")) {
data <- sub("data:application/octet-stream;base64,", "", url$url)
base64decode(data, bam)
} else {
.htsstream(bam, url$url, url$headers)
}
}
output
}
#' Retrieve reads (BAM files) or variants (VCF files)
#' @rdname htsget
#'
#' @param granges `GRanges()` instance with genomic coordinates for
#' retrieval. Coordinates follow Bioconductor standards (1-based,
#' closed intervals) and are translated to GA4GH coordinates
#' (0-based, half-open intervals).
#' @param sample_id `character(1)` sample identifier
#' @param url `character(1)` data resource url, excluding endpoint and
#' sample information.
#' @param fields `character()` of BAM fields to be included; default
#' (`NULL`) includes all fields.
#' @param destination `character(1)` file for output; must not exist.
#'
#' @return `htsget_reads()` return the path to a BAM file containing
#' the requested information.
#'
#' @examples
#' gr <- GenomicRanges::GRanges("chr12:111766922-111817529")
#' sample_id <- "platinum/NA12878"
#' url <- "https://htsnexus.rnd.dnanex.us/v1"
#' fl <- tempfile(fileext=".bam")
#' bam <- htsget_reads(gr, sample_id, url, destination = fl)
#' Rsamtools::countBam(bam)
#'
#' @importMethodsFrom GenomicRanges seqnames start end
#' @export
htsget_reads <-
function(granges, sample_id, url, fields = NULL, destination)
{
stopifnot(
.is_single_granges(granges), .is_single_string(sample_id),
.is_single_string(url), .is_nonexistent_file(destination),
all(fields %in% names(.BAM_fields))
)
fields <-
if (is.null(fields)) {
""
} else {
sprintf("&fields=%s", paste(fields, collapse=","))
}
queries <- sprintf(
"%s/reads/%s?format=BAM&referenceName=%s&start=%s&end=%s%s",
url, sample_id,
seqnames(granges), start(granges), end(granges),
fields
)
content <- .htsget(queries[[1]])
.as_file(content$urls, destination)
}
#' @rdname htsget
#'
#' @return `BAMfields()` returns a data.frame describing available
#' fields.
#'
#' @examples
#' BAMfields()
#' fl <- tempfile(fileext=".bam")
#' fields <- c("RNAME", "POS", "CIGAR")
#' bam <- htsget_reads(gr, sample_id, url, fields, destination = fl)
#' names(Rsamtools::scanBam(bam)[[1]])
#'
#' @export
BAMfields <-
function()
{
as.data.frame(.BAM_fields)
}
.BAM_fields <- c(
QNAME = "Read names",
FLAG = "Read bit flags",
RNAME = "Reference sequence name",
POS = "Alignment position",
MAPQ = "Mapping quality score",
CIGAR = "CIGAR string",
RNEXT = "Reference sequence name of the next fragment template",
PNEXT = "Alignment position of the next fragment in the template",
TLEN = "Inferred template size",
SEQ = "Read bases",
QUAL = "Base quality scores"
)
#' @rdname htsget
#'
#' @return `htsget_variants()` returns a `character(1)` path to a VCF file
#' containing requested information.
#'
#' @examples
#' gr <- GenomicRanges::GRanges("12:112204691-112247789")
#' sample_id <- "1000genomes/20130502_autosomes"
#' url <- "https://htsnexus.rnd.dnanex.us/v1"
#' fl <- tempfile(fileext=".vcf")
#' vcf <- htsget_variants(gr, sample_id, url, destination = fl)
#' VariantAnnotation::readVcf(vcf)
#' @export
htsget_variants <-
function(granges, sample_id, url, destination)
{
stopifnot(
.is_single_granges(granges), .is_single_string(sample_id),
.is_single_string(url), .is_nonexistent_file(destination)
)
queries <- sprintf(
"%s/variants/%s?format=VCF&referenceName=%s&start=%s&end=%s",
url, sample_id,
seqnames(granges), start(granges), end(granges)
)
content <- .htsget(queries[[1]])
.as_file(content$urls, destination)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.