Nothing
.sampleDimMap <- list(
"Counts" = 2,
"Coverages" = 1,
"Deletions" = 1,
"Reference" = NULL
)
setGeneric("h5dapply", function(...,range) standardGeneric("h5dapply"))
setMethod("h5dapply", c("numeric"), function(..., blocksize, range){
stopifnot( is.numeric(range), length(range) == 2, diff(range) > 0, range[1] >= 1 )
blocksize = min(blocksize, diff(range))
retsize = ceiling(diff(range)/blocksize)
blockstart = seq( range[1], by = blocksize, length = retsize)
blockend = blockstart + blocksize - 1
blockend[length(blockend)] = min(blockend[length(blockend)], range[2])
range <- IRanges( start = blockstart, end = blockend )
.h5dapplyIRanges(..., range = range)
})
setMethod("h5dapply", c("IRanges"), function(..., range){
.h5dapplyIRanges(..., range = range)
})
setMethod("h5dapply", c("GRanges"), function(..., group, range){
ret <- lapply(seqlevels(range), function(chrom){
tmpranges <- ranges(keepSeqlevels(range, chrom, pruning.mode = "coarse"))
tmpgroup <- paste(group, chrom, sep="/")
.h5dapplyIRanges(..., group=tmpgroup, range = tmpranges)
})
names(ret) <- seqlevels(range)
ret
})
.h5dapplyIRanges <- function( filename, group, blocksize, FUN = function(x) x, ... , names = c("Counts", "Coverages", "Deletions", "Reference"), dims, range, samples = NULL, sampleDimMap = .sampleDimMap, verbose = FALSE, BPPARAM = NULL ) {
sampleData <- getSampleData( filename, group )
if( !is.null(samples) ){
stopifnot(all(samples %in% sampleData$Sample))
}else{
samples = sampleData$Sample
}
selectedColumns <- subset( sampleData, Sample %in% samples )$Column
f <- H5Fopen(filename, flags = "H5F_ACC_RDONLY")
g <- H5Gopen(f, group)
h5Ds <- lapply(names, function(n) H5Dopen(g, n))
names(h5Ds) <- names
sizes <- lapply( lapply( lapply(h5Ds, H5Dget_space), H5Sget_simple_extent_dims), function(x) x$size )
datasets = data.frame(
Name = names,
DimCount = sapply( sizes, length ),
stringsAsFactors = FALSE
)
if( missing(dims) ){
if(!("Counts" %in% names )){
dCount <- tryCatch(H5Dopen(g, "Counts"), error = function(e) e)
if(class(dCount) != "H5IdComponent"){
stop( "If dims is not specified the dataset 'Counts' must exist in the tallyFile" )
}else{
sCount <- H5Sget_simple_extent_dims(H5Dget_space(dCount))$size
datasets$PosDim = sapply(sizes, function(x) which(x == sCount[4]))
H5Dclose(dCount)
}
}else{
if(verbose){
message("Guessing chromosome length from 'Counts' dataset, if one of the other dimensions happens to match this length there might be weird behaviour here. In that case, specify the dimensions explicitly.")
}
datasets$PosDim = sapply(sizes, function(x) which(x == sizes$Counts[4]))
}
}else{
datasets$PosDim = dims
if( !all(dims <= datasets$DimCount & dims >= 1) )
stop( "The specified dimensions 'dims' do not match the structure of the data" )
}
chromlength <- sapply(datasets$Name, function(n) sizes[[n]][datasets$PosDim[datasets$Name == n]])
if( length(unique(chromlength)) == 1 ){
chromlength = chromlength[1]
}else{
stop( "The specified datasets do not have a common length in the specified dimensions!" )
}
if( missing(range) ){
range = c( 1, chromlength )
}
lapply(h5Ds, H5Dclose)
H5Gclose(g)
H5Fclose(f)
blockstart <- start(range)
blockend <- end(range)
stopifnot( blockstart[1] >= 1, blockend[length(blockend)] <= chromlength )
blocks = paste( blockstart, blockend, sep=":" )
applyFUN <- function( b ){
if(verbose)
message( "Processing Block #", b, ": ", blocks[b], sep="")
vec = vector(mode="list", length=nrow(datasets))
names(vec) = datasets$Name
for( i in seq_len(nrow(datasets)) ){
idx_list = vector(mode="list", length=datasets$DimCount[i] )
idx_list[[ datasets$PosDim[i] ]] <- blockstart[b]:blockend[b]
sampleDim <- sampleDimMap[[datasets$Name[i]]]
if(!is.null(sampleDim)){
if( sampleDim == datasets$PosDim[i] & !all( sampleData$Sample %in% samples) ){
stop("Specified binning dimension equals sample dimension - can't subset by sample")
}
idx_list[[ sampleDim ]] <- selectedColumns
}
vec[[i]] = h5read( filename, paste( group, datasets$Name[i], sep="/" ), index = idx_list )
if(names(vec)[i] == "Reference"){ #BEGIN: Explicit casting to integer since this now comes out as raw for some reason
vec[[i]] <- as.integer(vec[[i]])
} #END: Explicit casting to integer since this now comes out as raw for some reason
}
vec$h5dapplyInfo <- list( Blockstart = blockstart[b], Blockend = blockend[b], Datasets = datasets, Group = group )
FUN(vec, ... )
}
if(is.null(BPPARAM)){
ret = lapply(
seq(along=blocks),
applyFUN
)
}else{
ret = bplapply(seq(along=blocks),
function(b, blocks, datasets, ...) {
library(h5vc)
applyFUN(b)
}, blocks=blocks, datasets=datasets, BPPARAM = BPPARAM
)
}
base::names(ret) = blocks
return( ret )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.