Nothing
binGenome <- function( reference, binsize = 1e6, chroms = seqnames(reference) ){
stopifnot(class(reference) == "BSgenome")
lengths <- seqlengths(reference)[chroms]
ret <- do.call(rbind, lapply(names(lengths), function(chrom){
res <- data.frame(
Chrom = chrom,
Start = seq(1, lengths[chrom], binsize)
)
if( lengths[chrom] > binsize){
res$End <- c( seq(binsize, lengths[chrom], binsize), lengths[chrom] )
}else{
res$End <- c(lengths[chrom])
}
res
})
)
ret <- GRanges(seqnames = ret$Chrom, ranges = IRanges(start = ret$Start, end = ret$End))
seqlengths(ret) <- lengths
return(ret)
}
tallyRanges <- function(
bamfiles, ranges, reference, q=25, ncycles = 10, max.depth=1e6
){
stopifnot(class(ranges) == "GRanges")
strand(ranges) <- "*"
stopifnot(class(reference) %in% c( "BSgenome" ))
tmp = bplapply( seq_len(length(ranges)), function(rangeid){
tallyBAM( bamfiles, chr=seqnames(ranges)[rangeid], start(ranges)[rangeid], end(ranges)[rangeid], q, ncycles, max.depth )
})
ref <- getSeq(reference, ranges)
ret <- bplapply( seq_along(tmp), function(i){
prepareForHDF5( tmp[[i]], ref[[i]] )
}
)
return(ret)
}
tallyRangesBatch <- function(
tallyFile, study, bamfiles, ranges, reference,
q = 25, ncycles = 10, max.depth=1e6,
regID = "Tally",
res = list("ncpus" = 2, "memory" = 24000, "queue"="research-rh6"),
written = c(), wrfile = "written.jobs.RDa", waitTime = Inf
) {
stopifnot(class(ranges) == "GRanges")
strand(ranges) <- "*"
stopifnot(class(reference) == "BSgenome")
if(!file.exists(tallyFile)){
print("Preparing tally file since it does not exist.")
lapply( unique(seqnames(ranges)), function(chrom) {
prepareTallyFile(tallyFile, study, chrom, seqlengths(reference)[chrom], length(bamFiles))
})
}
# require(BatchJobs)
# check if registry is there
if(file.exists(paste0( regID, "-files"))) {
print("Registry already exists.")
reg = loadRegistry(paste0( regID, "-files" ))
toSubmit = setdiff(findJobs(reg), findDone(reg))
submitJobs(reg, resources = res, ids=toSubmit)
} else {
print("Registry does not exist.")
reg = makeRegistry(regID, seed = 23, packages = c("h5vc", "GenomicRanges"))
reference = getSeq(reference, ranges)
# submit jobs
batchFUN = function( rangeid, bamfiles, q, ncycles, max.depth, reference, ranges ) {
#require(h5vc)
#require(GenomicRanges)
tallyBAM( bamfiles, chr=seqnames(ranges)[rangeid], start(ranges)[rangeid], end(ranges)[rangeid], q, ncycles, max.depth, reference = reference[[rangeid]] )
}
batchMap(reg, fun = batchFUN, seq_along(ranges), more.args = list(bamfiles = bamfiles, q = q, ncycles = ncycles, max.depth = max.depth, reference = reference, ranges = ranges))
submitJobs(reg, resources = res)
}
lastWrite = as.numeric(Sys.time())
while(!all(findJobs(reg) %in% written)){
toWrite = setdiff(findDone(reg), written)
for(job in toWrite){
if(verbose){
print(paste("Processing job", job))
print(paste("Written", length(written), "out of", length(findJobs(reg))))
}
tally = loadResult(reg, job)
writeToTallyFile(list("TheTaly" = tally), tallyFile, study, ranges[job], samples = NULL)
if(verbose){
print(paste("Written results from Job", job))
}
written = c(written, job)
save(written, file=wrfile)
lastWrite = as.numeric(Sys.time())
}
Sys.sleep(5)
if(as.numeric(Sys.time())-lastWrite > waitTime & isEmpty(setdiff(findDone(reg), written))) {
print(paste(length(setdiff(findJobs(reg), written)), "jobs were not written!"))
print("Exiting now!")
break
}
}
}
writeToTallyFile <- function( theData, file, study, ranges, samples = NULL ){
for(i in seq(length(theData))){
chrom <- seqnames(ranges)[i]
data <- theData[[i]]
start <- start(ranges(ranges))[i]
end <- end(ranges(ranges))[i]
for(dataset in names(data)){
if(is.null(dim(data[[dataset]]))){
ndims <- 1
}else{
ndims <- length(dim(data[[dataset]]))
}
idxList <- vector(mode="list", length=ndims)
if(!is.null(samples)){
idxList[[length(idxList) - 1]] <- samples
}
idxList[[length(idxList)]] <- start:end
h5write(data[[dataset]], file, paste( study, chrom, dataset, sep = "/" ), index = idxList)
}
}
return(TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.