R/prepareTallyFile.R

Defines functions resizeCohort prepareTallyFile

Documented in prepareTallyFile resizeCohort

# Create the HDF5 layout of a tally file for one chromosome of one study.
#
# Arguments:
#   filename           path of the HDF5 file; created if it does not exist yet
#   study, chrom       joined as "<study>/<chrom>" to form the group path
#   chromlength        extent of the last dimension of every dataset
#   nsamples           initial extent of the sample dimension
#   maxsamples         maximum extent of the sample dimension (maxdims)
#   chunkSize          chunk extent along the chromosome dimension; lowered to
#                      chromlength when the chromosome is shorter than a chunk
#   sampleChunkSize    chunk extent along the sample dimension
#   compressionLevel   passed as `level` to every h5createDataset call
#   referenceFillValue fill value of the "Reference" dataset
#
# Datasets created below the group (all integer):
#   Counts                          12 x nsamples x 2 x chromlength
#   Deletions/Insertions/Coverages  nsamples x 2 x chromlength
#   Reference                       chromlength
#
# Always returns TRUE.
prepareTallyFile <- function(filename, study, chrom, chromlength, nsamples,
                             maxsamples = nsamples, chunkSize = 50000,
                             sampleChunkSize = nsamples, compressionLevel = 9,
                             referenceFillValue = 5) {
  # A chunk may not be longer than the chromosome it tiles.
  if (chunkSize > chromlength) {
    chunkSize <- chromlength
    message(paste("Chromosome length smaller than designated chunkSize, reducing chunkSize to:", chunkSize))
  }

  if (!file.exists(filename)) {
    h5createFile(filename)
  } else {
    message(paste("File", filename, "exists"))
  }

  # The study group has to exist before the nested chromosome group.
  chromGroup <- paste(study, chrom, sep = "/")
  h5createGroup(filename, study)
  h5createGroup(filename, chromGroup)

  datasetPath <- function(name) paste(chromGroup, name, sep = "/")

  # Shapes shared by all per-sample datasets; Counts prepends a dimension
  # of extent 12 to each of them.
  sampleDims <- c(nsamples, 2, chromlength)
  sampleMaxDims <- c(maxsamples, 2, chromlength)
  sampleChunk <- c(sampleChunkSize, 2, chunkSize)

  h5createDataset(
    filename, datasetPath("Counts"),
    dims = c(12, sampleDims),
    maxdims = c(12, sampleMaxDims),
    storage.mode = "integer",
    chunk = c(12, sampleChunk),
    level = compressionLevel
  )

  for (name in c("Deletions", "Insertions", "Coverages")) {
    h5createDataset(
      filename, datasetPath(name),
      dims = sampleDims,
      maxdims = sampleMaxDims,
      storage.mode = "integer",
      chunk = sampleChunk,
      level = compressionLevel
    )
  }

  # Reference has no sample dimension.
  h5createDataset(
    filename, datasetPath("Reference"),
    dims = c(chromlength),
    storage.mode = "integer",
    chunk = c(chunkSize),
    level = compressionLevel,
    fillValue = referenceFillValue
  )

  TRUE
}

# Resize the sample dimension of the datasets stored under "<study>/<chrom>".
#
# Arguments:
#   filename            path of the HDF5 file to modify
#   study, chrom        joined as "<study>/<chrom>" to locate the group
#   newNumberOfSamples  new extent of the sample dimension (single number)
#   dimmap              named list: dataset name -> index of that dataset's
#                       sample dimension; entries that are NULL are skipped
#   force               set to TRUE to allow shrinking the sample dimension
#
# Stops with an error when a dataset would shrink and `force` is not TRUE.
# Datasets are processed one after another, so datasets handled before a
# failing one keep their new extent.
# Every HDF5 handle opened here is closed again, also when an error occurs.
# Returns NULL invisibly.
resizeCohort <- function(filename, study, chrom, newNumberOfSamples,
                         dimmap = .sampleDimMap, force = FALSE) {
  stopifnot(
    is.numeric(newNumberOfSamples),
    length(newNumberOfSamples) == 1L,
    !is.na(newNumberOfSamples)
  )

  f <- H5Fopen(filename)
  on.exit(H5Fclose(f))
  g <- H5Gopen(f, paste(study, chrom, sep = "/"))
  # Replace the handler so the group is closed before the file.
  on.exit({
    H5Gclose(g)
    H5Fclose(f)
  }, add = FALSE)

  for (dataset in names(dimmap)) {
    sampleDim <- dimmap[[dataset]]
    if (is.null(sampleDim)) {
      next
    }

    d <- H5Dopen(g, dataset)
    tryCatch({
      s <- H5Dget_space(d)
      # The dataspace is only needed to read the current extent.
      currentDim <- tryCatch(
        H5Sget_simple_extent_dims(s)$size,
        finally = H5Sclose(s)
      )
      if (currentDim[sampleDim] > newNumberOfSamples && !force) {
        stop("Trying to reduce the size of the cohort, this will most likely result in data loss. If you are sure about this, re-run resizeCohort with the force=TRUE parameter.")
      }
      currentDim[sampleDim] <- newNumberOfSamples
      H5Dset_extent(d, currentDim)
    }, finally = H5Dclose(d))
  }

  invisible(NULL)
}

Try the h5vc package in your browser

Any scripts or data that you put into this service are public.

h5vc documentation built on Nov. 8, 2020, 4:56 p.m.