R/methylDBFunctions.R

Defines functions definition error FUN .checkTabixFileExists

# Helper Functions ---------------------------------------------------

#' @noRd
## function checks wether a tabix file already exists and 
## appends number if file already exists  
.checkTabixFileExists <- function(tabixfile) {
  message("\nchecking wether tabix file already exists:")
  tabixfile <- paste0(tabixfile,".bgz")
  message(tabixfile)
  if(file.exists(tabixfile) ) {
    message("tabix file already exists, renaming output file:")
    i = 1
    filename2 = tabixfile
    while(file.exists(filename2) ) {
      filename2 = gsub(".txt.bgz",paste0("_",i,".txt.bgz"),tabixfile)
      i = i + 1
    }
    message(filename2)
    tabixfile <- filename2
    message(paste("HINT: consider using 'suffix' argument to write",
                  "different function calls to different files"))
  } else message("tabix file is new.")
  tabixfile <- gsub(".bgz","",tabixfile)
  message("continuing now ...\n")
    
  return(tabixfile)
  
}




# MethylRawDB and MethylRawListDB -----------------------------------------

# filterByCoverage --------------------------------------------------------

 
#' @aliases filterByCoverage,methylRawDB-method
#' @rdname filterByCoverage-methods
setMethod("filterByCoverage", signature(methylObj="methylRawDB"),
          function(methylObj,lo.count,lo.perc,hi.count,hi.perc,chunk.size,
                   save.db=TRUE,...){
  if( is.null(lo.count) & is.null(lo.perc) & is.null(hi.count) & 
      is.null(hi.perc) ){return(methylObj)}
  
  if(save.db) {
    
    filter <- function(data,lo.count,lo.perc,hi.count,hi.perc) {
      
      .setMethylDBNames(data,"methylRawDB")
      
      #figure out which cut-offs to use, maybe there is more elagent 
      # ways, quick&dirty works for now
      if(is.numeric(lo.count) ){lo.count=lo.count}
      if(is.numeric(lo.perc)){lo.count=quantile(data$coverage,lo.perc/100)}
      if(is.numeric(hi.count)){hi.count=hi.count}
      if(is.numeric(hi.perc)){hi.count=quantile(data$coverage,hi.perc/100)}
      
      if(is.numeric(lo.count)){data=data[data$coverage>=lo.count,]}
      if(is.numeric(hi.count)){data=data[data$coverage<hi.count,]}
      
      return(data)
      
    }
    
    # catch additional args 
    args <- list(...)
    dir <- dirname(methylObj@dbpath)
    
    if( "dbdir" %in% names(args) ){
      if( !(is.null(args$dbdir)) ){
        dir <- .check.dbdir(args$dbdir) 
      }
    } 
    
    if(!( "suffix" %in% names(args) ) ){
      suffix <- paste0("_","filtered")
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    # filename <- paste0(paste(methylObj@sample.id,collapse = "_"),suffix,".txt")
    # tabixfile
    filename <- paste0(gsub(".txt.bgz",replacement = "",methylObj@dbpath),
    suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    ## creating the tabix header
    slotList <- list(dbtype = "tabix",
                     sample.id = methylObj@sample.id,
                     assembly = methylObj@assembly, context = methylObj@context,
                     resolution = methylObj@resolution)
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
                                          tabixHead = tabixHead)
    
    newdbpath <- applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size, 
                                 dir=dir,filename = filename, 
                                 return.type = "tabix", FUN = filter, 
                                 lo.count=lo.count, lo.perc=lo.perc, 
                                 hi.count=hi.count, hi.perc=hi.perc,
                                 tabixHead = tabixHeadString)
    
    readMethylRawDB(dbpath = newdbpath,dbtype = "tabix",
                    sample.id = methylObj@sample.id,
                    assembly = methylObj@assembly, context = methylObj@context,
                    resolution = methylObj@resolution)
    
  } else {
    
    methylObj <- methylObj[]
    #print(class(methylObj))
    filterByCoverage(methylObj,lo.count,lo.perc,hi.count,hi.perc,
                     chunk.size,save.db=FALSE,...)
    
  }
            
})

#' @aliases filterByCoverage,methylRawListDB-method
#' @rdname filterByCoverage-methods
setMethod("filterByCoverage", signature(methylObj="methylRawListDB"),
          function(methylObj,lo.count,lo.perc,hi.count,hi.perc,chunk.size,
                   save.db=TRUE,...){
            
    if(save.db){
      args <- list(...)
      
      if( !( "dbdir" %in% names(args)) ){
        dbdir <- NULL
      } else { dbdir <- basename(.check.dbdir(args$dbdir)) }
      
      new.list=lapply(methylObj,filterByCoverage,lo.count,lo.perc,hi.count,
                      hi.perc,chunk.size,save.db,dbdir=dbdir,...)
      new("methylRawListDB", new.list,treatment=methylObj@treatment)
      
    } else {
      new.list=lapply(methylObj,filterByCoverage,lo.count,lo.perc,hi.count,
                      hi.perc,chunk.size,save.db=FALSE,...)
      new("methylRawList", new.list,treatment=methylObj@treatment)
    }
    
})


# getCoverageStats --------------------------------------------------------

#' @rdname getCoverageStats-methods
#' @aliases getCoverageStats,methylRawDB-method
setMethod("getCoverageStats", "methylRawDB",
          function(object,plot,both.strands,labels,...,chunk.size){
            
            tmp = applyTbxByChunk(object@dbpath,chunk.size = chunk.size,
                                  return.type = "data.table", 
                                  FUN = function(x) { 
                                    .setMethylDBNames(x,"methylRawDB");
                                    return(x[,c("strand","coverage"),with=FALSE])} )
            
            if(!plot){
              qts=seq(0,0.9,0.1) # get quantiles
              qts=c(qts,0.95,0.99,0.995,0.999,1)                          
              
              if(both.strands){
                
                
                plus.cov=tmp[strand=="+",coverage]
                mnus.cov=tmp[strand=="-",coverage]
                
                cat("read coverage statistics per base\n\n")
                cat("FORWARD STRAND:\n")
                cat("summary:\n")
                print( summary( plus.cov ) )
                cat("percentiles:\n")
                print(quantile( plus.cov,p=qts ))
                cat("\n\n")
                cat("REVERSE STRAND:\n")
                cat("summary:\n")
                print( summary( mnus.cov ) )
                cat("percentiles:\n")
                print(quantile( mnus.cov,p=qts ))
                cat("\n")                          
              }else{
                
                all.cov=tmp[,coverage]
                
                cat("read coverage statistics per base\n")
                cat("summary:\n")
                print( summary( all.cov ) )
                cat("percentiles:\n")
                print(quantile( all.cov,p=qts ))
                cat("\n")
              }
              
            }else{
              if(both.strands){   
                plus.cov=tmp[strand=="+",coverage]
                mnus.cov=tmp[strand=="-",coverage]
                
                par(mfrow=c(1,2))
                if(labels){
                  a=hist(log10(plus.cov),plot=FALSE)
                  my.labs=as.character(round(100*a$counts/length(plus.cov),1))
                }else{my.labs=FALSE}
                
                hist(log10(plus.cov),col="chartreuse4",
                     xlab=paste("log10 of read coverage per",object@resolution),
                     main=paste("Histogram of", object@context, 
                                "coverage: Forward strand"),
                     labels=my.labs,...)
                mtext(object@sample.id, side = 3)
                
                if(labels){
                  a=hist(log10(mnus.cov),plot=FALSE)
                  my.labs=as.character(round(100*a$counts/length(mnus.cov),1))
                }else{my.labs=FALSE}
                a=hist(log10(mnus.cov),plot=FALSE)
                hist(log10(mnus.cov),col="chartreuse4",
                     xlab=paste("log10 of read coverage per",object@resolution),
                     main=paste("Histogram of", object@context, 
                                "coverage: Reverse strand"),
                     labels=my.labs,...)
                mtext(object@sample.id, side = 3)
                
              }else{
                all.cov=tmp[,coverage]
                if(labels){
                  a=hist(log10(all.cov),plot=FALSE)
                  my.labs=as.character(round(100*a$counts/length(all.cov),1))
                }else{my.labs=FALSE}                          
                
                hist(log10(all.cov),col="chartreuse4",
                     xlab=paste("log10 of read coverage per",object@resolution),
                     main=paste("Histogram of", object@context, "coverage"),
                     labels=my.labs,...)
                mtext(object@sample.id, side = 3)
                
              }
            }
          })


# getMethylationStats -----------------------------------------------------

#' @rdname getMethylationStats-methods
#' @aliases getMethylationStats,methylRawDB-method
setMethod("getMethylationStats", "methylRawDB",
          function(object,plot,both.strands,labels,...,chunk.size){
            
            numCs=coverage=strand=.=NULL
            tmp = applyTbxByChunk(object@dbpath,chunk.size = chunk.size,
                                  return.type = "data.table", 
                                  FUN = function(x) { 
                                    .setMethylDBNames(x,"methylRawDB"); 
                                    return(x[,.(strand,coverage,numCs)])} )
            
            
            plus.met=100* tmp[strand=="+",numCs/coverage]
            mnus.met=100* tmp[strand=="-",numCs/coverage]
            all.met =100* tmp[,numCs/coverage]
            
            if(!plot){
              qts=seq(0,0.9,0.1) # get quantiles
              qts=c(qts,0.95,0.99,0.995,0.999,1)                          
              
              if(both.strands){       
                
                cat("methylation statistics per base\n\n")
                cat("FORWARD STRAND:\n")
                cat("summary:\n")
                print( summary( plus.met ) )
                cat("percentiles:\n")
                print(quantile( plus.met,p=qts ))
                cat("\n\n")
                cat("REVERSE STRAND:\n")
                cat("summary:\n")
                print( summary( mnus.met ) )
                cat("percentiles:\n")
                print(quantile( mnus.met,p=qts ))
                cat("\n")                          
              }else{
                
                
                cat("methylation statistics per base\n")
                cat("summary:\n")
                print( summary( all.met ) )
                cat("percentiles:\n")
                print(quantile( all.met,p=qts ))
                cat("\n")
              }
              
            }else{
              if(both.strands){   
                
                par(mfrow=c(1,2))
                if(labels){
                  a=hist((plus.met),plot=FALSE,...)
                  my.labs=as.character(round(100*a$counts/length(plus.met),1))
                }else{my.labs=FALSE}
                hist((plus.met),col="cornflowerblue",
                     xlab=paste("% methylation per",object@resolution),
                     main=paste("Histogram of %", object@context,
                                "methylation: Forward strand"),
                     labels=my.labs,...)
                mtext(object@sample.id, side = 3)
                
                if(labels){                          
                  a=hist((mnus.met),plot=FALSE,...)
                  my.labs=as.character(round(100*a$counts/length(mnus.met),1))
                }
                else{my.labs=FALSE}
                
                hist((mnus.met),col="cornflowerblue",
                     xlab=paste("% methylation per",object@resolution),
                     main=paste("Histogram of %", object@context,
                                "methylation: Reverse strand"),
                     labels=my.labs,...)
                mtext(object@sample.id, side = 3)
                
              }else{
                if(labels){                          
                  
                  a=hist((all.met),plot=FALSE,...)
                  my.labs=as.character(round(100*a$counts/length(all.met),1))
                }else{my.labs=FALSE}
                hist((all.met),col="cornflowerblue",
                     xlab=paste("% methylation per",object@resolution),
                     main=paste("Histogram of %", object@context,"methylation"),
                     labels=my.labs,...)
                mtext(object@sample.id, side = 3)
                
              }
            }
          })


# adjustMethylC -----------------------------------------------------------

#' @rdname adjustMethylC
#' @aliases adjustMethylC,methylRawDB,methylRawDB-method
setMethod("adjustMethylC", c("methylRawDB","methylRawDB"),
          function(mc,hmc,save.db=TRUE,...,chunk.size){
  
  if(save.db) {
    
    lst=new("methylRawListDB",list(mc,hmc),treatment=c(1,0))
    base=unite(lst)
    
    adjust <- function(data) {
      
      .setMethylDBNames(data,"methylBaseDB")
      diff=(data$numCs1)-round(data$coverage1*(data$numCs2/data$coverage2))
      diff[diff<0]=0
      data$numCs1=diff
      data$numTs1=data$coverage1-data$numCs1
      return(data[1:7])
      
    }
    
    # catch additional args 
    args <- list(...)
    dir <- dirname(mc@dbpath)
    
    if( "dbdir" %in% names(args) ){
      if( !(is.null(args$dbdir)) ){
        dir <- .check.dbdir(args$dbdir) 
      }
    } 
    
    if(!( "suffix" %in% names(args) ) ){
      suffix <- paste0("_","adjusted")
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    
    # filename <- paste0(paste(mc@sample.id,collapse = "_"),suffix,".txt")
    filename <- paste0(gsub(".txt.bgz",replacement = "",mc@dbpath),suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    ## creating the tabix header
    slotList <- list(sample.id=mc@sample.id,
                     assembly=mc@assembly, context =mc@context,
                     resolution=mc@resolution,
                     dbtype = mc@dbtype)
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
                                          tabixHead = tabixHead)
    
    newdbpath <- applyTbxByChunk(base@dbpath,chunk.size = chunk.size, 
                                 dir=dir,filename = filename, 
                                 return.type = "tabix", FUN = adjust,
                                 tabixHead = tabixHeadString)
    
    unlink(list.files(dirname(base@dbpath),
                      pattern = basename(gsub(".txt.bgz","",base@dbpath)),
                      full.names = TRUE))
    
    readMethylRawDB(dbpath = newdbpath,sample.id=mc@sample.id,
                    assembly=mc@assembly, context =mc@context,
                    resolution=mc@resolution,
                    dbtype = mc@dbtype)
    
  } else {
    
    mc.tmp <- mc[]
    hmc.tmp <- hmc[]
    adjustMethylC(mc.tmp,hmc.tmp,save.db=FALSE,...)
    
    
  }
  
  
})


#' @rdname adjustMethylC
#' @aliases adjustMethylC,methylRawListDB,methylRawListDB-method
setMethod("adjustMethylC", c("methylRawListDB","methylRawListDB"),
          function(mc,hmc,save.db=TRUE,...,chunk.size){
  
  # check lengths equal if not give error
  if(length(mc) != length(hmc)){stop("lengths of methylRawList objects should be same\n")}
  
  if(save.db){
    
    args <- list(...)
    
    if( !( "dbdir" %in% names(args)) ){
      dbdir <- NULL
    } else { dbdir <- basename(.check.dbdir(args$dbdir)) }
    
    my.list=list()
    for(i in 1:length(mc)){
      my.list[[i]]=adjustMethylC(mc[[i]],hmc[[i]],save.db,
                                  dbdir=dbdir,...,chunk.size)
    }
    new("methylRawListDB",my.list,treatment=mc@treatment )
    
  } else {
    
    my.list=list()
    for(i in 1:length(mc)){
      my.list[[i]]=adjustMethylC(mc[[i]],hmc[[i]],save.db=FALSE,...)
    }
    new("methylRawList",my.list,treatment=mc@treatment )
    
  }
  
})

# normalizeCoverage -------------------------------------------------------


#' @rdname normalizeCoverage-methods
#' @aliases normalizeCoverage,methylRawListDB-method
setMethod("normalizeCoverage", "methylRawListDB",
          function(obj,method,chunk.size,save.db=TRUE,...){
            
            if( !(method %in% c("median","mean") ) )
            {
              stop("method option should be either 'mean' or 'median'\n")
            }
            
            if(save.db) { 
              
              normCov <- function(data,method) {
                
                .setMethylDBNames(data)
                if(method=="median"){
                  x=median(data$coverage)
                }else {
                  x=mean(data$coverage)
                }
                
                sc.fac=max(x)/x #get scaling factor
                
                all.cov=data$coverage
                fCs    =data$numCs/all.cov
                fTs    =data$numT/all.cov
                data$coverage=round(sc.fac*data$coverage)
                data$numCs   =round(data$coverage*fCs)
                data$numTs   =round(data$coverage*fTs)
                
                return(data)
              }
              
              # catch additional args 
              args <- list(...)
              
              if( ( "dbdir" %in% names(args))   ){
                #                 if(!(is.null(args$dbdir))) {
                dir <- .check.dbdir(args$dbdir)
                #                 }
              } else { 
                dir <- dirname(obj[[1]]@dbpath)
              }
              if(!( "suffix" %in% names(args) ) ){
                suffix <- paste0("_","normed")
              } else { 
                suffix <- paste0("_",args$suffix)
              }
              
              outList <- list()
              
              for(i in 1:length(obj)){
                
                # filename <- paste0(paste(obj[[i]]@sample.id,collapse = "_"),
                #                    suffix,".txt")
                filename <- paste0(gsub(".txt.bgz",replacement = "",
                obj[[i]]@dbpath),suffix,".txt")
                
                filename <- .checkTabixFileExists(filename)
                
                filename <- basename(filename)
                
                ## creating the tabix header
                slotList <- list(sample.id=obj[[i]]@sample.id,
                                 assembly=obj[[i]]@assembly, 
                                 context =obj[[i]]@context,
                                 resolution=obj[[i]]@resolution,
                                 dbtype = obj[[i]]@dbtype)
                
                tabixHead <- makeTabixHeader(slotList)
                tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
                                                      tabixHead = tabixHead)
                
                newdbpath <- applyTbxByChunk(obj[[i]]@dbpath,
                                             chunk.size = chunk.size, 
                                             dir=dir,filename = filename, 
                                             return.type = "tabix", 
                                             FUN = normCov,method = method,
                                             tabixHead = tabixHeadString)
                
                outList[[i]] <- readMethylRawDB(dbpath = newdbpath,
                                                sample.id=obj[[i]]@sample.id,
                                                assembly=obj[[i]]@assembly, 
                                                context =obj[[i]]@context,
                                                resolution=obj[[i]]@resolution,
                                                dbtype = obj[[i]]@dbtype)
                
              }
              
              new("methylRawListDB",outList,treatment=obj@treatment)
              
            } else {
              
              # coerce methylRawDB to methylRaw
              tmp.list <- lapply(obj,function(x) x[])
              tmp <- new("methylRawList",tmp.list,treatment=obj@treatment)
              normalizeCoverage(tmp,method,save.db=FALSE)
            }
})


# reorganize --------------------------------------------------------------


#' @rdname reorganize-methods
#' @aliases reorganize,methylRawListDB-method
setMethod("reorganize", signature(methylObj="methylRawListDB"),
          function(methylObj,sample.ids,treatment,chunk.size,save.db=TRUE,...){
            
  #sample.ids length and treatment length should be equal
  if(length(sample.ids) != length(treatment) ){
    stop("length of sample.ids should be equal to treatment")
  }
  
  # get ids from the list of methylRaw 
  orig.ids=sapply(methylObj,function(x) x@sample.id) 
  if( ! all(sample.ids %in% orig.ids) ){
    stop("provided sample.ids is not a subset of the sample ids of the object")
  }
  # get the column order in the original matrix
  col.ord=order(match(orig.ids,sample.ids))[1:length(sample.ids)] 
  
  if(save.db) {
    
    # catch additional args 
    args <- list(...)
    
    outList=list()
    # do not rename per default
    suffix <- NULL
    
    if( ("dbdir" %in% names(args)) | ( "suffix" %in% names(args) ) ) {
      if( "suffix" %in% names(args) ) { suffix <- paste0("_",args$suffix) }
      if( ( "dbdir" %in% names(args)) ){ 
        dir <- .check.dbdir(args$dbdir) 
        
        for(i in 1:length(sample.ids)){
          obj <- methylObj[[ col.ord[i]  ]]
          # filename <- paste0(dir,"/",paste(obj@sample.id,collapse = "_"),
                             # suffix,".txt.bgz")
          filename <- paste0(dir,"/",gsub(".txt.bgz",replacement =
          "",obj@dbpath),suffix,".txt.bgz")
          
          filename <- .checkTabixFileExists(filename)
          
          filename <- basename(filename)
          
          file.copy(obj@dbpath,filename)
          
          outList[[i]]=readMethylRawDB(dbpath = filename,
                                       dbtype = obj@dbtype,
                                       sample.id = obj@sample.id,
                                       assembly = obj@assembly, 
                                       context = obj@context, 
                                       resolution = obj@resolution)
        }
      } else {
        
        for(i in 1:length(sample.ids)){
          obj <- methylObj[[ col.ord[i]  ]]
          # filename <- paste0(dirname(obj@dbpath),paste(obj@sample.id,collapse = "_")
                             # ,suffix,".txt.bgz")
          filename <- paste0(gsub(".txt.bgz",replacement = "",obj@dbpath),
                             suffix,".txt.bgz")
          
          filename <- .checkTabixFileExists(filename)
          
          filename <- basename(filename)
          
          file.copy(obj@dbpath,filename)
          
          outList[[i]]=readMethylRawDB(dbpath = filename,
                                       dbtype = obj@dbtype,
                                       sample.id = obj@sample.id,
                                       assembly = obj@assembly, 
                                       context = obj@context, 
                                       resolution = obj@resolution)
        }
      }

    } else {
      
      for(i in 1:length(sample.ids)){
        outList[[i]]=methylObj[[ col.ord[i]  ]]
      }
    }
    
    new("methylRawListDB",outList,treatment=treatment)
    
  } else {
    
    outList=list()    
    for(i in 1:length(sample.ids)){
      outList[[i]]=methylObj[[ col.ord[i]  ]][]
      
    }
    
    new("methylRawList",outList,treatment=treatment)
  }
  
})


# MethylBaseDB ------------------------------------------------------------

# unite -------------------------------------------------------------------


unite.methylRawListDB <- function(object,destrand=FALSE,min.per.group=NULL,
                                  chunk.size=1e6,mc.cores=1,save.db=TRUE,...){
  
  if(save.db) {
    
    #check if assemblies,contexts and resolutions are same type NOT IMPLEMENTED   
    if( length(unique(vapply(object,function(x) x@context,FUN.VALUE="character"))) > 1)
    {
      stop("supplied methylRawList object have different methylation contexts:not all methylation events from the same bases")
    }
    if( length(unique(vapply(object,function(x) x@assembly,FUN.VALUE="character"))) > 1)
    {
      stop("supplied methylRawList object have different genome assemblies")
    }                     
    if( length(unique(vapply(object,function(x) x@resolution,FUN.VALUE="character"))) > 1)
    {
      stop("supplied methylRawList object have different methylation resolutions:some base-pair some regional")
    } 
    if( (!is.null(min.per.group)) &  ( ! is.integer( min.per.group ) )  )
    {
      stop("min.per.group should be an integer\ntry providing integers as 1L, 2L,3L etc.\n")
    }
    
    if( any(min.per.group > min(table(object@treatment)))  )
    {
      stop("min.per.group can not be higher than number of samples in smallest group\n")
    }
    
    if(Sys.info()['sysname']=="Windows") {mc.cores = 1}
    # destrand single objects contained in methylRawListDB
    if(destrand) { 
      
      message("destranding...")
      
      destrandFun <- function(obj){
        
        if(obj@resolution == "base") {
          dir <- dirname(obj@dbpath)
          filename <- paste(gsub(".txt.bgz","",obj@dbpath),
                            "destrand.txt",sep="_")
          
          # filename <- .checkTabixFileExists(filename)
          
          filename <- basename(filename)
          
          ## creating the tabix header
          slotList <- list(dbtype = "tabix",
                           sample.id = obj@sample.id,
                           assembly = obj@assembly, 
                           context = obj@context,
                           resolution = obj@resolution)
          
          tabixHead <- makeTabixHeader(slotList)
          tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
                                                tabixHead = tabixHead)
          
          # need to use .CpG.dinuc.unifyOld because output needs to be ordered
          newdbpath <- applyTbxByChunk(obj@dbpath,
                                       chunk.size = chunk.size, 
                                       dir=dir,filename = filename,
                                       return.type = "tabix", 
                                       FUN = function(x) { 
                                         .CpG.dinuc.unify(.setMethylDBNames(x,
                                                                               "methylRawDB") )}, 
                                       tabixHead = tabixHeadString)
          
          readMethylRawDB(dbpath = newdbpath,dbtype = "tabix",
                          sample.id = obj@sample.id,
                          assembly = obj@assembly, context = obj@context,
                          resolution = obj@resolution)
          
        }else {obj}
        
      }
      new.list=suppressMessages(lapply(object,destrandFun))
      object <- new("methylRawListDB", new.list,treatment=object@treatment)
      
      on.exit(unlink(c(getDBPath(object),paste0(getDBPath(object),".tbi"))),add = TRUE)
    }
    #merge raw methylation calls together
    
    message("uniting...")
    
    objList <- getDBPath(object)
    
    args <- list(...)
    #print(args)
    if( ( "dbdir" %in% names(args)) )
    { 
      dir <- .check.dbdir(args$dbdir) 
    } else {
      dir <- dirname(object[[1]]@dbpath)
    }
    # if(!( "dbtype" %in% names(args) ) ){
    #   dbtype <- "tabix"
    # } else { dbtype <- args$dbtype }
    if(!( "suffix" %in% names(args) ) ){
      suffix <- NULL
    } else {
      suffix <- args$suffix
      suffix <- paste0("_",suffix)
    }
    
    # filename will be "methylBase" concatenated with either 13-char random string or suffix
    filename <- paste0( dir, "/", ifelse(is.null(suffix),
                               yes = tempfile(pattern = "methylBase_",tmpdir = "."),
                               no = paste0("methylBase",suffix)),
                        ".txt")
    
    filename <- .checkTabixFileExists(filename)
    # remove the "./" that tempfile prepends
    filename <- basename(filename)
    
    # filename <- tempfile(pattern = "methylBase",tmpdir = ".",fileext = ".txt")
    #filename <- paste0(paste(getSampleID(object),collapse = "_"),".txt")
    
    # get indices of coverage in the data frame 
    coverage.ind=seq(5,by=3,length.out=length(object))
    numCs.ind   =coverage.ind+1
    numTs.ind   =coverage.ind+2
    
    slotList <- list(dbtype = object[[1]]@dbtype,
                     sample.ids = getSampleID(object),assembly = object[[1]]@assembly,
                     context = object[[1]]@context,resolution = object[[1]]@resolution,
                     coverage.index=coverage.ind,numCs.index=numCs.ind,numTs.index=numTs.ind,
                     treatment = object@treatment,destranded = destrand)
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
                                          tabixHead = tabixHead)
    
    if(is.null(min.per.group)) {
      
      message("merging tabix files ...")
      
      dbpath <- mergeTabix(tabixList = objList ,dir = dir,
                           filename = filename,mc.cores = mc.cores,
                           tabixHead = tabixHeadString) 
      dbpath <- gsub(".tbi","",dbpath)
      
    } else {
      # if the the min.per.group argument is supplied, remove the rows
      # that doesn't have enough coverage
      
      message("merging tabix files ...")
      
      # keep rows with no matching in all samples  
      tmpPath <- mergeTabix(tabixList = objList ,dir = dir,
                            filename = paste0(filename,".tmp"),
                            mc.cores = mc.cores,all=TRUE,
                            tabixHead = tabixHeadString) 
      tmpPath <- gsub(".tbi","",tmpPath)
      
      # get indices of coverage in the data frame 
      coverage.ind=seq(5,by=3,length.out=length(object))
      
      filter <- function(df, coverage.ind, treatment,min.per.group){
        
        df <- as.data.table(df)
        for(i in unique(treatment) ){
          
          my.ind=coverage.ind[treatment==i]
          ldat = !is.na(df[,my.ind,with=FALSE])
          if(  is.null(dim(ldat))  ){  # if there is only one dimension
            df=df[ldat>=min.per.group,]
          }else{
            df=df[rowSums(ldat)>=min.per.group,]
          }
        }
        return(as.data.frame(df))
      }
      
      message("filter mincov per group ...")
      
      dbpath <- applyTbxByChunk(tbxFile = tmpPath, chunk.size = chunk.size, 
                                dir = dir, filename = filename, 
                                return.type = "tabix", FUN = filter, 
                                treatment=object@treatment, coverage.ind=coverage.ind,
                                min.per.group=min.per.group, tabixHead = tabixHeadString)
      
      unlink(list.files(dirname(tmpPath),pattern = basename(gsub(".txt.bgz","",tmpPath)),full.names = TRUE))
    }
    obj <- tryCatch(expr = { 
      readMethylBaseDB(dbpath = dbpath,dbtype = object[[1]]@dbtype,
                       sample.ids = getSampleID(object),assembly = object[[1]]@assembly,
                       context = object[[1]]@context,resolution = object[[1]]@resolution,
                       treatment = object@treatment,destranded = destrand)
    },
      error = function(e) {
        stop(sprintf("no %s were united. try adjusting 'min.per.group'.",
                     object[[1]]@resolution))
      }
    )
    
    message(paste0("flatfile located at: ",obj@dbpath))
    
    obj
    
  } else {
    obj <- object
    class(obj) <- "methylRawList"
    unite(obj,destrand,min.per.group,chunk.size,mc.cores,save.db=FALSE,...)
  }
}

#' @rdname unite-methods
#' @aliases unite,methylRawListDB-method
setMethod("unite", "methylRawListDB",unite.methylRawListDB) 


# getCorrelation ----------------------------------------------------------


#' @rdname getCorrelation-methods
#' @aliases getCorrelation,methylBaseDB-method
setMethod("getCorrelation", "methylBaseDB",
          function(object,method,plot,nrow=2e6){
            
  if(is.null(nrow)){ 
    
    meth.fun <- function(data, numCs.index, numTs.index){
      
      data[, numCs.index]/( data[,numCs.index] + data[,numTs.index] )
    }
    meth.mat = applyTbxByChunk(object@dbpath,return.type = "data.frame",
                               FUN = meth.fun, numCs.index = object@numCs.index,
                               numTs.index = object@numTs.index)
  }else{
    data = headTabix(object@dbpath,nrow=nrow,return.type = "data.frame")
    meth.mat = data[, object@numCs.index]/( data[,object@numCs.index] + 
                                              data[,object@numTs.index] )
  }
  
  names(meth.mat)=object@sample.ids
  
  method <- match.arg(method)
  print( cor(meth.mat,method=method) )
  
  if(plot) {
    .plotCorrelation(meth.mat = meth.mat,
                     title = paste(object@context, object@resolution ,method,"cor."),
                     method = method)
  }
}  
)


# reconstruct -------------------------------------------------------------

#' @rdname reconstruct-methods
#' @aliases reconstruct,methylBaseDB-method
setMethod("reconstruct",signature(mBase="methylBaseDB"), 
          function(methMat,mBase,chunk.size,save.db=TRUE,...){
  
  if(save.db){
    
    if(is.matrix(methMat) ) {
      
      # check if indeed methMat is percent methylation matrix
      if(min(methMat)<0 | max(methMat)>100 ){
        stop("\nmake sure 'methMat' is percent methylation matrix ",
                "(values between 0-100) \n")
      }
      
      # check if methMat is percent methylation matrix fitting to mBase  
      if(nrow(methMat) != mBase@num.records | 
         ncol(methMat) != length(mBase@numCs.index) ){
        stop("\nmethMat dimensions do not match number of samples\n",
             "and number of bases in methylBase object\n")
      }
      
      rndFile=paste(sample(c(0:9, letters, LETTERS),9, replace=TRUE),collapse="")
      matFile=paste(rndFile,"methMat.txt",sep="_")
      .write.bed(methMat,matFile,quote=FALSE,col.names=FALSE,row.names=FALSE,
                  sep="\t")
      methMat = matFile
      
    } else {
      
      # methMat can also be a file containing a percent methylation matrix
      mat <- fread(methMat,header = FALSE)
      
      # check if indeed methMat is percent methylation matrix
      if(min(methMat)<0 | max(methMat)>100 ){
        stop("\nmake sure 'methMat' is percent methylation matrix ",
                "(values between 0-100) \n")
      }
      
      # check if methMat is percent methylation matrix fitting to mBase  
      if(nrow(mat) != mBase@num.records | ncol(mat) != length(mBase@numCs.index) ){
        stop("\nmethMat dimensions do not match number of samples\n",
             "and number of bases in methylBase object\n")
      }
      rm(mat)
    }
    
    reconstr <- function(data, methMat, chunk, numCs.index, numTs.index) {
      
      mat=data[,numCs.index]+data[,numTs.index]
      methMat = read.table(methMat,header = FALSE, nrows = chunk, sep="\t")
      
      # get new unmethylated and methylated counts
      numCs=round(methMat*mat/100)
      numTs=round((100-methMat)*mat/100)
      
      data[,numCs.index]=numCs
      data[,numTs.index]=numTs
      
      return(data)
    }
    
    # catch additional args 
    args <- list(...)
    
    if( !( "dbdir" %in% names(args)) ){
      dir <- dirname(mBase@dbpath)
    } else { dir <- .check.dbdir(args$dbdir) }
    if(!( "suffix" %in% names(args) ) ){
      suffix <- "_reconstructed"
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    # filename <- paste0(paste(mBase@sample.ids,collapse = "_"),suffix,".txt")
    filename <- paste0(gsub(".txt.bgz","",mBase@dbpath),suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    con <- file(methMat,open = "r") 
    
    ## creating the tabix header
    slotList <- list(dbtype = mBase@dbtype,
                     sample.ids = mBase@sample.ids,assembly = mBase@assembly,
                     context = mBase@context,resolution = mBase@resolution,
                     treatment = mBase@treatment,destranded = mBase@destranded,
                     coverage.index = mBase@coverage.index,
                     numCs.index = mBase@numCs.index,
                     numTs.index = mBase@numTs.index)
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
                                          tabixHead = tabixHead)
    
    newdbpath <- applyTbxByChunk(tbxFile = mBase@dbpath,chunk.size = chunk.size, 
                                 dir=dir,filename = filename, 
                                 return.type = "tabix", FUN = reconstr, 
                                 con,chunk.size,mBase@numCs.index,
                                 mBase@numTs.index, tabixHead = tabixHeadString)
    
    close(con)
    if(file.exists(matFile)) {unlink(matFile)}
    
    readMethylBaseDB(dbpath = newdbpath,dbtype = mBase@dbtype,
                     sample.ids = mBase@sample.ids,assembly = mBase@assembly,
                     context = mBase@context,resolution = mBase@resolution,
                     treatment = mBase@treatment,destranded = mBase@destranded)
  } else {
    
    tmp <- mBase[]
    reconstruct(methMat,tmp,save.db=FALSE)
    
  }
}
)


# removeComp --------------------------------------------------------------


#' @rdname removeComp-methods
#' @aliases removeComp,methylBaseDB-method
setMethod("removeComp",signature(mBase="methylBaseDB"), 
          function(mBase,comp,chunk.size,save.db=TRUE,...){
  if(anyNA(comp) || is.null(comp)){
    stop("no component to remove\n")
  }
  
  if(any(comp > length(mBase@sample.ids) )){
    stop("'comp' elements can only take values between 1 and number of samples\n")
  }
  
  scale=TRUE
  center=TRUE
  mat=percMethylation(mBase,chunk.size=chunk.size)  
  mat=scale(mat,scale=scale,center=center)
  centers <- attr(mat,'scaled:center') 
  scales <- attr(mat,'scaled:scale') 
  pr=prcomp(mat,scale.=FALSE,center=FALSE)
  
  pr$rotation[,comp]=0
  res=pr$x %*% t(pr$rotation)
  
  res=(scale(res,center=(-centers/scales),scale=1/scales))
  attr(res,"scaled:center")<-NULL 
  attr(res,"scaled:scale")<-NULL 
  res[res>100]=100
  res[res<0]=0
  reconstruct(res,mBase,chunk.size,save.db = save.db,...=...)
}
)


# percMethylation ---------------------------------------------------------


#' @rdname percMethylation-methods
#' @aliases percMethylation,methylBaseDB-method
setMethod("percMethylation", "methylBaseDB",
          function(methylBase.obj,
                   rowids = FALSE,
                   save.txt = FALSE,
                   chunk.size = 1e6) {
            meth.fun <- function(data, numCs.index, numTs.index, rowids) {
              dat = 100 * data[, numCs.index] / (data[, numCs.index] +
                                                   data[, numTs.index])
              if(rowids) {
                dat = cbind(pos = 
                              paste(as.character(data[, 1]),
                                      data[, 2], data[, 3], 
                                    sep = "."),
                            dat)
                
              }
              return(dat)
              
            }
            if (save.txt) {
              
              filename <- paste0(basename(gsub(".txt.bgz","",
                                      methylBase.obj@dbpath)),"_methMath.txt")
              
              textHeader = methylBase.obj@sample.ids
              if(rowids) textHeader = c("pos",textHeader)
              
              outfile = applyTbxByChunk(methylBase.obj@dbpath,
                                         return.type = "text",
                                         textHeader = textHeader,
                                         chunk.size = chunk.size,
                                         dir = dirname(methylBase.obj@dbpath), 
                                         filename = filename,
                                         FUN = meth.fun, 
                                         numCs.index = methylBase.obj@numCs.index,
                                         numTs.index = methylBase.obj@numTs.index,
                                         rowids = rowids)
              return(outfile)
              
            } else {
              
              meth.mat = applyTbxByChunk(methylBase.obj@dbpath,
                                         return.type = "data.frame", 
                                         chunk.size = chunk.size,
                                         FUN = meth.fun, 
                                         numCs.index = methylBase.obj@numCs.index,
                                         numTs.index = methylBase.obj@numTs.index,
                                         rowids = rowids)
              if(rowids){
                rownames(meth.mat) = meth.mat$pos 
                meth.mat$pos = NULL
              }
              
              names(meth.mat)=methylBase.obj@sample.ids
              return(as.matrix(meth.mat))
              
            }
})


# clusterSamples ----------------------------------------------------------


#' @rdname clusterSamples-methods
#' @aliases clusterSamples,methylBaseDB-method
setMethod("clusterSamples", "methylBaseDB",
          function(.Object, dist, method ,sd.filter, sd.threshold, 
                   filterByQuantile, plot,chunk.size)
          {
            
            getMethMat <- function(mat,numCs.index,numTs.index,sd.filter, 
                                   sd.threshold, filterByQuantile){
              
              # remove rows containing NA values, they might be 
              # introduced at unite step
              mat      =mat[ rowSums(is.na(mat))==0, ] 
              
              meth.mat = mat[, numCs.index]/
                (mat[,numCs.index] + mat[,numTs.index] )                                      
              
              # if Std. Dev. filter is on remove rows with low variation
              if(sd.filter){
                if(filterByQuantile){
                  sds=rowSds(as.matrix(meth.mat))
                  cutoff=quantile(sds,sd.threshold)
                  meth.mat=meth.mat[sds>cutoff,]
                }else{
                  meth.mat=meth.mat[rowSds(as.matrix(meth.mat))>sd.threshold,]
                }
              }
            }
            
            meth.mat <- applyTbxByChunk(.Object@dbpath,chunk.size = chunk.size,
                                        return.type = "data.frame",
                                        FUN=getMethMat,
                                        numCs.ind=.Object@numCs.index,
                                        numTs.ind=.Object@numTs.index,
                                        sd.filter=sd.filter, 
                                        sd.threshold=sd.threshold, 
                                        filterByQuantile=filterByQuantile)
            
            names(meth.mat)=.Object@sample.ids
            
            .cluster(meth.mat, dist.method=dist, hclust.method=method, 
                     plot=plot, treatment=.Object@treatment,
                     sample.ids=.Object@sample.ids,
                     context=.Object@context)
            
          }
)

# PCASamples --------------------------------------------------------------


#' @rdname PCASamples-methods
#' @aliases PCASamples,methylBaseDB-method
setMethod("PCASamples", "methylBaseDB",
          function(.Object, screeplot, adj.lim,scale,center,comp,
                   transpose,sd.filter, sd.threshold, 
                   filterByQuantile,obj.return,chunk.size)
{
  
  getMethMat <- function(mat,numCs.index,numTs.index,sd.filter, 
                         sd.threshold, filterByQuantile){
    
    # remove rows containing NA values, they might be introduced at unite step
    mat      =mat[ rowSums(is.na(mat))==0, ] 
    
    meth.mat = mat[, numCs.index]/
      (mat[,numCs.index] + mat[,numTs.index] )                                      
    
    
    # if Std. Dev. filter is on remove rows with low variation
    if(sd.filter){
      if(filterByQuantile){
        sds=rowSds(as.matrix(meth.mat))
        cutoff=quantile(sds,sd.threshold)
        meth.mat=meth.mat[sds>cutoff,]
      }else{
        meth.mat=meth.mat[rowSds(as.matrix(meth.mat))>sd.threshold,]
      }
    }
    
  }
  
  meth.mat <- applyTbxByChunk(.Object@dbpath,chunk.size = chunk.size,
                              return.type = "data.frame",FUN=getMethMat,
                              numCs.ind=.Object@numCs.index,
                              numTs.ind=.Object@numTs.index,
                              sd.filter=sd.filter, sd.threshold=sd.threshold, 
                              filterByQuantile=filterByQuantile)
  
  names(meth.mat)=.Object@sample.ids
  
  if(transpose){
    .pcaPlotT(meth.mat,comp1=comp[1],comp2=comp[2],screeplot=screeplot, 
              adj.lim=adj.lim, 
              treatment=.Object@treatment,sample.ids=.Object@sample.ids,
              context=.Object@context
              ,scale=scale,center=center,obj.return=obj.return)
    
  }else{
    .pcaPlot(meth.mat,comp1=comp[1],comp2=comp[2],screeplot=screeplot, 
             adj.lim=adj.lim, 
             treatment=.Object@treatment,sample.ids=.Object@sample.ids,
             context=.Object@context,
             scale=scale,center=center,  obj.return=obj.return)
  }
  
}      
)

# reorganize --------------------------------------------------------------


#' @rdname reorganize-methods
#' @aliases reorganize,methylBaseDB-method
setMethod("reorganize", signature(methylObj="methylBaseDB"),
          function(methylObj,sample.ids,treatment,chunk.size,save.db=TRUE,...){
            
  #sample.ids length and treatment length should be equal
  if(length(sample.ids) != length(treatment) ){
    stop("length of sample.ids should be equal to treatment")
  }
  
  if( ! all(sample.ids %in% methylObj@sample.ids) ){
    stop("provided sample.ids is not a subset of the sample ids of the object")
  }
  
  if(save.db) {
    
    temp.id = methylObj@sample.ids # get the subset of ids
    # get the column order in the original matrix
    col.ord = order(match(temp.id,sample.ids))[1:length(sample.ids)] 
    
    # make a matrix indices for easy access 
    ind.mat=rbind(methylObj@coverage.index[col.ord],  
                  methylObj@numCs.index[col.ord],
                  methylObj@numTs.index[col.ord])
    
    # get indices of coverage,numCs and numTs in the data frame 
    coverage.ind=seq(5,by=3,length.out=length(sample.ids))
    numCs.ind   =coverage.ind+1
    numTs.ind   =coverage.ind+2
    
    
    getSub <- function(data,ind.mat) {
      
      newdat =data[,1:4]
      for(i in 1:ncol(ind.mat))
      {
        newdat=cbind(newdat,data[,ind.mat[,i]])
      }
      return(newdat)
    }
    
    # catch additional args 
    args <- list(...)
    
    if( ( "dbdir" %in% names(args))   ){
       dir <- .check.dbdir(args$dbdir) 
    } else { dir <- dirname(methylObj@dbpath) }
    
    if(!( "suffix" %in% names(args) ) ){
      suffix <- "_reorganized"
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    # filename <- paste0(paste(sample.ids,collapse = "_"),suffix,".txt")
    filename <- paste0(gsub(".txt.bgz","",methylObj@dbpath)
                       ,suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    ## creating the tabix header
    slotList <- list(
      dbtype = methylObj@dbtype,
      sample.ids = sample.ids,
      assembly = methylObj@assembly,
      context = methylObj@context,
      treatment = treatment,
      destranded = methylObj@destranded,
      resolution = methylObj@resolution,
      coverage.index = coverage.ind,
      numCs.index = numCs.ind,
      numTs.index = numTs.ind
    )
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
                                          tabixHead = tabixHead)
    
    newdbpath <- applyTbxByChunk(tbxFile = methylObj@dbpath,
                                 chunk.size = chunk.size, dir=dir,
                                 filename = filename, 
                                 return.type = "tabix", FUN = getSub, 
                                 ind.mat=ind.mat,tabixHead = tabixHeadString) 
    
    readMethylBaseDB(dbpath = newdbpath,dbtype = methylObj@dbtype,
                     sample.ids=sample.ids,
                     assembly=methylObj@assembly,context=methylObj@context,
                     treatment=treatment,destranded=methylObj@destranded, 
                     resolution=methylObj@resolution )
  } else {
    
    obj <- methylObj[]
    reorganize(obj,sample.ids,treatment,save.db=FALSE,...)
    
  }
})

# pool --------------------------------------------------------------------


#' @rdname pool-methods
#' @aliases pool,methylBaseDB-method
setMethod("pool", "methylBaseDB",
          function(obj,sample.ids,chunk.size,save.db=TRUE,...){
            
            
  treat = unique(obj@treatment)
  if(length(treat) != length(sample.ids)) {
        stop("Length of sample.ids should be equal to \n",
                 "length of the unique treatment vector")}
  
  if(save.db) {
    
    mypool <- function(df,treatment,numCs.index){
      
      treat=unique(treatment)

      res=df[,1:4]
      for(i in 1:length(treat) ){
        
        # get indices
        setCs=numCs.index[treatment==treat[i]]
        setTs=setCs+1
        set.cov=setCs-1
        
        if(length(setCs)>1){
          Cs=rowSums(df[,setCs],na.rm=TRUE)
          Ts=rowSums(df[,setTs],na.rm=TRUE)
          covs=rowSums(df[,set.cov],na.rm=TRUE)
          
        }else{
          Cs  =df[,setCs]
          Ts  =df[,setTs]
          covs=df[,set.cov]
        }
        res=cbind(res,covs,Cs,Ts) # bind new data
      }
      return(res)
    }
    
    coverage.ind=3*(1:length(treat)) + 2
    
    # catch additional args 
    args <- list(...)
    
    if( ( "dbdir" %in% names(args))   ){
      if( !(is.null(args$dbdir)) ) { 
        dir <- .check.dbdir(args$dbdir) }
    } else { dir <- dirname(obj@dbpath) }
    if(!( "suffix" %in% names(args) ) ){
      suffix <- "_pooled"
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    # filename <- paste0(paste(sample.ids,collapse = "_"),suffix,".txt")
    
    filename <- paste0(gsub(".txt.bgz","",obj@dbpath)
                       ,suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    # if(file.exists(paste0(filename,".bgz"))) message("overwriting file.")
    
    # get indices of coverage in the data frame 
    coverage.ind=seq(5,by=3,length.out=length(sample.ids))
    numCs.ind   =coverage.ind+1
    numTs.ind   =coverage.ind+2
    
    ## creating the tabix header
    slotList <- list(dbtype = obj@dbtype,
                     sample.ids=sample.ids,
                     assembly=obj@assembly,context=obj@context,
                     treatment=treat,destranded=obj@destranded,
                     coverage.index=coverage.ind,numCs.index=numCs.ind,
                     numTs.index=numTs.ind,
                     resolution=obj@resolution)
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
                                          tabixHead = tabixHead)
    
    newdbpath <- applyTbxByChunk(tbxFile = obj@dbpath,chunk.size = chunk.size,
                                 dir=dir,filename = filename, 
                                 return.type = "tabix", FUN = mypool, 
                                 treatment = obj@treatment,
                                 numCs.index = obj@numCs.index, 
                                 tabixHead = tabixHeadString) 
    
    objdb <- readMethylBaseDB(dbpath = newdbpath,dbtype = obj@dbtype,
                     sample.ids=sample.ids,
                     assembly=obj@assembly,context=obj@context,
                     treatment=treat,destranded=obj@destranded,
                     resolution=obj@resolution )
    
    message(paste0("flatfile located at: ",getDBPath(objdb)))
    
    objdb
    
  } else {
    
    tmp <- obj[]
    pool(tmp,sample.ids,save.db=FALSE)
  }
})




# MethylDiffDB ------------------------------------------------------------

# calculateDiffMeth -------------------------------------------------------


#' @aliases calculateDiffMeth,methylBaseDB-method
#' @rdname calculateDiffMeth-methods
setMethod("calculateDiffMeth", "methylBaseDB",
          function(.Object, covariates, overdispersion, adjust,
                   effect, parShrinkMN,
                   test ,mc.cores , slim , weighted.mean,
                   chunk.size, save.db=TRUE, ...){
            
            if(save.db) {
        
              # check object before starting calculations
              .checksCalculateDiffMeth(treatment = .Object@treatment,
                                       covariates = covariates,
                                       Tcols = .Object@numTs.index)
      
              # catch additional args 
              args <- list(...)
              dir <- dirname(.Object@dbpath)
              
              if( ( "dbdir" %in% names(args))   ){
                if( !(is.null(args$dbdir)) ) { 
                  dir <- .check.dbdir(args$dbdir) 
                }
              }
              if(!( "suffix" %in% names(args) ) ){
                suffix <- NULL
              } else { 
                suffix <- paste0("_",args$suffix)
              }
              
              # filename <- paste0(paste(.Object@sample.ids,collapse = "_"),suffix,".txt")
              
              filename <- paste0(gsub("methylBase","methylDiff",
                                      gsub(".txt.bgz","",.Object@dbpath)
                                      ),suffix,".txt")
              
              filename <- .checkTabixFileExists(filename)
              
              filename <- basename(filename)
              
              ## creating the tabix header
              slotList <- list(dbtype = .Object@dbtype, 
                               sample.ids=.Object@sample.ids,
                               assembly=.Object@assembly,
                               context=.Object@context,
                               destranded=.Object@destranded,
                               treatment=.Object@treatment,
                               resolution=.Object@resolution)
              
              tabixHead <- makeTabixHeader(slotList)
              tabixHeadString <- .formatTabixHeader(class = "methylDiffDB",
                                                    tabixHead = tabixHead)
              
              dbpath <- applyTbxByChunk(.Object@dbpath,dir = 
                                          dir,chunk.size = chunk.size,  
                                        filename = filename, 
                                        return.type = "tabix", 
                                        FUN = .calculateDiffMeth,
                                        treatment=.Object@treatment,
                                        overdispersion=overdispersion,
                                        effect=effect,
                                        parShrinkMN=parShrinkMN,
                                        test=test,
                                        adjust=adjust,
                                        mc.cores=mc.cores,
                                        tabixHead = tabixHeadString)
              
              obj=readMethylDiffDB(dbpath = dbpath,dbtype = .Object@dbtype, 
                                   sample.ids=.Object@sample.ids,
                                   assembly=.Object@assembly,
                                   context=.Object@context,
                                   destranded=.Object@destranded,
                                   treatment=.Object@treatment,
                                   resolution=.Object@resolution)
              
              message(paste0("flatfile located at: ",obj@dbpath))
              
              obj
              
            } else {
              
              tmp <- .Object[]
              calculateDiffMeth(tmp,covariates,overdispersion=overdispersion,
                                adjust=adjust,
                                effect=effect,parShrinkMN=parShrinkMN,
                                test=test,mc.cores=mc.cores,
                                slim=slim,weighted.mean=weighted.mean,
                                save.db=FALSE)
              
            }
}
)

# getMethylDiff -----------------------------------------------------------


#' @aliases getMethylDiff,methylDiffDB-method
#' @rdname getMethylDiff-methods
setMethod(f="getMethylDiff", signature="methylDiffDB", 
          definition=function(.Object,difference,qvalue,type,
                              chunk.size,save.db=TRUE,...) {
            
  if(!( type %in% c("all","hyper","hypo") )){
    stop("Wrong 'type' argument supplied for the function, ",
         "it can be 'hypo', 'hyper' or 'all' ")
  }
  meth.diff=NULL
  if(save.db) {
    
    #function applied to data
    f <- function(data,difference,qv,type){
      
      data <- data.table(data)
      .setMethylDBNames(data,methylDBclass = "methylDiffDB")
      
      if(type=="all"){
        data <- data[(qvalue < qv) & (abs(meth.diff) > difference)]
      }else if(type=="hyper"){
        data <- data[(qvalue < qv) & (meth.diff > difference)]
      }else if(type=="hypo"){
        data <- data[(qvalue < qv) & (meth.diff < -1*difference)]
      }
      return(data)
    }
    
    # catch additional args 
    args <- list(...)
    dir <- dirname(.Object@dbpath)
    
    if( ( "dbdir" %in% names(args))   ){
      if( !(is.null(args$dbdir)) ) { 
        dir <- .check.dbdir(args$dbdir) 
      } 
    }
    
    if(!( "suffix" %in% names(args) ) ){
      suffix <- paste0("_",type)
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    
    # filename <- paste0(paste(.Object@sample.ids,collapse = "_"),suffix,".txt")
    
    filename <- paste0(gsub(".txt.bgz","",.Object@dbpath),suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    ## creating the tabix header
    slotList <- list(dbtype = .Object@dbtype, 
                     sample.ids=.Object@sample.ids,
                     assembly=.Object@assembly,context=.Object@context,
                     destranded=.Object@destranded,
                     treatment=.Object@treatment,resolution=.Object@resolution)
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylDiffDB",
                                          tabixHead = tabixHead)
    
    dbpath <- applyTbxByChunk(.Object@dbpath,chunk.size = chunk.size, 
                              dir = dir, filename = filename, 
                              return.type = "tabix", FUN = f,
                              difference = difference, qv = qvalue, type = type,
                              tabixHead = tabixHeadString)
    
    
    obj=readMethylDiffDB(dbpath = dbpath,dbtype = .Object@dbtype, 
                         sample.ids=.Object@sample.ids,
                         assembly=.Object@assembly,context=.Object@context,
                         destranded=.Object@destranded,
                         treatment=.Object@treatment,resolution=.Object@resolution)
    return(obj)
    
  } else {
    
    tmp <- .Object[]
    getMethylDiff(tmp,difference,qvalue,type,save.db=FALSE)
    
  }
    
}) 

# diffMethPerChr ----------------------------------------------------------


#' @aliases diffMethPerChr,methylDiffDB-method
#' @rdname  diffMethPerChr-methods
setMethod("diffMethPerChr", signature(x = "methylDiffDB"),
          function(x,plot,qvalue.cutoff, meth.cutoff,exclude,keep.empty.chrom,...){
            
            
            
            res <-
              applyTbxByChr(
                x@dbpath,
                return.type = "data.frame",
                FUN = .diffMethPerChr,
                qvalue.cutoff = qvalue.cutoff,
                meth.cutoff = meth.cutoff,
                keep.empty.chrom = keep.empty.chrom
              )
            
            # order the chromosomes
            dmc.hyper.hypo = res[order(as.numeric(sub("chr", "", res$chr))),]
            
            # get percentages of hypo/ hyper
            dmc.hyper = 100 * sum(res$number.of.hypermethylated) / x@num.records 
            dmc.hypo = 100 * sum(res$number.of.hypomethylated) / x@num.records
            
            all.hyper.hypo = data.frame(
              number.of.hypermethylated = sum(res$number.of.hypermethylated),
              percentage.of.hypermethylated = dmc.hyper,
              number.of.hypomethylated = sum(res$number.of.hypomethylated),
              percentage.of.hypomethylated = dmc.hypo
            )
            
            if (all(dmc.hyper.hypo$chr %in% exclude)) {
              warning("Cannot plot figure, excluded all available chromosomes.")
              plot <- FALSE
            }

            if (plot) {
              
                .plotDiffMethPerChr(dmc.hyper.hypo, exclude, qvalue.cutoff, meth.cutoff,...)
                
              } else {
                
                return(list(diffMeth.per.chr = dmc.hyper.hypo,
                            diffMeth.all = all.hyper.hypo))
              }
            
})

# Regionalize methods ----------------------------------------------------

# regionCounts ------------------------------------------------------------


#' @rdname regionCounts
#' @aliases regionCounts,methylRawDB,GRanges-method
setMethod("regionCounts", signature(object="methylRawDB",regions="GRanges"),
          function(object,regions,cov.bases,
                   strand.aware,chunk.size,save.db=TRUE,...){
            
  if(save.db) {
    
    # sort regions
    regions <- sortSeqlevels(regions)
    regions <- sort(regions,ignore.strand=TRUE)
    
    getCounts <- function(data,regions,cov.bases,strand.aware){
      .setMethylDBNames(data)
      # overlap data with regions
      # convert data to GRanges without metacolumns
      g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),strand =strand))
      if(!strand.aware){
        strand(g.meth)="*"
        mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
        #mat=matchMatrix( findOverlaps(regions,g.meth ) )
        
      }else{
        mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
        #mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
        
      }
      
      # create a temporary data.table row ids from regions and counts from object
      temp.dt=data.table(id = mat[, 1], data[mat[, 2], c(5, 6, 7)])
      
      coverage=NULL
      numCs=NULL
      numTs=NULL
      id=covered=NULL
      
      # use data.table to sum up counts per region
      sum.dt=temp.dt[,list(coverage=sum(coverage),
                           numCs   =sum(numCs),
                           numTs   =sum(numTs),covered=length(numTs)),by=id] 
      sum.dt=sum.dt[covered>=cov.bases,]
      temp.df=as.data.frame(regions) # get regions to a dataframe
      
      #create a new methylRaw object to return
      new.data=data.frame(#id      =new.ids,
        chr     =temp.df[sum.dt$id,"seqnames"],
        start   =temp.df[sum.dt$id,"start"],
        end     =temp.df[sum.dt$id,"end"],
        strand  =temp.df[sum.dt$id,"strand"],
        coverage=sum.dt$coverage,
        numCs   =sum.dt$numCs,
        numTs   =sum.dt$numTs)
    }
    
    # catch additional args 
    args <- list(...)
    dir <- dirname(object@dbpath)
    
    if( "dbdir" %in% names(args) ){
      if( !(is.null(args$dbdir)) ){
        dir <- .check.dbdir(args$dbdir) 
      }
    } 
    if(!( "suffix" %in% names(args) ) ){
      suffix <- "_regions"
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    filename <- paste0(gsub(".txt.bgz","",object@dbpath)
                       ,suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    ## creating the tabix header
    slotList <- list(sample.id=object@sample.id,
                     assembly=object@assembly, context =object@context,
                     resolution="region",
                     dbtype = object@dbtype)
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
                                          tabixHead = tabixHead)
    
    newdbpath <- applyTbxByOverlap(object@dbpath,chunk.size = chunk.size, 
                                   ranges=regions,  dir=dir,filename = filename, 
                                   return.type = "tabix", FUN = getCounts,
                                   regions = regions ,cov.bases = cov.bases,
                                   strand.aware = strand.aware,
                                   tabixHead = tabixHeadString)
    
    readMethylRawDB(dbpath = newdbpath,sample.id=object@sample.id,
                    assembly=object@assembly, context =object@context,
                    resolution="region",
                    dbtype = object@dbtype)
    
  } else {
    tmp <- object[]
    regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
  }
}
)

#' @rdname regionCounts
#' @aliases regionCounts,methylRawDB,GRangesList-method
# assume that each name of the element in the GRangesList is unique and 
setMethod("regionCounts", signature(object="methylRawDB",regions="GRangesList"),
          function(object,regions,cov.bases,strand.aware,chunk.size,save.db=TRUE,...){
            
            # combine and sort GRanges from List
            regions <- unlist(regions)
            regions <- sortSeqlevels(regions)
            regions <- sort(regions,ignore.strand=TRUE)
            regions <- unique(regions)
            
            if(save.db) {
              
              getCounts <- function(data,regions,cov.bases,strand.aware){
                
                .setMethylDBNames(data)
                
                # overlap data with regions
                # convert data to GRanges without metacolumns
                g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),strand =strand))
                
                if(!strand.aware){
                  strand(g.meth)="*"
                  mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
                  #mat=matchMatrix( findOverlaps(regions,g.meth ) )
                  
                }else{
                  mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
                  #mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
                  
                }
                
                # create a temporary data.table row ids from regions and counts from object
                temp.dt=data.table(id = mat[, 1], data[mat[, 2], c(5, 6, 7)])
                
                coverage=NULL
                numCs=NULL
                numTs=NULL
                id=covered=NULL
                
                # use data.table to sum up counts per region
                sum.dt=temp.dt[,list(coverage=sum(coverage),
                                     numCs   =sum(numCs),
                                     numTs   =sum(numTs),covered=length(numTs)),by=id] 
                sum.dt=sum.dt[covered>=cov.bases,]
                temp.df=as.data.frame(regions) # get regions to a dataframe
                
                #create a new methylRaw object to return
                new.data=data.frame(#id      =new.ids,
                  chr     =temp.df[sum.dt$id,"seqnames"],
                  start   =temp.df[sum.dt$id,"start"],
                  end     =temp.df[sum.dt$id,"end"],
                  strand  =temp.df[sum.dt$id,"strand"],
                  coverage=sum.dt$coverage,
                  numCs   =sum.dt$numCs,
                  numTs   =sum.dt$numTs)
              }
              
              
              
              # catch additional args 
              args <- list(...)
              dir <- dirname(object@dbpath)
              
              if( "dbdir" %in% names(args) ){
                if( !(is.null(args$dbdir)) ){
                  dir <- .check.dbdir(args$dbdir) 
                }
              } 
              if(!( "suffix" %in% names(args) ) ){
                suffix <- "_regions"
              } else { 
                suffix <- paste0("_",args$suffix)
              }
              
              
              filename <- paste0(gsub(".txt.bgz","",object@dbpath)
                                 ,suffix,".txt")
              
              filename <- .checkTabixFileExists(filename)
              
              filename <- basename(filename)
              
              ## creating the tabix header
              slotList <- list(sample.id=object@sample.id,
                               assembly=object@assembly, 
                               context =object@context,
                               resolution="region",
                               dbtype = object@dbtype)
              
              tabixHead <- makeTabixHeader(slotList)
              tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
                                                    tabixHead = tabixHead)
              
              newdbpath <- applyTbxByOverlap(object@dbpath,
                                             chunk.size = chunk.size, 
                                             ranges=regions,  dir=dir,filename = filename, 
                                             return.type = "tabix", 
                                             FUN = getCounts,regions = regions ,
                                             cov.bases = cov.bases,
                                             strand.aware = strand.aware,
                                             tabixHead = tabixHeadString)
              
              
              readMethylRawDB(dbpath = newdbpath,sample.id=object@sample.id,
                              assembly=object@assembly, context =object@context,resolution="region",
                              dbtype = object@dbtype)
              
            } else {
              tmp <- object[]
              regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
            }
            
          }
)

#' @rdname regionCounts
#' @aliases regionCounts,methylRawListDB,GRanges-method
setMethod("regionCounts", signature(object="methylRawListDB",regions="GRanges"),
          function(object,regions,cov.bases,strand.aware,chunk.size,save.db=TRUE,...){
            
            if(save.db){
              args <- list(...)
              
              if( !( "dbdir" %in% names(args)) ){
                dbdir <- NULL
              } else { dbdir <- basename(.check.dbdir(args$dbdir)) }
              
              outList = lapply(object,regionCounts,regions,cov.bases, strand.aware,
                               chunk.size,save.db,dbdir=dbdir,...)
              new("methylRawListDB", outList,treatment=object@treatment)
              
            } else {
              outList = lapply(object,regionCounts,regions,cov.bases,strand.aware,
                               chunk.size,save.db=FALSE)
              new("methylRawList", outList,treatment=object@treatment)
            }
          }
)

#' @rdname regionCounts
#' @aliases regionCounts,methylRawListDB,GRangesList-method
setMethod("regionCounts", signature(object="methylRawListDB",
                                    regions="GRangesList"),
          function(object,regions,cov.bases,strand.aware,chunk.size,save.db=TRUE,...){
            
            if(save.db){
              args <- list(...)
              
              if( !( "dbdir" %in% names(args)) ){
                dbdir <- NULL
              } else { dbdir <- basename(.check.dbdir(args$dbdir)) }
              
              outList = lapply(object,regionCounts,regions,cov.bases, strand.aware,
                               chunk.size,save.db,dbdir=dbdir,...)
              new("methylRawListDB", outList,treatment=object@treatment)
              
            } else {
              outList = lapply(object,regionCounts,regions,cov.bases,strand.aware,
                               chunk.size,save.db=FALSE)
              new("methylRawList", outList,treatment=object@treatment)
            }
          }
)

#' @rdname regionCounts
#' @aliases regionCounts,methylBaseDB,GRanges-method
setMethod("regionCounts", signature(object="methylBaseDB",regions="GRanges"),
          function(object,regions,cov.bases,strand.aware,chunk.size,
                   save.db=TRUE,...){
            
    if(save.db) {
      
      # sort regions
      regions <- sortSeqlevels(regions)
      regions <- sort(regions,ignore.strand=TRUE)
      
      getCounts <- function(data,regions,cov.bases,strand.aware){
        .setMethylDBNames(data)
        # overlap data with regions
        # convert data to GRanges without metacolumns
        g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
                                  strand =strand))
        if(!strand.aware){
          strand(g.meth)="*"
          mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
          #mat=matchMatrix( findOverlaps(regions,g.meth ) )
          
        }else{
          mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
          #mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
          
        }
        
        #require(data.table)
        # create a temporary data.table row ids from regions and counts 
        #from object
        temp.dt=data.table(id = mat[, 1], data[mat[, 2], 5:ncol(data)])
        
        coverage=NULL
        numCs=NULL
        numTs=NULL
        id=NULL
        .SD=NULL
        numTs1=covered=NULL
        # use data.table to sum up counts per region
        sum.dt=temp.dt[,c(lapply(.SD,sum),covered=length(numTs1)),by=id] 
        sum.dt=sum.dt[covered>=cov.bases,]
        temp.df=as.data.frame(regions) # get regions to a dataframe
        
        #create a new methylRaw object to return
        new.data=data.frame(#id      =new.ids,
          chr     =temp.df[sum.dt$id,"seqnames"],
          start   =temp.df[sum.dt$id,"start"],
          end     =temp.df[sum.dt$id,"end"],
          strand  =temp.df[sum.dt$id,"strand"],
          as.data.frame(sum.dt[,c(2:(ncol(sum.dt)-1)),with=FALSE]),
          stringsAsFactors=FALSE)
      }
      
      
      # catch additional args 
      args <- list(...)
      dir <- dirname(object@dbpath)
      
      if( "dbdir" %in% names(args) ){
        dir <- .check.dbdir(args$dbdir) 
      } 
      if(!( "suffix" %in% names(args) ) ){
        suffix <- "_regions"
      } else { 
        suffix <- paste0("_",args$suffix)
      }
      
      # filename <- paste0(paste(object@sample.ids,collapse = "_"),suffix,".txt")
      
      filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
      
      filename <- .checkTabixFileExists(filename)
      
      filename <- basename(filename)
      
      destranded = ifelse( test = (strand.aware & !(object@destranded)), 
                           FALSE, TRUE)
      
      ## creating the tabix header
      slotList <- list(dbtype = object@dbtype,
                       sample.ids=object@sample.ids,
                       assembly=object@assembly,
                       context=object@context,
                       treatment=object@treatment,
                       destranded=destranded,
                       resolution="region")
      
      tabixHead <- makeTabixHeader(slotList)
      tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
                                            tabixHead = tabixHead)
      
      newdbpath <- applyTbxByOverlap(object@dbpath,chunk.size = chunk.size, 
                                     ranges=regions,  dir=dir,
                                     filename = filename, 
                                     return.type = "tabix", 
                                     FUN = getCounts,regions = regions ,
                                     cov.bases = cov.bases,
                                     strand.aware = strand.aware,
                                     tabixHead = tabixHeadString)
      
      readMethylBaseDB(dbpath = newdbpath,dbtype = object@dbtype,
                       sample.ids=object@sample.ids,
                       assembly=object@assembly,
                       context=object@context,treatment=object@treatment,
                       destranded=destranded,resolution="region")
      
    } else {
      
      tmp <- object[]
      regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
      
    }
  }
)

#' @rdname regionCounts
#' @aliases regionCounts,methylBaseDB,GRangesList-method
setMethod("regionCounts", signature(object="methylBaseDB",regions="GRangesList"),
          function(object,regions,cov.bases,strand.aware,chunk.size,
                   save.db=TRUE,...){
            
            # combine and sort GRanges from List
            regions <- unlist(regions)
            regions <- sortSeqlevels(regions)
            regions <- sort(regions,ignore.strand=TRUE)
            regions <- unique(regions)
            
            if(save.db) {
              
              getCounts <- function(data,regions,cov.bases,strand.aware){
                .setMethylDBNames(data)
                # overlap data with regions
                # convert data to GRanges without metacolumns
                g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
                                          strand =strand))
                if(!strand.aware){
                  strand(g.meth)="*"
                  mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
                  #mat=matchMatrix( findOverlaps(regions,g.meth ) )
                  
                }else{
                  mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
                  #mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
                  
                }
                
                #require(data.table)
                # create a temporary data.table row ids from regions and counts 
                #from object
                temp.dt=data.table(id = mat[, 1], data[mat[, 2], 5:ncol(data)])
                #dt=data.table::data.table(dt)
                #dt=data.table(id=mat[,1],data[mat[,2],c(5,6,7)] ) 
                #worked with data.table 1.7.7
                
                coverage=NULL
                numCs=NULL
                numTs=NULL
                id=NULL
                .SD=NULL
                numTs1=covered=NULL
                # use data.table to sum up counts per region
                sum.dt=temp.dt[,c(lapply(.SD,sum),covered=length(numTs1)),by=id] 
                sum.dt=sum.dt[covered>=cov.bases,]
                temp.df=as.data.frame(regions) # get regions to a dataframe
                
                #create a new methylRaw object to return
                new.data=data.frame(#id      =new.ids,
                  chr     =temp.df[sum.dt$id,"seqnames"],
                  start   =temp.df[sum.dt$id,"start"],
                  end     =temp.df[sum.dt$id,"end"],
                  strand  =temp.df[sum.dt$id,"strand"],
                  as.data.frame(sum.dt[,c(2:(ncol(sum.dt)-1)),with=FALSE]),
                  stringsAsFactors=FALSE)
              }
              
              # catch additional args 
              args <- list(...)
              dir <- dirname(object@dbpath)
              
              if( "dbdir" %in% names(args) ){
                dir <- .check.dbdir(args$dbdir) 
              } 
              if(!( "suffix" %in% names(args) ) ){
                suffix <- paste0("_","regions")
              } else { 
                suffix <- paste0("_",args$suffix)
              }
              
              # filename <- paste0(paste(object@sample.ids,collapse = "_"),suffix,".txt")
              
              filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
              
              filename <- .checkTabixFileExists(filename)
              
              filename <- basename(filename)
              
              destranded = ifelse( test = (strand.aware & !(object@destranded)), 
                                  FALSE, TRUE)
              ## creating the tabix header
              slotList <- list(dbtype = object@dbtype,
                               sample.ids=object@sample.ids,
                               assembly=object@assembly,
                               context=object@context,
                               treatment=object@treatment,
                               destranded=destranded,
                               resolution="region")
              
              tabixHead <- makeTabixHeader(slotList)
              tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
                                                    tabixHead = tabixHead)
              
              newdbpath <- applyTbxByOverlap(object@dbpath,chunk.size = chunk.size, 
                                             ranges=regions,  dir=dir,
                                             filename = filename, 
                                             return.type = "tabix", 
                                             FUN = getCounts,regions = regions ,
                                             cov.bases = cov.bases,
                                             strand.aware = strand.aware,
                                             tabixHead = tabixHeadString)
              
              readMethylBaseDB(dbpath = newdbpath,dbtype = object@dbtype,
                               sample.ids=object@sample.ids,
                               assembly=object@assembly,
                               context=object@context,treatment=object@treatment,
                               destranded=destranded,resolution="region")
              
            } else {
              
              tmp <- object[]
              regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
              
            }
          }
)

# tileMethylCounts --------------------------------------------------------


#' @aliases tileMethylCounts,methylRawDB-method
#' @rdname tileMethylCounts-methods
setMethod("tileMethylCounts", signature(object="methylRawDB"),
          function(object,win.size,step.size,cov.bases,mc.cores,
                   save.db=TRUE,...){
            
            if(save.db) {
              
              tileCount <- function(data,win.size,step.size,cov.bases,resolution) 
                {
                
                .setMethylDBNames(data,"methylRawDB")
                # overlap data with regions
                # convert data to GRanges without metacolumns
                g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
                                          strand =strand))
                
                chrs   =as.character(unique(seqnames(g.meth)))
                
                # get max length of feature covered chromosome
                max.length=max(IRanges::end(g.meth)) 
                # get start of sliding window
                min.length=min(IRanges::end(g.meth))
                win.start = floor(min.length/win.size)*win.size
                
                #get sliding windows with covered CpGs
                numTiles=floor(  (max.length-(win.size-step.size)- win.start)/step.size )+1
                
                all.wins=GRanges(seqnames=rep(chrs,numTiles),
                                 ranges=IRanges(start=win.start + 1 + 0:(numTiles-1)*step.size,
                                                width=rep(win.size,numTiles)) )
                # clean up
                rm(g.meth)
                
                tmp <- new("methylRaw",data,sample.id="temp",resolution=object@resolution)
                # clean up
                rm(data)
                
                res <- regionCounts(tmp,all.wins,cov.bases,strand.aware=FALSE)
                # clean up
                rm(tmp)
                
                return(getData(res))
                
              }
              
              # catch additional args 
              args <- list(...)
              dir <- dirname(object@dbpath)
              
              if( "dbdir" %in% names(args) ){
                if( !(is.null(args$dbdir)) ){
                  dir <- .check.dbdir(args$dbdir) 
                }
              } 
              if(!( "suffix" %in% names(args) ) ){
                suffix <- "_tiled"
              } else { 
                suffix <- paste0("_",args$suffix)
              }
              
              
              # filename <- paste0(paste(object@sample.id,collapse = "_"),
              #                    suffix,".txt")
              
              filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
              
              filename <- .checkTabixFileExists(filename)
              
              filename <- basename(filename)
              
              ## creating the tabix header
              slotList <- list(sample.id=object@sample.id,
                               assembly=object@assembly, 
                               context =object@context,
                               resolution="region",
                               dbtype = object@dbtype)
              
              tabixHead <- makeTabixHeader(slotList)
              tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
                                                    tabixHead = tabixHead)
              
              newdbpath <- applyTbxByChr(object@dbpath, return.type = "tabix",
                                         dir = dir,filename = filename,
                                         FUN = tileCount, win.size = win.size,
                                         step.size = step.size,
                                         cov.bases = cov.bases,
                                         resolution=object@resolution,
                                         mc.cores = mc.cores,
                                         tabixHead = tabixHeadString)
              
              readMethylRawDB(dbpath = newdbpath,sample.id=object@sample.id,
                              assembly=object@assembly, context =object@context,
                              resolution="region",
                              dbtype = object@dbtype)
            } else {
              
              tmp <- object[]
              tileMethylCounts(tmp,win.size,step.size,cov.bases,mc.cores)
              
            }
            
          }
)

#' @aliases tileMethylCounts,methylRawListDB-method
#' @rdname tileMethylCounts-methods
setMethod("tileMethylCounts", signature(object="methylRawListDB"),
          function(object,win.size,step.size,cov.bases,mc.cores,save.db=TRUE,...){
            
            
            if(save.db){
              args <- list(...)
              
              if( !( "dbdir" %in% names(args)) ){
                dbdir <- NULL
              } else { dbdir <- basename(.check.dbdir(args$dbdir)) }
              
              new.list=lapply(object,tileMethylCounts,win.size,step.size,
                              cov.bases,mc.cores,save.db,dbdir=dbdir,...) 
              new("methylRawListDB", new.list,treatment=object@treatment)
              
            } else {
              new.list=lapply(object,tileMethylCounts,win.size,step.size,
                              cov.bases,save.db=FALSE) 
              new("methylRawList", new.list,treatment=object@treatment)
            }
            
          })

#' @aliases tileMethylCounts,methylBaseDB-method
#' @rdname tileMethylCounts-methods
setMethod("tileMethylCounts", signature(object="methylBaseDB"),
          function(object,win.size,step.size,cov.bases,
                   mc.cores,save.db=TRUE,...){
            
  if(save.db) {
    
    tileCount <- function(data,win.size,step.size,cov.bases,
                          resolution,destranded) {
      
      .setMethylDBNames(data,"methylBaseDB")
      # overlap data with regions
      # convert data to GRanges without metacolumns
      g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
                                strand =strand))
      
      chrs   =as.character(unique(seqnames(g.meth)))
      
      # get max length of feature covered chromosome
      max.length=max(IRanges::end(g.meth)) 
      # get start of sliding window
      min.length=min(IRanges::end(g.meth))
      win.start = floor(min.length/win.size)*win.size
      
      #get sliding windows with covered CpGs
      numTiles=floor(  (max.length-(win.size-step.size)- win.start)/step.size )+1
      
      all.wins=GRanges(seqnames=rep(chrs,numTiles),
                       ranges=IRanges(start=win.start + 1 + 0:(numTiles-1)*step.size,
                                      width=rep(win.size,numTiles)) )
      
      # clean up
      rm(g.meth)
      
      tmp <- new("methylBase",data,
                 resolution=object@resolution,
                 destranded=object@destranded)
      # clean up
      rm(data)
      
      res <- regionCounts(tmp,all.wins,cov.bases,strand.aware=FALSE)
      # clean up
      rm(tmp)
      
      getData(res)
      
    }
    
    # catch additional args 
    args <- list(...)
    dir <- dirname(object@dbpath)
    
    if( "dbdir" %in% names(args) ){
        dir <- .check.dbdir(args$dbdir) 
    } 
    if(!( "suffix" %in% names(args) ) ){
      suffix <- paste0("_","tiled")
    } else { 
      suffix <- paste0("_",args$suffix)
    }
    
    # filename <- paste0(paste(object@sample.ids,collapse = "_"),suffix,".txt")
    
    filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
    
    filename <- .checkTabixFileExists(filename)
    
    filename <- basename(filename)
    
    ## creating the tabix header
    slotList <- list(dbtype = object@dbtype,
                     sample.ids=object@sample.ids,
                     assembly=object@assembly,context=object@context,
                     treatment=object@treatment,
                     destranded=TRUE,resolution="region")
    
    tabixHead <- makeTabixHeader(slotList)
    tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
                                          tabixHead = tabixHead)
    
    newdbpath <- applyTbxByChr(object@dbpath, return.type = "tabix",
                               dir = dir,filename = filename,
                               FUN = tileCount, win.size = win.size,
                               step.size = step.size,
                               cov.bases = cov.bases,
                               resolution=object@resolution,
                               mc.cores = mc.cores,
                               tabixHead = tabixHeadString)
    
    readMethylBaseDB(dbpath = newdbpath,dbtype = object@dbtype,
                     sample.ids=object@sample.ids,
                     assembly=object@assembly,context=object@context,
                     treatment=object@treatment,
                     destranded=TRUE,resolution="region")
  } else {
    
    tmp <- object[]
    tileMethylCounts(tmp,win.size,step.size,cov.bases,mc.cores)
    
  }
  
}
)

# BedGraph methods --------------------------------------------------------

# to be able to write w/o scientific notation, there might be better methods
.write.bed<-function(...){
  defsci=options()$scipen
  options(scipen = 999)
  write.table(...)
  options(scipen = defsci)
}
#' @rdname bedgraph-methods
#' @aliases bedgraph,methylRawDB-method
setMethod("bedgraph", signature(methylObj="methylRawDB"),
          function(methylObj,file.name,col.name,unmeth,log.transform,negative
                   ,add.on,chunk.size){
  if(!col.name %in%  c('coverage', 'numCs','numTs','perc.meth') ){
  stop("col.name argument is not one of 'coverage', 'numCs','numTs','perc.meth'")
  }
  
  bedgr <- function(data,col.name,file.name,unmeth,log.transform,
                    negative,add.on,sample.id){
    
    data <- as.data.table(data)
    .setMethylDBNames(df = data,methylDBclass = "methylRawDB")
    chr=start=end=score=numCs=coverage=.=NULL
    if(col.name=="perc.meth"){
      df= data[,.(chr,start=start-1,end,score=100*numCs/coverage )]
    }else{
      df=data[,.(chr,start=start-1,end,get(col.name) )]
      df <- as.data.frame(df)
      names(df)[4] <- col.name
      if(log.transform){
        df[,4]=log10(df[,4])
      }
      if(negative){
        df[,4]=-1*(df[,4])
      }
    }
    
    if(is.null(file.name)){
      return(df)
    }else{
      
      if(unmeth & col.name=="perc.meth")
      {
        # write meth data to single file
        .write.bed(df,file=paste0(file.name,"_meth"),quote=FALSE,
                    col.names=FALSE,row.names=FALSE,sep="\t",
                    append=TRUE)
        
        # write unmeth data to single file
        df[,4]=100-df[,4]
        .write.bed(df,file=paste0(file.name,"_unmeth"),quote=FALSE,
                    col.names=FALSE,row.names=FALSE,sep="\t",
                    append=TRUE)
        
      }else{
        .write.bed(df,file=file.name,quote=FALSE,col.names=FALSE,
                    row.names=FALSE,sep="\t",append=TRUE)
      }
    }
  }
  
  
  if(is.null(file.name)){
    
    applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size, 
                    return.type = "data.frame", FUN = bedgr,
                    col.name = col.name, file.name = file.name, 
                    unmeth = unmeth, log.transform=log.transform,
                    negative= negative,
                    add.on = add.on, sample.id = methylObj@sample.id)
    
  } else {
    
    rndFile=paste(sample(c(0:9, letters, LETTERS),9, replace=TRUE),collapse="")
    filename2=paste(file.name,rndFile,sep="_")
    
    applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size,
                    return.type = "data.frame", FUN = bedgr,
                    col.name = col.name, file.name= filename2, 
                    unmeth = unmeth, log.transform=log.transform, 
                    negative= negative,
                    add.on = add.on, sample.id = methylObj@sample.id)
    
    
    if(unmeth & col.name=="perc.meth")
    {
      # combine single files produced by bedgr function
      track.line=paste(
        "track type=bedGraph name='",methylObj@sample.id," METH Cs",
        "' description='",methylObj@sample.id," METH Cs",
        "' visibility=full color=255,0,0 maxHeightPixels=80:80:11 ",
        add.on,sep="")                        
      cat(track.line,"\n",file=file.name)
      file.append(file.name,paste0(filename2,"_meth"))
      track.line2=paste(
        "track type=bedGraph name='",methylObj@sample.id," UNMETH Cs",
        "' description='",methylObj@sample.id," UNMETH Cs",
        "' visibility=full color=0,0,255 maxHeightPixels=80:80:11 ",
        add.on,sep="")
      cat(track.line2,"\n",file=file.name,append=TRUE)
      file.append(file.name,paste0(filename2,"_unmeth"))
      
    }else{
      
      track.line=paste(
        "track type=bedGraph name='",methylObj@sample.id," ",col.name,
        "' description='",methylObj@sample.id," ",col.name,
        "' visibility=full color=255,0,0 maxHeightPixels=80:80:11 ",
        add.on,sep="")
      cat(track.line,"\n",file=file.name)
      file.append(file.name,filename2)
      
    }
    # tidy up
    unlink(list.files(path = dirname(file.name),pattern = rndFile,
                      full.names = TRUE))
    
  }
  
}

)

#' @rdname bedgraph-methods
#' @aliases bedgraph,methylRawListDB-method
setMethod("bedgraph", signature(methylObj="methylRawListDB"),
          function(methylObj,file.name,col.name,unmeth,log.transform,negative,add.on,chunk.size){
  if(!col.name %in%  c('coverage', 'numCs','numTs','perc.meth') ){
    stop("col.name argument is not one of 'coverage', 'numCs','numTs','perc.meth' options")
  }
  if( is.null(file.name) ){
    
    result=list()
    for(i in 1:length(methylObj))
    {
      result[[ methylObj[[i]]@sample.id ]]=bedgraph(methylObj[[i]],
                                                    file.name=NULL,
                                                    col.name=col.name,
                                                    unmeth=FALSE,
                                                    log.transform=log.transform,
                                                    negative=negative,
                                                    chunk.size=chunk.size)
    }
    return(result)
  }else{
    
    bedgraph(methylObj[[1]],file.name=file.name,
             col.name=col.name,unmeth=unmeth,log.transform=log.transform,
             negative=negative,chunk.size=chunk.size)
    
    for(i in 2:length(methylObj))
    {
      bedgraph(methylObj[[i]],file.name=paste(file.name,i,sep = "_"),
               col.name=col.name,unmeth=unmeth,log.transform=log.transform,
               negative=negative,chunk.size=chunk.size)
      
      file.append(file.name,paste(file.name,i,sep = "_"))
    }
    
    unlink(list.files(path = dirname(file.name),
                      pattern = paste0(basename(file.name),
                                       "_[[:digit:]]+"),full.names = TRUE))
    
  }
})

#' @rdname bedgraph-methods
#' @aliases bedgraph,methylDiffDB-method
setMethod("bedgraph", signature(methylObj="methylDiffDB"),
          function(methylObj,file.name,col.name,log.transform,
                   negative,add.on,chunk.size){
            
  if(! col.name %in% c('pvalue','qvalue', 'meth.diff') )
  {
    stop("col.name argument is not one of 'pvalue','qvalue', 'meth.diff'")
  }
  
  bedgr <- function(data,col.name,file.name,log.transform,negative,add.on,
                    sample.id){
    
    data <- as.data.table(data)
    .setMethylDBNames(df = data,methylDBclass = "methylDiffDB")
    chr=start=end=.=NULL
    df=data[,.(chr,start=start-1,end,get(col.name) )]
    df <- as.data.frame(df)
    names(df)[4] <- col.name
    if(log.transform){
      df[,4]=log10(df[,4])
    }
    if(negative){
      df[,4]=-1*(df[,4])
    }
    if(is.null(file.name)){
      return(df)
    }else{
      .write.bed(df,file=file.name,quote=FALSE,col.names=FALSE,
                  row.names=FALSE,sep="\t",append=TRUE)
    }
  }
  
  if(is.null(file.name)){
    
    applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size, 
                    return.type = "data.frame", FUN = bedgr,
                    col.name = col.name, file.name= file.name,
                    log.transform=log.transform, negative= negative,
                    add.on = add.on, sample.id = methylObj@sample.id)
  } else {
    
    rndFile=paste(sample(c(0:9, letters, LETTERS),9, replace=TRUE),
                  collapse="")
    filename2=paste(file.name,rndFile,sep="_")
    
    txtPath <- applyTbxByChunk(methylObj@dbpath,
                               chunk.size = chunk.size, 
                               return.type = "data.frame", 
                               FUN = bedgr,
                               col.name = col.name,
                               file.name= filename2, 
                               log.transform=log.transform, 
                               negative= negative,
                               add.on = add.on, 
                               sample.id = methylObj@sample.id)
    
    track.line=paste(
      "track type=bedGraph name='",file.name,"' description='",col.name,
      "' visibility=full color=255,0,255 altColor=102,205,170 maxHeightPixels=80:80:11 ",
      add.on,sep="")
    cat(track.line,"\n",file=file.name)
    file.append(file.name,filename2)
    
    unlink(filename2)
    
  }
}
)

Try the methylKit package in your browser

Any scripts or data that you put into this service are public.

methylKit documentation built on Jan. 30, 2021, 2 a.m.