chromunity: Discovery of communities in Pore-C concatemers

#' @importFrom data.table data.table
#' @importFrom data.table rbindlist
#' @importFrom data.table set
#' @importFrom data.table setDT
#' @importFrom data.table setkey
#' @importFrom data.table setkeyv
#' @importFrom data.table setnames
#' @importFrom data.table transpose
#' @importFrom Matrix sparseMatrix
#' @importFrom plyr round_any
#' @import zoo
#' @import arrow
#' @importFrom pbmcapply pbmclapply 
#' @importFrom parallel mclapply
#' @importFrom stats median
#' @importFrom stats na.omit
#' @importFrom MASS ginv
#' @importFrom utils globalVariables
#' @importFrom magrittr %>%
#' @import GenomicRanges
#' @import gUtils
#' @importFrom igraph graph.adjacency
#' @importFrom igraph cluster_louvain
#' @importFrom igraph cluster_fast_greedy
#' @importFrom BiocGenerics t

globalVariables(c("::", ":::", "num.memb", "community", "max.local.dist", "read_idx", "as.data.table", "count", "dcast.data.table", "combn", "tot", "copy", "nfrac", "nmat", ".", "bx2", "bx1", "as", "seqlevels", "loess", ".N", ".SD", ":="))

#' @name parquet2gr
#' @description
#' Converts parquet format from Pore-C snakemake pipeline to a single gRanges object
#'
#' @param path string Path to a directory containing all pore_c.parquet files to be read in.
#' @param col_names vector Column names from parquet files to read in.
#' @param save_path string Path to save the gRanges to.
#' @param prefix string File prefix
#' @param mc.cores integer Number of cores in mclapply (default = 5)
#' @param verbose boolean "verbose" flag (default = FALSE)
#' @return GRanges format of the parquet files combined
#' @author Aditya Deshpande, Marcin Imielinski
#' 
#' @export
parquet2gr = function(path = NULL, col_names = NULL, save_path = NULL, prefix = "NlaIII_this_sample", mc.cores = 5, verbose = TRUE){

    if(is.null(path)){
        stop("Need a valid path to all Pore-C parquets.")
    }

    all.paths = data.table(file_path = dir(path, all.files = TRUE, recursive = TRUE, full = TRUE))[grepl("*.parquet*", file_path)]

    if(nrow(all.paths) == 0){
        stop("No valid files files with suffix .parquet found.")
    } 

    if(verbose){"Beginning to read parquet files"}

    if(is.null(col_names)){col_names = c("read_name", "chrom", "start", "end", "pass_filter")}
    
    parq.list = pbmclapply(1:nrow(all.paths), function(k){
        parq.al = read_parquet(all.paths[k]$file_path, col_select = col_names)
        parq.al = as.data.table(parq.al)
        return(parq.al)
    }, mc.cores = mc.cores)

    parq.dt = rbindlist(parq.list, fill = TRUE)

    parq.dt[, read_idx := .GRP, by = read_name]
    uni.dt = unique(parq.dt[, .(read_name, read_idx)])
    uni.dt[, fr.read_name := .N, by = read_name]
    uni.dt[, fr.read_idx := .N, by = read_idx]
    if (!any(uni.dt$fr.read_name > 1)){
        if(!any(uni.dt$fr.read_idx > 1)){
            rm(uni.dt)
        } else {
            message("Duplicate read_idx found, check this field in the output")
        }
    } else {
        message("Duplicate read_name found, make all files belong to a single, unique sample")
    }

    parq.gr = dt2gr(parq.dt)

    if (!is.null(save_path)){
        saveRDS(parq.gr, paste0(save_path, "/", prefix, ".rds"))
    }
    return(parq.gr)
}



#' @name csv2gr
#' @description
#' Converts csv format loaded to GEO to a single gRanges object
#'
#' @param path string Path to a directory containing all csv files to be read in.
#' @param col_names vector Column names from csv files to read in.
#' @param save_path string Path to save the gRanges to.
#' @param prefix string File prefix
#' @param mc.cores integer Number of cores in mclapply (default = 5)
#' @param verbose boolean "verbose" flag (default = FALSE)
#' @return GRanges format of the csv files combined
#' @author Aditya Deshpande, Marcin Imielinski
#' 
#' @export
csv2gr = function(path = NULL, col_names = NULL, save_path = NULL, prefix = "NlaIII_this_sample", mc.cores = 5, verbose = TRUE){

    if(is.null(path)){
        stop("Need a valid path to all Pore-C csvs.")
    }

    all.paths = data.table(file_path = dir(path, all.files = TRUE, recursive = TRUE, full = TRUE))[grepl("*fragment_alignments.csv.gz*", file_path)]

    if(nrow(all.paths) == 0){
        stop("No valid files files with suffix pore_c.csv found.")
    } 

    if(verbose){"Beginning to read csv files"}

    if(is.null(col_names)){col_names = c("read_name", "chrom", "start", "end", "pass_filter")}
    
    csv.list = pbmclapply(1:nrow(all.paths), function(k){
        csv.al = fread(all.paths[k]$file_path)[, .(read_name, chrom, start, end, pass_filter)]
        csv.al = as.data.table(csv.al)
        csv.al = csv.al[pass_filter ==  TRUE]  
        return(csv.al)
    }, mc.cores = mc.cores)

    csv.dt = rbindlist(csv.list, fill = TRUE)

    gc()
    
    csv.dt[, read_idx := .GRP, by = read_name]
    uni.dt = unique(csv.dt[, .(read_name, read_idx)])
    uni.dt[, fr.read_name := .N, by = read_name]
    uni.dt[, fr.read_idx := .N, by = read_idx]
    if (!any(uni.dt$fr.read_name > 1)){
        if(!any(uni.dt$fr.read_idx > 1)){
            rm(uni.dt)
        } else {
            message("Duplicate read_idx found, check this field in the output")
        }
    } else {
        message("Duplicate read_name found, make all files belong to a single, unique sample")
    }

    csv.gr = dt2gr(csv.dt)

    if (!is.null(save_path)){
        saveRDS(csv.gr, paste0(save_path, "/", prefix, ".rds"))
    }
    return(csv.gr)
}

    


#' @name re_background
#' @description
#' Given n binsets generates random "background" binsets that mirrors the input binset characteristics with respect to chromosome, width, and distance.
#'
#' Generates (chromosome specific) kernels for width, cardinality, distance and models interchromosomal binsets by allowing
#' a chromosome transition matrix which then lands you on a coordinate that is also chosen from a kernel. 
#' x
#' @param binsets GRanges of bins with fields seqnames, start, end, and $bid specifying binset id
#' @param resolution the resolution to use for distance simulation
#' @param n integer scalar specifying number of sets to sample (default = nrow(binsets))
#' @param pseudocount integer used to populate chr-chr transition matrix
#' @param resolution integer resolution to use for simulating bin-sets
#' @param gg gGnome object to be used for simulating bin-sets in the presence of structural variation
#' @param interchromosomal.dist integer inferred average inter-chr distance to be used in case of whole genome simulation
#' @param interchromosomal.table data.table optional table that contains specific chr-chr inferred distances.
#' @param verbose boolean "verbose" flag (default = FALSE)
#' @param mc.cores integer Number of cores in mclapply (default = 10)
#' @author Aditya Deshpande, Marcin Imielinski
#' @export

re_background = function(binsets, n = length(binsets), pseudocount = 1, resolution = 5e4, gg=NULL, interchromosomal.dist = 1e8, interchromosomal.table = NULL, verbose = TRUE, mc.cores = 5, seed=42)
{
  set.seed(seed)
  if (!length(binsets))
    stop('empty binsets')

  ## sort from left to right
  ## binsets = gr2dt(binsets)
  ## setkeyv(binsets, c('seqnames','start', 'end'))

  if (verbose) smessage('Making kernels')

  ## populate data for kernels
####
  ## Approach to put all dostances in one bag
  binsets$binid = 1:length(binsets)
  if (is.null(gg))
  {
    distance.kernel = gr2dt(binsets)[, as.data.table(expand.grid(i = binid, j = binid))[i<j, ], by = bid] %>% setkeyv(c('i', 'j'))
    distance.kernel[, val := GenomicRanges::distance(binsets[i], binsets[j])]
    distance.kernel = distance.kernel[j-i==1]
  } else {
    if (verbose) smessage('Using graph distance')
######
    distance.kernel = gr2dt(binsets)[, as.data.table(expand.grid(i = binid, j = binid))[i<j, ], by = bid] %>% setkeyv(c('i', 'j'))
    distance.kernel[, val := GenomicRanges::distance(binsets[i], binsets[j])]
    distance.kernel.intra = distance.kernel[!is.na(val)]    
####
    gg = gg$copy$disjoin(disjoin(binsets))
    
    binsetd = data.table(binid = binsets$binid, bid = binsets$bid) %>% setkey('binid')
    binsetd[, gid := gr.match(binsets, gr.chr(gg$nodes$gr))] ## will streamline distance computation to get index
    distance.kernel.g = pbmclapply(unique(binsetd$bid), function(this.bid){
        this.dist = tryCatch(gg$dist(binsetd[bid == this.bid, gid]) %>% melt %>% as.data.table %>% setnames(., c('gi', 'gj', 'val')),
                             error = function(e) NULL)
        if(!is.null(this.dist)){
            this.dist[, bid := as.factor(this.bid)]
            return(this.dist)
         }
    }, mc.cores = mc.cores) %>% rbindlist
    distance.kernel = merge(distance.kernel.g, binsetd, by.x = c("gi", "bid"), by.y = c("gid", "bid"), allow.cartesian=TRUE)
    setnames(distance.kernel, "binid", "i")
    distance.kernel = merge(distance.kernel, binsetd, by.x = c("gj", "bid"), by.y = c("gid", "bid"), allow.cartesian=TRUE)
    setnames(distance.kernel, "binid", "j")
    distance.kernel = unique(distance.kernel[, .(bid, i, j, val)][i<j])
    distance.kernel = merge(distance.kernel, distance.kernel.intra, by = c("bid", "i", "j"), all.x = T)
    distance.kernel[, val := ifelse(val.x <= 1, val.y, val.x)]  
    distance.kernel[, val := round_any(val, resolution, f = ceiling)]
    distance.kernel = distance.kernel[, .(bid, i , j, val)]  
    setkeyv(distance.kernel, c('i', 'j'))
  }

  if (!is.null(interchromosomal.table)){
      interchromosomal.table[, dist := round_any(dist, resolution)]
      distance.kernel = merge(distance.kernel, gr2dt(binsets)[, .(seqnames, binid)], by.x = "i", by.y = "binid")
      distance.kernel = merge(distance.kernel, gr2dt(binsets)[, .(seqnames, binid)], by.x = "j", by.y = "binid")
      setnames(distance.kernel, c("seqnames.x", "seqnames.y"), c("V1", "V2"))
      distance.kernel = merge(distance.kernel, interchromosomal.table, by = c("V1", "V2"), all.x = T)
      distance.kernel[, val := ifelse(is.na(val), dist, val)]
      distance.kernel = distance.kernel[, .(j,   i,  bid, val)]
      distance.kernel[, val := ifelse(is.na(val), max(interchromosomal.table$dist), val)]
      setkeyv(distance.kernel, c('i', 'j'))
  } else {
      distance.kernel[is.infinite(val), val := interchromosomal.dist]
      distance.kernel[is.na(val), val := interchromosomal.dist]
      #distance.kernel = distance.kernel[!is.infinite(val)]
      #distance.kernel = distance.kernel[!is.na(val)] 
  }

  ## distance.kernel[is.na(val), val := interchromosomal.dist]
  
  ####
  binsets = gr2dt(binsets)
  setkeyv(binsets, c('seqnames','start', 'end'))

  ####
  ends.kernel = binsets[, .(val = max(end)), by = .(seqnames, bid)] %>% setkey('seqnames')
  ends.kernel[, freq := .N, by = seqnames]
  if (any(ends.kernel[, freq] == 1)){
      ends.kernel = rbind(ends.kernel[freq == 1], ends.kernel) %>% setkey('seqnames')
  }

####
  ## distance.kernel = binsets[, .(val = start - (data.table::shift(end))), by = .(seqnames, bid)][!is.na(val), ] %>% setkey('seqnames')
  ## distance.kernel[, val := ifelse(is.na(val), 0, val)] %>% setkey('seqnames')
  ## distance.kernel[, freq := .N, by = .(seqnames, bid)]
  ## distance.kernel =  rbind(distance.kernel[freq == 1], distance.kernel) %>% setkey('seqnames')
  
  ####
  width.kernel = binsets[, .(seqnames, val = width)] ## %>% setkey('seqnames')
  ## width.kernel[, freq := .N, by = seqnames]
  ## width.kernel = rbind(width.kernel[freq == 1], width.kernel)%>% setkey('seqnames')
  
  ends.bw =  ends.kernel[, .(bw = density(val)$bw), keyby = seqnames]

  ## distance.bw =  distance.kernel[, .(bw = density(val)$bw), keyby = seqnames]
  ## width.bw =  width.kernel[, .(bw = density(val)$bw), keyby = seqnames]

  distance.bw =  distance.kernel[, .(bw = density(val)$bw)]
  width.bw =  width.kernel[, .(bw = density(val)$bw)]
  
  if (verbose) smessage('Making transition matrices')

  ## populate cardinality 
  cardinality.multinomial = binsets[, .(cardinality = .N), by = .(bid)][, .N, by = cardinality][, .(cardinality, prob = N/sum(N))] %>% setkey(cardinality)

  ## populate chrom multi-nomial and chr-chr transition matrix
  chrom.multinomial = binsets[, .(count = .N + pseudocount), by = seqnames][, .(seqnames, prob = count / sum(count))] %>% setkey('seqnames')

  ## populate chrom multi-nomial and chr-chr transition matrix

  chrom.transition = merge(binsets, binsets, by = 'bid', allow.cartesian = TRUE)[, .(count = .N + pseudocount), by = .(seqnames.x, seqnames.y)][, .(seqnames.y, prob = count / sum(count)), keyby = .(seqnames.x)] 
  
  ## chrom.transition = merge(binsets, binsets, by = 'bid', allow.cartesian = TRUE)[, .(count = .N + pseudocount), by = .(seqnames.x, seqnames.y)]
  ## chrom.transition[, exclude := ifelse(seqnames.x %in% unique(distance.kernel$seqnames), FALSE, TRUE)]
  ## chrom.transition[, count := ifelse(exclude & seqnames.x == seqnames.y, 0, count)]
  ## chrom.transition = chrom.transition[, .(seqnames.y, prob = count / sum(count)), keyby = .(seqnames.x)]
  ## chrom.transition[, prob := ifelse(is.nan(prob), 0, prob)]
  ## chrom.transition[, sum.p := sum(prob), by = seqnames.x]
  ## chrom.transition = chrom.transition[sum.p > 0]
  
  
  if (verbose) smessage('Generating random sets')
  out = pbmclapply(1:n, mc.cores = mc.cores, function(i)
  {
    cardinality = cardinality.multinomial[, sample(cardinality, 1, prob = prob)]
    binset = data.table(bid = i, seqnames = chrom.multinomial[, sample(seqnames, prob = prob, 1)])
    binset$end = rdens(1, ends.kernel[.(binset$seqnames), val], width = ends.bw[.(binset$seqnames), bw])
    binset$start = binset$end - round_any(rdens(1, width.kernel[, val], width = width.bw[, bw]),resolution)
    ## binset$start = binset$end - rdens(1, width.kernel[, val], width = width.bw[, bw])
    for (k in setdiff(1:cardinality, 1))
    {
      lastchrom = tail(binset$seqnames, 1)
      laststart = tail(binset$start, 1)
      newchrom = chrom.transition[.(lastchrom), sample(seqnames.y, 1, prob = prob)]
      ##print(as.character(newchrom))
      if (newchrom == lastchrom)
      {
        ## newbin = data.table(bid = i,
        ##                     seqnames = newchrom,
        ##                     end = laststart - rdens(1, distance.kernel[, val], resolution, width = distance.bw[, bw]))
        ## newbin[, start := end - rdens(1, width.kernel[, val], width = width.bw[, bw])]
        ##
        newbin = data.table(bid = i,
                           seqnames = newchrom,
                           end = laststart - round_any(rdens(1, distance.kernel[, val], resolution, width = distance.bw[, bw]), resolution))
        newbin[, start := end - round_any(rdens(1, width.kernel[, val], width = width.bw[, bw]), resolution)]
        ##print(newbin)
      }
      else
      {
        newbin = data.table(bid = i,
                            seqnames = newchrom,
                            end = rdens(1, ends.kernel[.(newchrom), val], width = ends.bw[.(newchrom), bw]))
        newbin[, start := end - round_any(rdens(1, width.kernel[, val], width = width.bw[, bw]), resolution)]
        ##newbin[, start := end - rdens(1, width.kernel[, val], width = width.bw[, bw])]
        ##print(newbin)
      }
      binset = rbind(binset, newbin, fill = TRUE)
    }
    binset
  }) %>% rbindlist(fill = TRUE)

  ## fix coordinates
  out[, start := pmax(1, ceiling(start))]
  out[, end := pmax(1, ceiling(end))]
  out = gr2dt(dt2gr(out))
  return(out)
}



#' @name rdens
#' @description
#'
#' Function to sample kernel density from a distribution adapted from
#' https://stats.stackexchange.com/questions/321542/how-can-i-draw-a-value-randomly-from-a-kernel-density-estimate
#' 
#' @param n integer number of samples
#' @param values vector data to be used for building the kernel
#' @param width integer kernel width
#' @param kernel string kernel type, use Gaussian
#' @return data.able with values drawn from the kernel

rdens = function(n, values, width = NULL, kernel="gaussian") {
  if (is.null(width)) width = density(values)$bw
  rkernel <- function(n) rnorm(n, sd=width)  # Kernel sampler
  ret = data.table(value = sample(values, n, replace=TRUE) + rkernel(n))
  ret[, value := ifelse(value < 0, value*-1, value)]
  return(ret$value)
}



#' @name rdens_sliding
#' @description
#'
#' Function to sample kernel density from a distribution adapted from
#' https://stats.stackexchange.com/questions/321542/how-can-i-draw-a-value-randomly-from-a-kernel-density-estimate
#' for sliding_window implementation
#' 
#' @param n integer number of samples
#' @param den vector data to be used for building the kernel
#' @param dat integer kernel width
#' @param kernel string kernel type, use Gaussian
#' @return data.able with values drawn from the kernel


rdens_sliding = function(n, den, dat, kernel="gaussian") {
    width <- den$bw                              # Kernel width
    rkernel <- function(n) rnorm(n, sd=width)  # Kernel sampler
    ret = data.table(value = sample(dat, n, replace=TRUE) + rkernel(n))
    ret[, value := ifelse(value < 0, value*-1, value)]
    return(ret$value)
}



#' @name .chr2str
#' @description
#'
#' Internal function to convert chromosome string to int
#' 
#' @param chr_str chromosome string
#' @return int chr


.chr2str = function(chr_str){
    chr.int.ch = gsub("chr", "", chr_str)
    if (any(chr.int.ch == "X")){
        chr.int.ch[which(chr.int.ch == "X")] = 23
    } else if (any(chr.int.ch == "Y")){
        chr.int.ch[which(chr.int.ch == "Y")] = 24
    }
        chr.int = as.integer(chr.int.ch)
    return(chr.int)
}


#' @name extract_dw
#' @description
#'
#' Internal function to extract covariates
#' 
#' @param chromunity.outbinsets from chromunity algorithms
#' @return data.table with covariates



extract_dw = function(chromunity.out, num.cores = 10){
####
    unique.chrom = as.character(unique(chromunity.out$bid))
    message("Generating distributions")
    list.dw.vec = pbmclapply(1:length(unique.chrom), function(i){
        this.uc = unique.chrom[i]
        this.chrom = chromunity.out %Q% (bid %in% this.uc)
#####
        ## modified for immidiate dists
        this.chrom.dt = gr2dt(this.chrom)
        
        dist.vec = as.data.table(gr.dist(this.chrom)[lower.tri(gr.dist(this.chrom))])[, type := "dist"]
        width.vec = as.data.table(width(this.chrom))[, type := "width"]
        this.card = as.data.table(length(this.chrom))[, type := "cardinality"]
        all.vec = rbind(dist.vec, width.vec, this.card)
        all.vec[, bid := this.uc]
        if (this.card$V1 > 2){
            return(all.vec)
        }
        }, mc.cores = num.cores)
    all.dt = rbindlist(list.dw.vec, fill = T)
    return(all.dt)
}




#' @name annotate
#' @description
#'
#' Given n binsets annotates their k-power set (i.e. all sets in the power set with cardinality <= k) with
#' basic (min median max distance, cardinality, width) and optional numerical and interval covariates provided as input.
#' 
#' @param binsets GRanges of bins with fields $bid specifying binset id
#' @param concatemers GRanges of monomers with fields seqnames, start, end, and $cid specifying concatemer id, which will be counted across each binset
#' @param covariates Covariate object specifying numeric or interval covariates to merge with this binset
#' @param k the max cardinality of sets to annotate from the power set of each binmer
#' @param gg optional gGraph input specifying alternate distance function
#' @param interchromosomal.dist numeric scalar of "effective" distance for inter chromosomal bins [1e8]
#' @param verbose logical flag
#' @param mc.cores integer how many cores to parallelize
#' @param threads used to set number of data table threads to use with setDTthreads function, segfaults may occur if >1
#' @author Aditya Deshpande, Marcin Imielinski
#' @export
#' @return data.table of sub-binsets i.e. k-power set of binsets annotated with $count field representing covariates, ready for fitting, **one row per binset

annotate = function(binsets, concatemers, covariates = NULL, k = 5, interchromosomal.dist = 1e8, interchromosomal.table = NULL, gg = NULL, mc.cores = 5, numchunks = 200*mc.cores-1, seed = 42, verbose = TRUE, unique.per.setid = TRUE, resolution = 5e4, threads = 1)
{
  setDTthreads(threads)
  set.seed(seed)
  if (!inherits(binsets, 'GRanges'))
    binsets = dt2gr(binsets)

  binsets = gr.stripstrand(binsets)
  binsets$bid = as.factor(binsets$bid)
  ## each row of binsets is a bin
  binsets$binid = 1:length(binsets)
  #####
  ## frontload / batch some useful calculations
  #####
  ## bin vs concatemer overlaps
  if (verbose) smessage('Overlapping ', length(binsets), ' bins with ', length(concatemers), ' monomers')

  ov = binsets %*% concatemers[, 'cid'] %>% as.data.table
  ##
  if (verbose) smessage('Computing bin by bin pairwise distance')
  ## bin x bin pairwise distance within each set
  if (is.null(gg))
  {
    bindist = gr2dt(binsets)[, as.data.table(expand.grid(i = binid, j = binid))[i<j, ], by = bid] %>% setkeyv(c('i', 'j'))
    bindist[, distance := GenomicRanges::distance(binsets[i], binsets[j])]
  } else {
    if (verbose) smessage('Using graph distance')
######
    bindist = gr2dt(binsets)[, as.data.table(expand.grid(i = binid, j = binid))[i<j, ], by = bid] %>% setkeyv(c('i', 'j'))
    bindist[, distance := GenomicRanges::distance(binsets[i], binsets[j])]
    bindist.intra = bindist[!is.na(distance)]    
######
    gg = gg$copy$disjoin(disjoin(binsets))
    binsetd = data.table(binid = binsets$binid, bid = binsets$bid) %>% setkey('binid')
    binsetd[, sun.bin.id := 1:.N, by = bid]

    ## binsetd[, gid := gr.match(binsets, gr.chr(gg$nodes$gr))]
    ## will streamline distance computation to get index
    ## bindist.g = pbmclapply(unique(binsetd$bid), function(this.bid){
    ##     this.dist = tryCatch(gg$dist(binsetd[bid == this.bid, gid]) %>% melt %>% as.data.table %>% setnames(., c('gi', 'gj', 'distance')),
    ##                          error = function(e) NULL)
    ##     if(!is.null(this.dist)){
    ##         this.dist[, bid := as.factor(this.bid)]
    ##         return(this.dist)
    ##      }
    ## }, mc.cores = mc.cores) %>% rbindlist
    ## bindist = merge(bindist.g, binsetd, by.x = c("gi", "bid"), by.y = c("gid", "bid"), allow.cartesian=TRUE)
    ## setnames(bindist, "binid", "i")
    ## bindist = merge(bindist, binsetd, by.x = c("gj", "bid"), by.y = c("gid", "bid"), allow.cartesian=TRUE)
    ## setnames(bindist, "binid", "j")
    ## bindist = unique(bindist[, .(bid, i, j, distance)][i<j])
    ## bindist = merge(bindist, bindist.intra, by = c("bid", "i", "j"), all.x = T)
    ## bindist[, distance := ifelse(distance.x <= 1, distance.y, distance.x)]  
    ## bindist[, distance := round_any(distance, resolution)]
    ## bindist = bindist[, .(bid, i , j, distance)]  
    ## ## bindist[, distance := ifelse(distance < resolution, resolution, distance)]
    ## setkeyv(bindist, c('i', 'j')) 

    ## Better approach to get graph distances, need further testing
    bindist.g = pbmclapply(unique(binsetd$bid), function(this.bid){
        this.dist = tryCatch(gg$dist(gr.nochr(binsets[binsetd[bid == this.bid, binid]])) %>% melt %>% as.data.table %>% setnames(., c('gi', 'gj', 'distance')), error = function(e) NULL)
        if(!is.null(this.dist)){
            this.dist[, bid := as.factor(this.bid)]
            return(this.dist)
        }
    }, mc.cores = mc.cores) %>% rbindlist
    bindist = merge(bindist.g, binsetd, by.x = c("gi", "bid"), by.y = c("sun.bin.id", "bid"), allow.cartesian=TRUE)
    setnames(bindist, "binid", "i")
    bindist = merge(bindist, binsetd, by.x = c("gj", "bid"), by.y = c("sun.bin.id", "bid"), allow.cartesian=TRUE)
    setnames(bindist, "binid", "j")
    bindist = unique(bindist[, .(bid, i, j, distance)][i<j])
    bindist = merge(bindist, bindist.intra, by = c("bid", "i", "j"), all.x = T)
    bindist[, distance := ifelse(distance.x <= 1, distance.y, distance.x)]  
    bindist[, distance := round_any(distance, resolution)]
    bindist = bindist[, .(bid, i , j, distance)]  
    ## bindist[, distance := ifelse(distance < resolution, resolution, distance)]
    setkeyv(bindist, c('i', 'j'))
  }
  if (!is.null(interchromosomal.table)){
      interchromosomal.table[, dist := round_any(dist, resolution)]
      bindist = merge(bindist, gr2dt(binsets)[, .(seqnames, binid)], by.x = "i", by.y = "binid")
      bindist = merge(bindist, gr2dt(binsets)[, .(seqnames, binid)], by.x = "j", by.y = "binid")
      setnames(bindist, c("seqnames.x", "seqnames.y"), c("V1", "V2"))
      bindist = merge(bindist, interchromosomal.table, by = c("V1", "V2"), all.x = T)
      bindist[, distance := ifelse(is.na(distance), dist, distance)]
      bindist = bindist[, .(j,   i,  bid,    distance)]
      bindist[, distance := ifelse(is.na(distance), max(interchromosomal.table$dist), distance)]
      setkeyv(bindist, c('i', 'j'))
  } else {
      bindist[is.infinite(distance), distance := interchromosomal.dist]
      bindist[is.na(distance), distance := interchromosomal.dist]
  }
#####
  ## now start annotating sub.binsets aka sets
  #####
  if (verbose) smessage('Making sub binsets')
  ## make all sub power sets up to k for all bids
  ## each row is now a binid with a setid and bid
  ## adding ones for sum.cov to be removed later

  sub.binsets = gr2dt(binsets)[, powerset(binid, 1, k), by = bid] %>% setnames(c('bid', 'setid', 'binid')) %>% setkey(bid)
  sub.binsets[, ":="(iid = 1:.N, tot = .N), by = .(setid, bid)] ## label each item in each sub-binset, and total count will be useful below

  if (verbose) smessage('Made ', nrow(sub.binsets), ' sub-binsets')

  ## first use ov to count how many concatemers fully overlap all the bins in the subbinset
  if (verbose) smessage('Counting concatemers across sub-binsets across ', mc.cores, ' cores')
  ref.counts = unique(sub.binsets[, .(bid, setid)])[, count := 0]
  if (nrow(ov))
    {
      ubid = unique(sub.binsets$bid) ## split up to lists to leverage pbmclapply
      ubidl = split(ubid, ceiling(runif(length(ubid))*numchunks)) ## randomly chop up ubid into twice the number of mc.coreso
      counts = pbmclapply(ubidl, mc.cores = mc.cores, function(bids)
      {
          out = tryCatch(merge(sub.binsets[.(as.factor(bids)), ], ov[bid %in% bids], by = c('binid', 'bid'), allow.cartesian = TRUE), error = function(e) NULL)
        if (!is.null(out) & nrow(out) > 0){
            if (unique.per.setid){
                out = out[, .(binid, bid, setid, iid, tot, cid)]
                this.step1 = out[, .(hit = all(1:tot[1] %in% iid)), by = .(cid, setid, bid, tot)][hit == TRUE]
                setkeyv(this.step1, c("bid", "cid", "tot"))
                this.step1 = this.step1[, tail(.SD, 1), by = .(cid, bid)]
                this.counts = this.step1[, .(count = sum(hit, na.rm = TRUE)), by = .(setid, bid)]
                this.counts = merge(ref.counts[bid %in% bids], this.counts, by = c("bid", "setid"), all.x = T)
                this.counts[, count := sum(count.x, count.y, na.rm = T), by = .(bid, setid)][, .(bid, setid, count)]
            } else {
                out[, .(hit = all(1:tot[1] %in% iid)), by = .(cid, setid, bid)][, .(count = sum(hit, na.rm = TRUE)), by = .(setid, bid)]
            }
        } else {
            NULL
        }
      })  %>% rbindlist
    }
  
  ## other goodies
  ## changing to mean instead of median
  if (verbose) smessage('Computing min median max distances per setid')
  ## dists = sub.binsets[, bindist[as.data.table(expand.grid(i = binid, j = binid))[i<j, ], .(dist = c('min.dist', 'mean.dist', 'max.dist'), value = as.numeric(summary(distance+1, na.rm = T)[c(1, 4, 6)]))], by = .(setid, bid)] %>% dcast(bid + setid ~ dist, value.var = 'value')

  sub.binsets = sub.binsets[tot > 1]
  ubid = unique(sub.binsets$bid) ## split up to lists to leverage pbmclapply
  ubidl = split(ubid, ceiling(runif(length(ubid))*numchunks)) ## randomly chop up ubid into twice the number of mc.coreso
  dists = pbmclapply(ubidl, mc.cores = mc.cores, function(bids)
  {
      this.sub.binsets = sub.binsets[bid %in% bids]
      this.dists = this.sub.binsets[, bindist[as.data.table(expand.grid(i = binid, j = binid))[i<j, ], .(dist = c('min.dist',  'max.dist'), value = quantile(distance+1,  c(0, 1)))], by = .(setid, bid)] %>% dcast(bid + setid ~ dist, value.var = 'value')
      })  %>% rbindlist

  ##browser()
  if (verbose) smessage('Computing marginal sum per bid')
  margs = counts[, .(sum.counts = sum(count)), by = .(bid)]
   
  if (verbose) smessage('Computing total width and cardinality per setid')
  ubid = unique(sub.binsets$bid) ## split up to lists to leverage pbmclapply
  ubidl = split(ubid, ceiling(runif(length(ubid))*numchunks)) ## randomly chop up ubid into twice the number of mc.coreso
  widths = pbmclapply(ubidl, mc.cores = mc.cores, function(bids)
  {
      this.sub.binsets = sub.binsets[bid %in% bids]
      this.widths = this.sub.binsets[, .(width = sum(width(binsets)[binid])+1, cardinality = .N), by = .(bid, setid)]
  })  %>% rbindlist


  if (verbose) smessage('Merging counts, distance, and width')
  annotated.binsets = unique(sub.binsets[, .(bid, setid)]) %>%
    merge(counts, by = c('bid', 'setid')) %>%
    merge(dists, all.x = TRUE, by = c('bid', 'setid')) %>%
    merge(widths, all.x = TRUE, by = c('bid', 'setid')) %>%
    merge(margs, all.x = TRUE, by = c('bid'))  

  annotated.binsets = annotated.binsets[cardinality > 1]
  
  ## now cycle through interval / numeric covariates
  if (!is.null(covariates))
  {
    setkeyv(sub.binsets, c('bid', 'binid'))
    if (verbose) smessage('Adding covariates')
    for (i in 1:length(covariates))
    {
      if (covariates$type[i] == 'numeric')
        {
          dat = covariates$data[[i]][, covariates$field[[i]]]
          names(values(dat)) = 'val'
        }
      else
        dat = covariates$data[[i]][, c(0)]

      if (verbose) smessage('Overlapping ', length(dat), ' ranges from covariate ', covariates$names[i])
      ov = binsets %*% dat %>% as.data.table

      if (verbose) smessage('Tallying values from ', covariates$type[i], ' covariate ', covariates$names[i])
      covdata = data.table(val = numeric(), bid = factor(), setid = numeric()) %>% setnames('val', covariates$names[i])

      if (nrow(ov))
        {
          sov = merge(sub.binsets, ov, by = c('bid', 'binid'), allow.cartesian = TRUE)
          
          if (covariates[i]$type == 'interval') ## count vs ?sum width TODO: add flag on covariates specifying whether to count
          {
            covdata = sov[ , .(val = .N), by = .(bid, setid)]
            ## covdata = ov[sub.binsets, , allow.cartesian = TRUE][, .(val = sum(width, na.rm = TRUE)), by = .(bid, setid)]          
          }
          else if (covariates[i]$type == 'numeric') ## average value
            covdata = sov[, .(val = sum(width*val, na.rm = TRUE)/sum(width + 0*val, na.rm = TRUE)), by = .(bid, setid)]

          ## eps is small amount to add to covariate to make it loggable
          eps = range(covdata$val, na.rm = TRUE) %>% diff * 1e-4
          covdata$val = covdata$val + eps

          if (any(is.na(covdata$val)) || any(covdata$val<=0))
            warning(paste('Covariate ', covariates$names[[i]], ' has NA or <= values, check or fix before fitting '))
          setnames(covdata, 'val', covariates$names[i])
        }
          
      if (verbose) smessage('Merging data from ', covariates$type[i], ' covariate ', covariates$names[i])
      annotated.binsets = merge(annotated.binsets, covdata, all.x = TRUE, by = c('bid', 'setid'))
    }
  }
  return(annotated.binsets) 
}


#' @name powerset
#' @description
#'
#' 
#' @param set vector
#' @param min.k minimum cardinality subset to include in power set
#' @param max.k maximum cardinality subset to include in power set
#' @author Aditya Deshpande, Marcin Imielinski
#' @export
powerset = function(set, min.k = 1, max.k = 5)
  (lapply(pmin(length(set), min.k):pmin(length(set), max.k), function(k) combn(set, k, simplify = FALSE)) %>% do.call('c', .) %>% dunlist)[, .(setid = listid, item = V1)]



#' @name fit
#' @description
#'
#' Fit model to annotated binsets (outputs of annotate)
#'
#' @param annotated.binsets data.table of annotated binsets with at least the fields $bid (binset id) $setid (subset id) $count $min.dist $med.dist $max.dist $width $cardinality $width +/- additional covariates as well, note: every column in this data.table must be a covariate 
#' @param nb logical flag specifying whether to run negative binomial vs. Poisson model
#' @param return.model logical flag whether to return the model or the scored input
#' @param verbose logical flag
#' @return fitted model that can be applied to new cases
#' @author Aditya Deshpande, Marcin Imielinski
#' @export
fit = function(annotated.binsets, nb = TRUE, return.model = TRUE, verbose = TRUE, maxit = 50, seed = 42)
{
  set.seed(seed)
  if (!nb) stop('not yet supported')
  ## added sumcounts as cov and width as only offset
  covariates = setdiff(names(annotated.binsets), c('bid', 'setid', 'mean.dist', 'count'))
  fmstring = paste('count ~', paste(paste0('log(', covariates, ')', collapse = ' + ')))
  ## fmstring = paste0(fmstring, " + ", "offset(log(width))")
  fm = formula(fmstring)
  ##
  model = tryCatch(glm.nb(formula = fm, data = annotated.binsets, control = glm.control(maxit = maxit)), error = function(e) NULL)

  return(list(model = model, covariates = covariates))
}


#' @name sscore
#' @description
#'
#' Takes as input annotated binsets and a model and returns each binset scored according to the model
#' Meaning it annotates the columns with $p, $p.left, p.right, $count.predicted
#'
#' @param annotated.binsets data.table of annotated binsets with field $count $min.width $med.width $max.width $cardinality $width +/- additional covariates as well
#' @param model model output of fit(), the fields of model must match the columns of annotated.binsets
#' @param verbose logical flag 
#' @return annotated.binsets with additional fields $p and $count.predicted specifying predicted count at each binset
#' @author Aditya Deshpande, Marcin Imielinski
#' @export
sscore = function(annotated.binsets, model, verbose = TRUE)
{
  if (is.null(annotated.binsets$count))
    stop('annotsynated.binsets need to have $count column, did you forget to annotate?')

  if (length(missing <- setdiff(model$covariates, names(annotated.binsets))))
    stop('annotated.binsets missing covariates: ', paste(missing, collapse = ', '))

  annotated.binsets$count.predicted = (predict(model$model, type = "response", newdata = annotated.binsets))

  ## randomized p value procedure for negative binomial
  pval = annotated.binsets[, pnbinom(count -1, mu = count.predicted, size = model$model$theta, lower.tail = F)]
  pval.right = annotated.binsets[, pnbinom(count, mu = count.predicted, size = model$model$theta, lower.tail = F)]
  pval.right = ifelse(is.na(pval.right), 1, pval.right)
  pval = ifelse(is.na(pval), 1, pval)
  annotated.binsets[, enrichment := count / count.predicted]
  annotated.binsets$pval = runif(nrow(annotated.binsets), min = pval.right, max = pval)
  return(annotated.binsets)
}

#' @name synergy
#' @description
#'
#' Applies synergy model by either taking as input (or training) a background model using a set of providved (or generated) background.binsets
#' and testing a set of binmers against a set of concatemers.  The model can be generated using a set of covariates. 
#' @export
#' @param concatemers data.table of monomers with fields seqnames, start, end, and $cid specifying concatemer id, which will be counted across each binset
#' @param binsets data.table of bins with fields seqnames, start, end, and $bid specifying binset id
#' @param background.binsets background binsets drawn from output of background or similar, if NULL and model is NULL will be computed [NULL]
#' @param model model to fit to binsets / concatemers, if NULL then model is fit after annotating background.binsets, model features must be compatible with provided covariates 
#' @param covariates covariates to use when annotating binsets and background.binsets
#' @param k cardinality k to which to limit k-power set for each binset or background.binset
#' @param gg gGraph around which to compute bin to bin distances in binsets and also extract copy numbers using node / edge feature $cn
#' @param maxit glmnb max iterations
#' @param mc.cores threads to parallelize
#' @param verbose logical flag whether to fit
#' @author Aditya Deshpande, Marcin Imielinski
#' @export
synergy = function(binsets, concatemers, background.binsets = NULL, model = NULL, covariates = NULL, annotated.binsets = NULL, k = 5, gg = NULL, mc.cores = 5, verbose = TRUE, resolution = 5e4, maxit = 50)
{
  if (!inherits(binsets, 'GRanges'))
    {
      binsets = muffle(dt2gr(binsets))
      if (is.null(binsets))
        stop('binsets failed conversion to GRanges, please provide valid GRanges')
    }

  if (is.null(background.binsets) & is.null(model))
  {
    stop('Please provide valid background binsets and model')
  }
  
  if (is.null(model))
  {
    if (verbose) smessage('Annotating k-power sets of background binsets using features and covariates')
    annotated.background.binsets = annotate(binsets = background.binsets, concatemers = concatemers, gg = gg, covariates = covariates, k = k, mc.cores = mc.cores, verbose = verbose)

    if (verbose) smessage('Fitting model to k-power sets of annotated background binsets')
    model = fit(annotated.background.binsets, nb = TRUE, return.model = TRUE, verbose = verbose, maxit = maxit)
  }

  if (is.null(annotated.binsets))
    annotated.binsets = annotate(binsets = binsets, concatemers = concatemers, covariates = covariates, k = k, gg = gg, mc.cores = mc.cores, verbose = verbose)

  ## browser()
  if (verbose) smessage('Scoring binsets')
  scored.binsets = sscore(annotated.binsets, model, verbose = verbose)

  scored.binsets[, multiway := cardinality > 2]
  setkey(scored.binsets, bid)
  ubid = unique(scored.binsets$bid)

  res = pbmclapply(ubid, function(this.bid) muffle(dflm(glm.nb.fh(data = scored.binsets[.(this.bid),], count ~ multiway + offset(log(count.predicted)), theta = model$model$theta, control = glm.control(maxit = maxit)))[2, ][, name := this.bid])) %>% rbindlist
  setnames(res, 'name', 'bid')

  return(res)
}


#' @name re_chromunity
#' @description
#'
#' Runs genome-wide chromunity detection across a sliding or provided genomic window
#'
#' @param concatemers GRanges with $cid
#' @param resolution bin size for community detection [5e4]
#' @param region region to run on [si2gr(concatemers)]
#' @param windows GRanges or GRangesList of windows to test, more appropriate for testing specific windows, e.g. targets such as promoters/enhancers windows. If sliding window approach is desired, keep this parameter as NULL and use piecewise argument.
#' @param shave  logical flag specifying whether to iteratively "shave" concatemers and bins to a subset C and B where every bin in B has at leat bthresh concatemer support across C and every concatemer in C has order / cardinality of at least cthresh across B, this is done by iteratively removing bins and concatemers until you reach a fixed point
#' @param window.size window size to do community detection within
#' @param max.size max size of problem (concatemers x bins) to consider (default is 2^31-1).  If we hit this problem size, will subsample concatemers so that concatemers * bins is < max.dim, this step is done downstream of shaving 
#' @param tiles.k.knn KNN parameter specifying how many nearest neighbors to sample when building KNN graph
#' @param peak.thresh peak threshold with which to call a peak
#' @param k.min minimal number of nearest neighbors an edge in KNN graph needs to have before community detection
#' @param pad integer pad to use when computing the footprint of each chromunity and finding peak regions which become binsets
#' @return list with items $binset,  $support, $params: $binsets is GRanges of bins with field $bid corresponding to binset id and $support which is the concatemer community supporting the binset which are GRanges with $bid
#' @author Aditya Deshpande, Marcin Imielinski
#' @export


re_chromunity = function(concatemers, resolution = 5e4, region = si2gr(concatemers), windows = NULL, piecewise = TRUE, shave = FALSE, bthresh = 3, cthresh = 3, max.size = 2^31-1, subsample.frac = NULL, window.size = 2e6, max.slice = 1e6, min.support = 5, stride = window.size/2, mc.cores = 5, k.knn = 25, k.min = 5, pad = 1e3, peak.thresh = 0.85, seed = 42, verbose = TRUE, filename=NULL)

{
  set.seed(seed)
  if (is.null(windows))
      windows = gr.start(gr.tile(region, stride))+window.size/2
      windows = dt2gr(gr2dt(windows)[, start := ifelse(start < 0, 1, start)])
  

  if (inherits(windows, 'GRanges'))
    windows = split(windows, 1:length(windows))

  if (is.null(concatemers$cid))
  {
      if ('read_idx' %in% names(values(concatemers)))
        names(values(concatemers))[match('read_idx', names(values(concatemers)))] = 'cid'
      else
        stop("concatemer GRanges must have metadata column $cid or $read_idx specifying the concatemer id")
  }
  
  params = data.table(k.knn = k.knn, k.min = k.min, seed = seed)

  if (!is.null(resolution)){
      bins = gr.tile(reduce(gr.stripstrand(unlist(windows))), resolution)[, c()]
  } else {
      bins = windows %>% unlist %>% gr.stripstrand %>% disjoin
  }
    
  if (verbose) cmessage('Generated ', length(bins), ' bins across ', length(windows), ' windows')

  if (verbose) cmessage('Matching concatemers with bins, and bins with windows using gr.match with max.slice ', max.slice, ' and ', mc.cores, ' cores')

  ## (batch) match up concatemers with binids
  concatemers$binid = gr.match(concatemers, bins, max.slice = max.slice, mc.cores =  mc.cores, verbose = verbose)

  ## maybe NA need to be removed
  concatemers = concatemers %Q% (!is.na(binid))

  ## match window ids and bins 
  binmap = bins %*% grl.unlist(windows)[, c('grl.ix')] %>% as.data.table %>% setnames('query.id', 'binid') %>% setnames('grl.ix', 'winid') %>% setkeyv('winid')

  ## cycle through (possibly complex) windows call cluster_concatemers and convert to gr.sums
  ## winids = unique(binmap$winid)
  winids = unique(binmap[binid %in% unique(concatemers$binid)]$winid)

  if (shave)
    {
      if (verbose)
        cmessage('Shaving concatemers with bthresh = ', bthresh, ' and cthresh = ', cthresh)
      concatemers = shave_concatemers(concatemers, bthresh = bthresh, cthresh = cthresh, verbose = verbose) 
    }

    ncat = concatemers$cid %>% unique %>% length
    nbin = concatemers$binid %>% unique %>% length


  if (verbose)
      cmessage(sprintf('Running concatemer communities with %s concatemers and %s bins', ncat, nbin))
  

    cc = concatemer_communities(concatemers, k.knn = k.knn, k.min = k.min, seed = seed, max.size = max.size, verbose = verbose, subsample.frac = subsample.frac, filename = filename)
  
    if (length(cc))
      cc = cc %Q% (support>=min.support)
      
  if (!length(cc))
    return(Chromunity(concatemers = GRanges(), binsets = GRanges(), meta = params))

  uchid = unique((cc %Q% (support >= min.support))$chid)

  if (verbose) cmessage('Analyzing gr.sums associated with ', length(uchid), ' concatemer communities to generate binsets')


  binsets = pbmclapply(uchid, mc.cores = mc.cores, function(this.chid)
  {
    suppressWarnings({
      this.cc = cc %Q% (chid == this.chid)
      peaks = gr.sum(this.cc + pad) %>% gr.peaks('score')
      binset = bins[, c()] %&% (peaks[peaks$score > quantile(peaks$score, peak.thresh)])
      if (length(binset))
      {
        binset$chid = this.chid
        binset$bid = this.chid
        binset$winid = this.cc$winid[1]
        binset = gr.reduce(binset)
      }
    })
    binset
  })  %>% do.call(grbind, .)

  return(Chromunity(concatemers = cc[cc$chid %in% binsets$chid], binsets = binsets, meta = params))
}


#' @name shave_concatemers
#' @description
#'
#' "Shaves" concatemers and bins to a set C and B, respectively such that every bin in B
#' has at least bthresh concatemer support and every concatemer in C has at least cthresh
#' bin-wise order.  This is done by iteratively removing concatemers and bins until a fixed point is reached.
#'
#' @param concatemers GRanges of concatemers with $cid and $binid
#' @param cthresh integer minimum concatemer order across binset B
#' @param bthresh integer minimum bin support across set C of concatemers
#' @param verbose logical flag whether to print diff statements cmessages for each step of iteration
#' @param threads used to set number of data table threads to use with setDTthreads function, segfaults may occur if >1
shave_concatemers = function(concatemers, cthresh = 3, bthresh = 2, verbose = TRUE, threads = 1)
{
  setDTthreads(threads)
  .shave = function(concatemers, bthresh = 2, cthresh = 2)
  {
    dt = unique(gr2dt(concatemers), by = c('cid', 'binid'))
    ccount = dt[, .N, by = cid]
    bcount = dt[, .N, by = binid]
    coolc = ccount[N>=bthresh, cid]
    coolb = bcount[N>=cthresh, binid]
    concatemers %Q% (cid %in% coolc) %Q% (binid %in% coolb) 
  }
  old = concatemers
  new = .shave(concatemers, bthresh = bthresh, cthresh = cthresh)
  while (length(new)<length(old))
  {
    if (verbose)
      cmessage('shaving --> Diff: ', length(old)-length(new), ', Old: ', length(old), ', New:', length(new))
    old = new;
    new = .shave(old, bthresh = bthresh, cthresh = cthresh)
  }
  new
}


#' @name concatemer_communities
#' @description
#'
#' Low level function that labels concatemers with chromunity ids $chid using community detection on a graph. 
#'
#' Given a GRanges of monomers labeled by concatemer id $cid
#'
#' @param concatemers GRanges of monomers with field $cid indicating concatemer id and $binid represent bin id
#' @param tiles.k.knn KNN parameter specifying how many nearest neighbors to sample when building KNN graph
#' @param k.min minimal number of nearest neighbors an edge in KNN graph needs to have before community detection
#' @param drop.small logical flag specifying whether to remove "small" concatemers ie those with a footprint <= small argument [FALSE]
#' @param small integer threshold for bases that define small concatemers, only relevant if drop.small = TRUE
#' @param subsample.frac optional arg specifying fraction of concatemers to subsample [NULL]
#' @param seed seed for subsampling
#' @param threads used to set number of data table threads to use with setDTthreads function, segfaults may occur if >1
#' @param filename optional argument to write gml object specifying network used for community detection 
#' @return GRanges of concatemers labeled by $c mmunity which specifies community id
concatemer_communities = function (concatemers, k.knn = 25, k.min = 5,
    drop.small = FALSE, small = 1e4, max.size = 2^31-1,
    subsample.frac = NULL, seed = 42, verbose = TRUE, debug = FALSE, threads = 1, filename = NULL, mc.cores=5)  
  {
  setDTthreads(threads) #horrible segfaults occur if you don't include this
  reads = concatemers

  if (is.null(reads$cid))
  {
    if ('read_idx' %in% names(values(reads)))
      names(values(reads))[match('read_idx', names(values(reads)))] = 'cid'
    else
      stop("concatemer GRanges must have metadata column $cid or $read_idx specifying the concatemer id")
  }
  
  if (drop.small) {
    if (verbose) cmessage(paste0("Filtering out reads < ", small))
    reads = gr2dt(reads)
    setkeyv(reads, c("seqnames", "start"))
    reads[, `:=`(max.local.dist, end[.N] - start[1]), by = cid]
    reads = reads[max.local.dist > small]
    reads = dt2gr(reads)
  }

  if (debug)
    browser()

  if (verbose) cmessage("Matching reads to tiles")
  reads = as.data.table(reads)[, `:=`(count, .N), by = cid]
  reads2 = reads[count > 2, ] 

  if (!nrow(reads2))
    {
      warning('no high order concatemers, returning empty result')
      return(reads[, chid := NA][c(), ])
    }

  reads2$cid = factor(reads2$cid)
  ucid = levels(reads2$cid)

  if (!is.null(subsample.frac))
  {
      if (verbose) cmessage("Using fraction subsampling")
    setkey(reads2, cid)
    reads2 = reads2[.(sample(ucid, length(ucid)*subsample.frac)), ]
    reads2$cid = factor(reads2$cid)
    ucid = levels(reads2$cid)
  }

    
  if (verbose) cmessage("Matrices made")
  ## gc()
 
  ## remove bins hit only by one concatemer
  reads2$binid = factor(reads2$binid)
  setkey(reads2, binid)
  ubid = reads2[, .N, by = binid][N>1 , binid]
 
  if (!length(ubid))
  {
    warning('No bins hit by two concatemers, returning empty result')
    return(reads[, chid := NA][c(), ])
  }

  reads2 = reads2[.(ubid), ]
  ## added for subsamp
  reads2$binid = factor(reads2$binid) 

  ## refactor may be necessary
  reads2$cid = factor(reads2$cid)
  ucid = levels(reads2$cid)

  ## all pairs of concatemers that share a bin
  reads2[, cidi := as.integer(cid)]

  ## size is the number of pairs which can't exceed the max.size (or integer max)
  size = data.table(cid = reads2$cid, binid = reads2$binid)[, choose(.N,2), by = binid][, sum(as.numeric(V1))]

  unique_reads2 = unique(reads2, by = c('binid', 'cidi'))
  numbins = unique_reads2[, length(binid), by='cidi']
  unique_reads2_trim = merge(unique_reads2, numbins, by='cidi')[V1 > 2,]
  rm(unique_reads2)
    
  ##trimming unnecessary fields
  unique_reads2_trim[, setdiff(colnames(unique_reads2_trim), c('cidi','binid')) := NULL]

  bincounts = unique_reads2_trim[, .N, by=binid] #number of concatemers that hits every bin
  bincounts = bincounts[N > 1]

  unique_cidi = unique(unique_reads2_trim$cidi)
  unique_binid = unique(unique_reads2_trim$binid)
  concats.per.bin = mean(unique_reads2_trim[, .N, by='binid']$N)
  bins.per.concat = mean(unique_reads2_trim[, .N, by='cidi']$N)
  total.pairs = length(unique_cidi) * bins.per.concat * concats.per.bin
  numchunks = ceiling(total.pairs / 1e8)   ####~100 million pairs per chunk
  if (numchunks < 10) numchunks = 10
  
  ucidl = split(unique_cidi, ceiling(runif(length(unique_cidi))*numchunks)) ## randomly chop up cids
  setkey(unique_reads2_trim, 'cidi')


 # browser()
  cmessage("Beginning line graph construction")
  concatm = sparseMatrix(unique_reads2_trim$cidi %>% as.integer, unique_reads2_trim$binid %>% as.integer, x=1, repr='R')
  edgelist = pbmclapply(ucidl, mc.cores=mc.cores, mc.preschedule=TRUE, function(cidis) {
      dt = unique_reads2_trim[.(cidis), .(cidi1 = cidi, binid = binid), by=cidi]
      dt[, cidi := NULL]
      dt.merged = merge(dt, unique_reads2_trim, by='binid', how='left', allow.cartesian = TRUE)[cidi1 < cidi]      
      edgelist = dt.merged[, .N, by=c('cidi1','cidi')]
      edgelist = edgelist[N > 2]
      colnames(edgelist)[colnames(edgelist) == "cidi1"] = "bx1"
      colnames(edgelist)[colnames(edgelist) == "cidi"] = "bx2"
      colnames(edgelist)[colnames(edgelist) == "N"] = "mat"
      p1 = concatm[edgelist[, bx1],]
      p2 = concatm[edgelist[, bx2],]  
      total = Matrix::rowSums(p1 | p2)
      edgelist$tot = total
      edgelist[, frac := mat/tot]
      edgelist = edgelist[mat > 2 & tot > 2]
      return(edgelist)
  })
  cmessage("End line graph construction")
  edgelist = rbindlist(edgelist)
    
  edgelist_2 = copy(edgelist)
  edgelist_2$bx2 = edgelist$bx1
  edgelist_2$bx1 = edgelist$bx2
  edgelist_3 = rbind(edgelist, edgelist_2)
  edgelist_3$nmat = edgelist_3$mat
  edgelist_3$nfrac = edgelist_3$frac
  setkeyv(edgelist_3, c("nfrac", "nmat"))
  edgelist_3 = unique(edgelist_3)
  edgelist_3 = edgelist_3[order(nfrac, nmat, -bx2, decreasing = T)]
  #concatm = t(incidence)
  
  k=k.knn
  knn.dt = edgelist_3[mat > 2 & tot > 2, .(knn = bx2[1:k]), by = bx1][!is.na(knn), ]
  
  
  ## ncat = reads2$cid %>% unique %>% length
  ## nbin = reads2$binid %>% unique %>% length
  
  ## if (size > max.size)
  ##   stop(sprintf('size %s of the problem %s with %s concatemers and %s bins  exceeds max.size %s, please considering subsampling concatemers, using fewer windows or bins, or shaving concatemers with more aggressive parameters (bthresh, cthresh)', size, ifelse(shave, 'after shaving', ''), ncat, nbin, max.size))




  ## all pairs of concatemers that share a bin
  ## reads2[, cidi := as.integer(cid)]

  ## if (verbose) cmessage("Making Pairs object") 
  ## pairs = reads2[, t(combn(cidi, 2)) %>% as.data.table, by = binid]
  ## if (verbose) cmessage("Pairs object made") 

  ## setkey(reads2, cid)
  ## concatm = reads2[.(ucid),  sparseMatrix(factor(cid, ucid) %>% as.integer, binid %>% as.integer, x = 1, dimnames = list(ucid, levels(binid)))]

  ## p1 = concatm[pairs[, V1], -1, drop = FALSE]
  ## p2 = concatm[pairs[, V2], -1, drop = FALSE]
  ## matching = Matrix::rowSums(p1 & p2)
  ## total = Matrix::rowSums(p1 | p2)
  ## dt = data.table(bx1 = pairs[, V1], bx2 = pairs[, V2], mat = matching, 
  ##                 tot = total)[, `:=`(frac, mat/tot)]
  ## dt2 = copy(dt)
  ## dt2$bx2 = dt$bx1
  ## dt2$bx1 = dt$bx2
  ## dt3 = rbind(dt, dt2)
  ## dt3$nmat = dt3$mat
  ## dt3$nfrac = dt3$frac
  ## setkeyv(dt3, c("nfrac", "nmat"))
  ## dt3 = unique(dt3)
  ## dt3.2 = dt3[order(nfrac, nmat, decreasing = T)]
  ## if (verbose) cmessage("Pairs made")
  ## gc()
  ## k = k.knn
  ## knn.dt = dt3.2[mat > 2 & tot > 2, .(knn = bx2[1:k]), by = bx1][!is.na(knn), ]



  if (!nrow(knn.dt))
  {
    warning('no concatemers with neighbors, perhaps data too sparse? returning empty result')
    return(reads[, chid := NA][c(), ])
  }
  setkey(knn.dt)
  knn = sparseMatrix(knn.dt$bx1, knn.dt$knn, x = 1)
  knn.shared = knn %*% knn
  if (verbose) cmessage("KNN done")
  KMIN = k.min
  A = knn.shared * sign(knn.shared > KMIN)
  A[cbind(1:nrow(A), 1:nrow(A))] = 0
#  A <- as(A, "sparseMatrix")
  A = A + t(A)
  G = graph.adjacency(A, weighted = TRUE, mode = "undirected")

  ## browser()

  ## cidis = as.integer(colnames(A))
  
  ## seqcount = reads2[, .N, by=c('cidi', 'seqnames')]
  ## seqcount[, is_multichromosomal := length(seqnames) > 1, by = cidi]
  ## seqcount = unique(seqcount[order(N, decreasing=T)], by='cidi')
  ## seqcount = seqcount[cidi %in% as.integer(igraph::V(G)$name),]
  ## setkey(seqcount, 'cidi')
  
  ## G = igraph::set_vertex_attr(G, 'seqnames', value=as.character(seqcount[cidis]$seqnames))
  ## G = igraph::set_vertex_attr(G, 'is_multichromosomal', value=as.integer(seqcount[cidis]$is_multichromosomal))
  
  cl.l = cluster_fast_greedy(G)
  #cl.l = cluster_leiden(G, objective_function='modularity')
  
  
  #cl.l = cluster_fast_greedy(G)
  cl = cl.l$membership
  ## rename so chid has most support
  cls = 1:max(cl)
  names(cls) = cl %>% table %>% sort %>% rev %>% names
  cl = cls[as.character(cl)]
  if (verbose) cmessage("Communities made")
  memb.dt = data.table(cid = ucid[1:nrow(A)], chid = cl)
  reads[, cid := as.character(cid)]
  reads = merge(reads, memb.dt, by = "cid")
  reads[, `:=`(support, length(unique(cid))), by = chid]
  reads = dt2gr(reads)
  return(reads)
}



##############

#' @name sliding_window_chromunity
#' @description
#'
#' Runs genome-wide chromunity detection across a sliding or provided genomic window
#'
#' @param concatemers GRanges with $cid
#' @param resolution bin size for community detection [5e4]
#' @param region region to run on [si2gr(concatemers)]
#' @param windows GRanges or GRangesList of windows to test, more appropriate for testing specific windows, e.g. targets such as promoters/enhancers windows. If sliding window approach is desired, keep this parameter as NULL and use piecewise argument.
#' @param window.size window size to do community detection within
#' @param max.size max size of problem (concatemers x bins) to consider (default is 2^31-1).  If we hit this problem size, will subsample concatemers so that concatemers * bins is < max.dim, this step is done downstream of shaving 
#' @param k.knn KNN parameter specifying how many nearest neighbors to sample when building KNN graph
#' @param peak.thresh peak threshold with which to call a peak
#' @param k.min minimal number of nearest neighbors an edge in KNN graph needs to have before community detection
#' @param pad integer pad to use when computing the footprint of each chromunity and finding peak regions which become binsets
#' @param take_sub_sample take subsample to be used for training
#' @param subsample.frac proprtion of concatemers to subsample
#' @return list with items $binset,  $support, $params: $binsets is GRanges of bins with field $bid corresponding to binset id and $support which is the concatemer community supporting the binset which are GRanges with $bid
#' @author Aditya Deshpande, Marcin Imielinski
#' @export

sliding_window_chromunity = function(concatemers, resolution = 5e4, region = si2gr(concatemers), windows = NULL, chr = NULL, take_sub_sample = TRUE, subsample.frac = 0.5, window.size = 2e6, max.slice = 1e6, min.support = 3, stride = window.size/2, mc.cores = 5, k.knn = 25, k.min = 5, pad = 1e3, peak.thresh = 0.85, seed = 145, verbose = TRUE, genome = "BSgenome.Hsapiens.UCSC.hg38::Hsapiens")
{
    
  set.seed(seed)
    
  if (is.null(chr)){
      stop("Provide chr as this is sliding window implementation")
  }
    
  if (is.null(windows) & !is.null(genome)) {
    windows = gr.tile(hg_seqlengths(genome = genome), window.size/2)+window.size/4
  } else if (is.null(genome)) {
    windows = gr.tile(region, window.size/2)+window.size/4
  }

  windows = dt2gr(gr2dt(windows)[, start := ifelse(start < 0, 1, start)])
  windows = windows %Q% (seqnames == chr)
  windows = sortSeqlevels(windows)
  windows = sort(windows)
   
  if (is.null(concatemers$cid))
  {
      if ('read_idx' %in% names(values(concatemers)))
        names(values(concatemers))[match('read_idx', names(values(concatemers)))] = 'cid'
      else
        stop("concatemer GRanges must have metadata column $cid or $read_idx specifying the concatemer id")
  }
  
  params = data.table(k.knn = k.knn, k.min = k.min, seed = seed)

  if (!is.null(resolution) & !is.null(genome)){
      bins = gr.tile(hg_seqlengths(genome = genome), resolution)
  } else if (!is.null(resolution) & is.null(genome)) {    
      #bins = bins %Q% (seqnames == chr) 
      bins = gr.tile(region, resolution)
  } else {
      bins = windows %>% unlist %>% gr.stripstrand %>% disjoin
  }

  if (verbose) cmessage('Generated ', length(bins), ' bins across ', length(windows), ' windows')

  ## (batch) match up concatemers with binids
  concatemers$binid = gr.match(concatemers, bins, max.slice = max.slice, mc.cores =  mc.cores, verbose = verbose)

  ## maybe NA need to be removed
  concatemers = concatemers %Q% (!is.na(binid))

  if (!is.null(subsample.frac)){
      take_sub_sample = TRUE
      if (verbose) cmessage('Taking sub-sample with fraction ', subsample.frac)
  }

    chrom.comm = mclapply(1:length(windows), mc.cores = mc.cores, function(j){
        suppressWarnings({
            which.gr = windows[j]
            these.cids = (concatemers %&% which.gr)$cid
            this.pc.gr =  concatemers %Q% (cid %in% these.cids)
            this.bins = bins %&&% which.gr
            this.chromunity.out = tryCatch(gr2dt(concatemer_chromunity_sliding(this.pc.gr,
                                                                               k.knn = k.knn,
                                                                               k.min = k.min,
                                                                               tiles = this.bins,
                                                                               take_sub_sample = take_sub_sample,
                                                                               frac = subsample.frac,
                                                                               verbose = FALSE)), error = function(e) NULL)  
            if (!is.null(this.chromunity.out)){
                this.chromunity.out[, winid := j]
            } else {
                this.chromunity.out = data.table(NA)
            }
        })
        return(this.chromunity.out)
    })
    chrom.comm = rbindlist(chrom.comm, fill = T)

    chrom.comm[, tix := ifelse(is.na(tix), 0, tix)]
    chrom.comm[, V1 := NULL]
    chrom.comm = na.omit(chrom.comm)
    chrom.comm[, chid := .GRP, by = .(chid, winid)]
    chrom.comm.filt = chrom.comm[support >= min.support]
    chrom.comm.filt = dt2gr(chrom.comm.filt)
      
  if (!length(chrom.comm.filt))
    return(Chromunity(concatemers = GRanges(), binsets = GRanges(), meta = params))

  uchid = unique((chrom.comm.filt %Q% (support >= min.support))$chid)

  if (verbose) cmessage('Analyzing gr.sums associated with ', length(uchid), ' concatemer communities to generate binsets')

  binsets = mclapply(uchid, mc.cores = mc.cores, function(this.chid)
  {
    suppressWarnings({
      this.chrom.comm = chrom.comm.filt %Q% (chid == this.chid)
      winid = unique(this.chrom.comm$winid)
      peaks = gr.sum(this.chrom.comm + pad) 
      binset = bins[, c()] %&% (peaks[peaks$score > quantile(peaks$score, peak.thresh)])
      binset = binset %&% windows[winid]
      if (length(binset))
      {
        binset$chid = this.chid
        binset$bid = this.chid
        binset$winid = winid
        binset = gr.reduce(binset)
      }
    })
    binset
  })  %>% do.call(grbind, .)

  chrom.comm = dt2gr(chrom.comm)
  
  return(Chromunity(concatemers = chrom.comm, binsets = binsets, meta = params))
}



#' @name concatemer_chromunity_sliding
#' @description
#'
#' Low level function that labels concatemers with chromunity ids $chid using community detection on a graph. A faster implementation  for sliding window 
#'
#' Given a GRanges of monomers labeled by concatemer id $cid
#'
#' @param concatemers GRanges of monomers with field $cid indicating concatemer id and $binid represent bin id
#' @param tiles.k.knn KNN parameter specifying how many nearest neighbors to sample when building KNN graph
#' @param k.min minimal number of nearest neighbors an edge in KNN graph needs to have before community detection
#' @param drop.small logical flag specifying whether to remove "small" concatemers ie those with a footprint <= small argument [FALSE]
#' @param small integer threshold for bases that define small concatemers, only relevant if drop.small = TRUE
#' @param take_sub_sample optional arg specifying if fraction of concatemers to subsample [FALSE]  
#' @param subsample.frac optional arg specifying fraction of concatemers to subsample [0.5]
#' @param seed seed for subsampling
#' @param threads used to set number of data table threads to use with setDTthreads function, segfaults may occur if >1
#' @return GRanges of concatemers labeled by $c mmunity which specifies community id

concatemer_chromunity_sliding <- function (concatemers, k.knn = 10, k.min = 1, tiles,  
    drop.small = FALSE, small = NULL, 
    take_sub_sample = FALSE, frac = 0.5, seed.n = 154, verbose = FALSE, threads = 1) 
{
    setDTthreads(threads) #horrible segfaults occur if you don't include this
    reads = concatemers
    
    if (drop.small) {
        if (verbose) cmessage(paste0("Filtering out reads < ", small))
        reads = gr2dt(reads)
        setkeyv(reads, c("seqnames", "start"))
        reads[, `:=`(max.local.dist, end[.N] - start[1]), by = cid]
        reads = reads[max.local.dist > small]
        reads = dt2gr(reads)
    }
    reads$tix = gr.match(reads, tiles)
    reads = as.data.table(reads)[, `:=`(count, .N), by = cid] 
    mat = suppressMessages(dcast.data.table(reads[count > 2, ] %>% gr2dt, cid ~ 
        tix, value.var = "strand", fill = 0))
    mat2 = mat[, c(list(cid = cid), lapply(.SD, function(x) x >= 
        1)), .SDcols = names(mat)[-1]]
    mat2 = suppressWarnings(mat2[, `:=`("NA", NULL)])
    reads.ids = mat2$cid
    mat2 = as.data.table(lapply(mat2, as.numeric))
    if (take_sub_sample) {
        tot.num = nrow(mat2[rowSums(mat2[, -1]) > 1, ])
        if (verbose) cmessage(paste0("Total number of rows are: ", tot.num))
        if (verbose) cmessage("taking a subsample")
        number.to.subsample = pmax(round(tot.num * frac), 1000)
        if (verbose) cmessage(paste0("Number sampled: ", number.to.subsample))
        set.seed(seed.n)
        gt = mat2[rowSums(mat2[, -1]) > 1, ][sample(.N, number.to.subsample), 
            ]
    }
    else {
        gt = mat2[rowSums(mat2[, -1]) > 1, ]
    }
    ubx = gt$cid
    if (verbose) cmessage("Matrices made")
    gc()
    pairs = t(do.call(cbind, apply(gt[, setdiff(which(colSums(gt) > 
        1), 1), with = FALSE] %>% as.matrix, 2, function(x) combn(which(x != 0), 2)))) 
    gt = as(as.matrix(as.data.frame(gt)), "sparseMatrix")    
    p1 = gt[pairs[, 1], -1]
    p2 = gt[pairs[, 2], -1]
    matching = rowSums(as.array(p1 & p2))
    total = rowSums(as.array(p1 | p2))    
    dt = data.table(bx1 = pairs[, 1], bx2 = pairs[, 2], mat = matching, 
        tot = total)[, `:=`(frac, mat/tot)]
    dt2 = copy(dt)
    dt2$bx2 = dt$bx1
    dt2$bx1 = dt$bx2
    dt3 = rbind(dt, dt2)
    dt3$nmat = dt3$mat
    dt3$nfrac = dt3$frac
    setkeyv(dt3, c("nfrac", "nmat"))
    dt3 = unique(dt3)
    dt3.2 = dt3[order(nfrac, nmat, decreasing = T)]
    if (verbose) cmessage("Pairs made")
    gc()
    k = k.knn
    knn.dt = dt3.2[mat > 2 & tot > 2, .(knn = bx2[1:k]), by = bx1][!is.na(knn), 
        ]
    setkey(knn.dt)
    knn = sparseMatrix(knn.dt$bx1, knn.dt$knn, x = 1)
    knn.shared = knn %*% knn
    if (verbose) cmessage("KNN done")
    KMIN = k.min
    A = knn.shared * sign(knn.shared > KMIN)
    A[cbind(1:nrow(A), 1:nrow(A))] = 0
    A <- as(A, "matrix")
    A <- as(A, "sparseMatrix")
    A = A + t(A)
    G = graph.adjacency(A, weighted = TRUE, mode = "undirected")
    cl.l = cluster_fast_greedy(G)
    cl = cl.l$membership
    if (verbose) cmessage("Communities made")
    memb.dt = data.table(cid = ubx[1:nrow(A)], chid = cl)
    reads = merge(reads, memb.dt, by = "cid")
    reads[, `:=`(support, length(unique(cid))), by = chid]
    reads = dt2gr(reads)
    return(reads)
}



#' @name sliding_window_background
#' @description
#'
#' Given n binsets generates random "background" binsets that mirrors the input binset characteristics with respect to chromosome, width, and distance.
#'
#' @param chromosome the chromosome to work on, string.
#' @param binsets GRanges of bins with fields seqnames, start, end, and $bid specifying bin-set id
#' @param n number of binsets to generate [1000]
#' @param resolution to use for the simulation [5e4]
#' @param seed seed for subsampling
#' @param genome.to.use genome build to use for the simulation [hg38]
#' @return GRanges of simulated binsets
#' @export

sliding_window_background = function(chromosome, binsets, seed = 145, n = 1000, resolution = 5e4, num.cores = 10, genome.to.use = "BSgenome.Hsapiens.UCSC.hg38::Hsapiens", region=si2gr(binsets)){
    set.seed(seed)
    this.final.dt = data.table()
    chr.int = .chr2str(chromosome)
    if(is.null(genome.to.use)) {
        upper.bound = gr2dt(region)$end
    } else {
        upper.bound = hg_seqlengths(genome = genome.to.use)[chr.int]
    }
#### get the relevant distributions

    dist.pdf.dt = extract_dw(binsets, num.cores = num.cores)
    this.num = 0
    this.tot = 0
    i = 0
    message("Generating GRanges")

    this.list = pbmclapply(1:n, function(i){
        this.final.dt = data.table()
        this.iter = i
        this.card = round(rdens_sliding(1, den = density(dist.pdf.dt[type == "cardinality"]$V1),
                                dat = dist.pdf.dt[type == "cardinality"]$V1))
        if (this.card > 0){
            this.dists = tryCatch(round_any(c(0, (rdens_sliding(this.card-1, den = density(dist.pdf.dt[type == "dist"]$V1), dat = dist.pdf.dt[type== "dist"]$V1))), resolution), error = function(e) NULL)
            this.width = tryCatch(round_any(rdens_sliding(this.card, den = density(dist.pdf.dt[type == "width"]$V1), dat = dist.pdf.dt[type == "width"]$V1), resolution),  error = function(e) NULL)
            if (!is.null(this.dists) & !is.null(this.width)){
                this.width = this.width-1
                this.dists = this.dists+1
                this.width[this.width < 1] = 1
                this.loc.dt = data.table()
                for (j in 1:this.card){
                    if (j == 1){
                        if (!is.null(genome.to.use)) {
                            anchor.pt = start(gr.sample(hg_seqlengths(genome = genome.to.use)[chr.int], 1, wid = 1))
                        } else {
                            anchor.pt = start(gr.sample(region, 1, wid = 1))
                        }
        
                        sts = anchor.pt + this.dists[j]
                        this.gr = GRanges(seqnames = Rle(c(chromosome), c(1)),
                                          ranges = IRanges(c(sts)))
                        this.gr = this.gr + (this.width[j]/2)
                        this.dt = gr2dt(this.gr)
                        this.loc.dt = rbind(this.loc.dt, this.dt)
                    } else {
                        sts = tail(this.loc.dt, n = 1)$end + this.dists[j]
                        ends = sts + this.width[j]
                        if(ends > 2e9)
                            break
                        this.gr = GRanges(seqnames = Rle(c(chromosome), c(1)),
                                          ranges = IRanges(sts, end = ends))
                        this.dt = gr2dt(this.gr)
                        this.loc.dt = rbind(this.loc.dt, this.dt)
                    }
                }
                this.loc.dt[, bid := paste0("rand_", i)]
                this.loc.dt[width<0, width := 0]
                this.loc.gr = dt2gr(this.loc.dt)
                if (!any(start(this.loc.gr) > upper.bound)){
                    this.num = this.num + 1

                    this.final.dt = rbind(this.final.dt, this.loc.dt)
                } else {this.final.dt = data.table(NA)}
            } else {this.final.dt = data.table(NA)}
        } else {this.final.dt = data.table(NA)}
        return(this.final.dt)
    }, mc.cores = num.cores)
    this.dt = rbindlist(this.list, fill = TRUE)
    return(this.dt)
}



#' @name smessage
#' @description
#'
#' @author Aditya Deshpande, Marcin Imielinski
#' @export
#' @private 
smessage = function(..., pre = 'Synergy')
  message(pre, ' ', paste0(as.character(Sys.time()), ': '), ...)

#' @name cmessage
#' @description
#'
#' @author Aditya Deshpande, Marcin Imielinski
#' @export
#' @private 
cmessage = function(..., pre = 'Chromunity')
  message(pre, ' ', paste0(as.character(Sys.time()), ': '), ...)



##########################################################################
## CHROMUNITY.CLASS
#########################################################################

#' @name Chromunity
#' @title Chromunity
#'
#' @export
Chromunity = function(binsets = GRanges(), concatemers = GRanges(), meta = data.table(), verbose = TRUE)
{
  ChromunityObj$new(binsets = binsets, concatemers = concatemers, meta = meta, verbose = verbose)
}


#' @name ChromunityObj
#' @title Chromunity object
#' @description
#'
#' Vectorized object that stores the output of chromunity analysis and can be inputted into annotate / synergy functions.
#' The main accessors are $binsets and $concatemers which each return GRanges linked through a chromunity id $chid field
#'
#' Chromunities can be subsetted, concatenated.  Concatenation will result in deduping any $chid that are present in two inputs. 
#' @export
ChromunityObj = R6::R6Class("Chromunity", 
  public = list(
    initialize = function(binsets = GRanges(), concatemers = GRanges(), meta = data.table(), verbose = TRUE)
    {
      if (!inherits(binsets, 'GRanges'))
        stop('binsets must non-NULL and a GRanges')
      
      if (!inherits(concatemers, 'GRanges'))
        stop('concatemers must be a GRanges')

      private$pseqlengths = data.table(len = c(seqlengths(binsets), seqlengths(concatemers)),
                                       nm = c(names(seqlengths(binsets)), names(seqlengths(concatemers))))[, max(len, na.rm = TRUE), keyby = nm][, structure(V1, names = nm)]

      if (!is.null(meta))
        {
          if (!is.data.table(meta))
            stop('meta should be a data.table')

          private$pmeta = copy(meta)
        }
      
      ## empty binset return base object 
      if (!length(binsets))
        return(self)

      if (is.null(binsets$chid))
        stop('binset must have chromunity $chid field defined')
      
      private$pbinsets = as.data.table(binsets) %>% setkey(chid)

      private$pchid = unique(binsets$chid)

      ## empty concatemers, nothing to do 
      if (!length(concatemers))
        return(self)

      if (is.null(concatemers$cid))
      {
        if ('read_idx' %in% names(values(concatemers)))
          names(values(concatemers))[match('read_idx', names(values(concatemers)))] = 'cid'
        else
          stop("concatemer GRanges must have metadata column $cid or $read_idx specifying the concatemer id")
      }


      
      if (is.null(concatemers$chid))
      {
        warning('concatemers do not have $chid field pre-defined, doing overlap query to match up ')
        concatemers = concatemers %*% binsets[, 'chid']

      }
      concatemers = as.data.table(concatemers) %>% setkey(chid)

      ## if (!retain){
      ##     if (length(setdiff(concatemers$chid, binsets$chid)))
      ##     {
      ##         warning("concatemers defined that don't map to provided binsets, removing")
      ##         concatemers = concatemers[.(unique(binsets$chid)), ]
      ##     }
      ## }

      ## tally support as keyed vector
      support = merge(private$pbinsets, concatemers, by = 'chid', allow.cartesian = TRUE)[, .(support = length(unique(cid))), keyby = chid]
      private$psupport = support[.(private$pchid), ][is.na(support), support := 0][, chid := private$pchid][, structure(support, names = private$pchid)]

      private$pconcatemers = concatemers %>% copy

      ## all done .. 
      return(invisible(self))
    },
    subset = function(ix, ...){
      if (is.integer(ix) | is.numeric(ix))
        ix = private$pchid[ix]
      
      if (any(is.na(ix)) || length(setdiff(ix, private$pchid)))
        stop('indices outside of ix of object, check query against chromunity id $chid')
      


      out = data.table(chid = ix, chid.new = dedup(ix))
      private$pchid = out$chid.new
      private$psupport = structure(private$psupport[out$chid], names = out$chid.new)

      binsets = merge(private$pbinsets, out, by = 'chid', allow.cartesian = TRUE);
      binsets$chid = binsets$chid.new
      binsets$chid.new = NULL
      private$pbinsets = binsets;

      concatemers = merge(private$pconcatemers, out, by = 'chid', allow.cartesian = TRUE);
      concatemers$chid = concatemers$chid.new
      concatemers$chid.new = NULL
      private$pconcatemers = concatemers;

      return(invisible(self))
    },
    print = function(...)
    {
      quants = quantile(private$psupport, c(0, 0.5, 1))
      message('Chromunity object with ', self$length, ' chromunities (', paste(head(private$pchid, 3), collapse = ', '), ifelse(self$length>3, ', ...', ''),  ') spanning ', nrow(private$pconcatemers), ' concatemers');
      message('\t with median concatemer support:', quants[2], ', range: [', quants[1], '-', quants[3], ']')
    },
    gtrack = function(name = '', binsets = TRUE, concatemers = TRUE, heatmap = FALSE, binsize = 1e4, clim = NA, ...)
    {
      out = gTrack()
      if (binsets)
      {
        binsets = self$binsets
        out = c(out, gTrack(split(binsets, binsets$chid) %>% unname, height = 5, name = paste(name, 'binsets')));
      }
      if (concatemers)
      {
        concatemers = self$concatemers
        out = c(out, gTrack(split(concatemers, concatemers$cid) %>% unname, height = 10, name = paste(name, 'concatemers')));
      }
      if (heatmap)
      {
        concatemers = self$concatemers
        gm = self$gm(binsize = binsize)
        out = c(out, gm$gtrack(clim = clim, ...))
      }
      return(out)
    },
    gm = function(binsize = 5e3) GxG::cocount(self$concatemers, bins = gr.tile(self$footprint, binsize), by = 'chid')
  ),
  private = list(
    pchid = c(),
    pseqlengths = c(),
    pmeta = data.table(),
    psupport = c(),
    pbinsets = data.table(seqnames = factor(), start = integer(), end =  integer(), chid = factor(), support = integer()) %>% setkey(chid),
    pconcatemers = data.table(seqnames = factor(), start = integer(), end =  integer(), chid = factor()) %>% setkey(chid)
    ),
  active = list(
    chid = function() private$pchid,
    meta = function() copy(private$pmeta),
    length = function() length(private$pchid),
    names = function() private$pchid,
    gt = function(x) self$gtrack(),
    footprint = function(x) self$binsets %>% gr.stripstrand %>% sort %>% reduce, 
    binsets = function() dt2gr(private$pbinsets, seqlengths = private$pseqlengths),
    concatemers = function() dt2gr(private$pconcatemers, seqlengths = private$pseqlengths),
    support = function() private$psupport,
    seqlengths = function() private$pseqlengths
  )
)


#' @name [.Chromunity
#' @title Subset chromunity
#' @description
#'
#' Overrides the subset operator x[] for use with Chromunity
#'
#' @param obj Covariate This is the Covariate to be subset
#' @param range vector This is the range of Covariates to return, like subsetting a vector. e.g. c(1,2,3,4,5)[3:4] == c(3,4)
#' @return A new Covariate object that contains only the Covs within the given range
#' @author Marcin Imielinski
#' @export
'c.Chromunity' = function(...){

  ##Ensure that all params are of type Covariate
    cc = list(...)
    isc = sapply(cc, function(x)  class(x)[1] == 'Chromunity')

    if(any(!isc)){
        stop('Error: All inputs must be of class Chromunity.')
    }


  chids = lapply(cc, function(x) x$chid)
  ## this will map old chids to new ones, preventing collisions from chid in the input lists
  chmap = dunlist(chids)[, .(chid.old = V1, listid = listid)][, chid.new := dedup(chid.old)] %>% setkeyv(c('listid', 'chid.old'))

  
  binsets = lapply(1:length(cc), function(x) {tmp = cc[[x]]$binsets; tmp$chid = chmap[.(x, tmp$chid), chid.new];tmp %>% as.data.table})
  concatemers = lapply(1:length(cc), function(x) {tmp = cc[[x]]$concatemers; tmp$chid = chmap[.(x, tmp$chid), chid.new]; tmp %>% as.data.table})
  metas = lapply(cc, function(x) x$meta)

  sl = (lapply(cc, function(x) data.table(nm = names(x$seqlengths), len = x$seqlengths)) %>% rbindlist)[, .(len = max(len)), keyby = nm][, structure(len, names = nm)]
  binsets = rbindlist(binsets) %>% dt2gr(seqlengths = sl)
  concatemers = rbindlist(concatemers) %>% dt2gr(seqlengths = sl)
  return(ChromunityObj$new(binsets = binsets, concatemers = concatemers, meta = rbindlist(metas, fill = TRUE)))
}


#' @name [.Chromunity
#' @title Subset chromunity
#' @description
#'
#' Overrides the subset operator x[] for use with Chromunity
#'
#' @param obj Covariate This is the Covariate to be subset
#' @param range vector This is the range of Covariates to return, like subsetting a vector. e.g. c(1,2,3,4,5)[3:4] == c(3,4)
#' @return A new Covariate object that contains only the Covs within the given range
#' @author Marcin Imielinski
#' @export
'[.Chromunity' = function(obj, range){
  if (any(is.na(range)))
    stop('NA in subscripts not allowed')

  if (any(range>length(obj)))
    stop('Subscript out of bounds')

  ##Clone the object so we don't mess with the original
  ret = obj$clone()
  ##Call the subset function of the Covariate class that will modify the cloned Covariate
  ret$subset(range)

  return (ret)
}

#' @name names.Chromunity
#' @title title
#' @description
#'
#' Overrides the names function for Covariate object
#'
#' @param obj Covariate This is the Covariate whose names we are extracting'
#' @return names of covariates
#' @author Zoran Z. Gajic
#' @export
'names.Chromunity' = function(x) return (x$chid)

#' @name length.Chromunity
#' @title length.Chromunity
#' @description
#' Number of binsets in chromunity object 
#'
#' @param obj Covariate object that is passed to the length function
#' @return number of covariates contained in the Covariate object as defined by length(Covariate$data)
#' @author Zoran Z. Gajic
#' @export
'length.Chromunity' = function(obj,...) return(obj$length)


##########################################################################
## COVARIATE.CLASS
#########################################################################


#' @name Covariate
#' @title title
#' @description
#'
#' Stores Covariates for passing to synergy or annotate function
#'
#' These are GRanges of interval or numeric type.  The numeric can be optionally log transformed while the interval covariates can be
#' count (ie count the total number of intervals in the binset) or width based (count the total width in the binset)
#'
#' 
#'
#' @param name character vector Contains names of the covariates to be created, this should not include the names of any Cov objects passed
#' @param pad numeric vector Indicates the width to extend each item in the covarite. e.g. if you have a GRanges covariate with two ranges (5:10) and (20:30) with a pad of 5,
#' These ranges wil become (0:15) and (15:35)
#' @param type character vector Contains the types of each covariate (numeric, interval, sequencing)
#' @param signature, see ffTrack, a vector of signatures for use with ffTrack sequence covariates
#' fftab signature: signatures is a named list that specify what is to be tallied.  Each signature (ie list element)
#' consist of an arbitrary length character vector specifying strings to %in% (grep = FALSE)
#' or length 1 character vector to grepl (if grep = TRUE)
#' or a length 1 or 2 numeric vector specifying exact value or interval to match (for numeric data)
#' Every list element of signature will become a metadata column in the output GRanges
#' specifying how many positions in the given interval match the given query
#' @param field, a chracter vector for use with numeric covariates (NA otherwise) the indicates the column containing the values of that covarites.
#' For example, if you have a covariate for replication timing and the timings are in the column 'value', the parameter field should be set to the character 'Value'
#' @param na.rm, logical vector that indicates whether or not to remove nas in the covariates
#' @param grep, a chracter vector of  grep for use with sequence covariates of class ffTrack
#' The function fftab is called during the processing of ffTrack sequence covariates grep is used to specify inexact matches (see fftab)
#' @param data, a list of covariate data that can include any of the covariate classes (GRanges, ffTrack, RleList, character)
#' @param log logical flag specifying whether to log the numeric covariate, only applicable to numeric covariate
#' @param count logical flag specifying whether to count the number of intervals 
#' @return Covariate object that can be passed directly to the Chromunity object constructor
#' @author Marcin Imielinski
#' @import R6
#' @export
Covariate = R6::R6Class('Covariate',
    public = list(

      ## See the class documentation
        initialize = function(data = NULL,
                              field = as.character(NA),
                              name = as.character(NA),
                              log = FALSE,
                              count = TRUE, 
                              pad = 0,
                              type = 'numeric',
                              signature = as.character(NA),
                              na.rm = as.logical(NA),
                              grep = as.logical(NA)){

        ##If data are valid and are a list of tracks concatenate with any premade covs
        if(is.null(data))
        {
          self$data = NULL
          self$names = name
          self$type = type
          self$signature = signature
          self$field = field
          self$pad = pad
          self$log = log
          self$count = count
          self$na.rm = na.rm
          self$grep = grep
          return()
        }

        if(class(data) != 'list'){
          data = list(data)
        }

        ## replicate params and data if necessary
        params = data.table(id = 1:length(data), field = field, name = name, pad = pad, type = type, signature = signature, na.rm = na.rm, grep = grep, log = log, count = count)

        if (length(data)==1 & nrow(params)>1)
          data = rep(data, nrow(params))

        self$data = data

        params$dclasses = sapply(self$data, class)

        if (any(ix <<- params$dclasses == 'character'))
        {
          if (any(iix <<- !file.exists(unlist(self$data[ix]))))
          {
            stop(sprintf('Some covariate files not found:\n%s', paste(unlist(self$data[ix][iix]), collapse = ',')))
          }
        }

        ## for any GRanges data that are provided where there is more than one metadata
        ## column, there should be a field given, otherwise we complain
        dmeta = NULL
        if (any(ix <<- params$dclasses != 'character'))
        {
          dmeta = lapply(self$data[ix], function(x) names(values(x)))
        }

        ## we require field to be specified if GRanges have more than one metadata column, otherwise
        ## ambiguous
        if (length(dmeta)>0)
        {
          ## check to make sure that fields actually exist in the provided GRanges arguments
          found = mapply(params$field[ix], dmeta, FUN = function(x,y) ifelse(is.na(x), NA, x %in% y))

          if (any(!found, na.rm = TRUE))
          {
            stop('Some provided Covariate fields not found in their corresponding GRanges metadata, please check arguments')
          }
        }

        if (na.ix <<- any(is.na(params$name)))
        {
          params[na.ix, name := ifelse(!is.na(field), field, paste0('Covariate', id))]
          params[, name := dedup(name)]
        }

        ## label any type = NA covariates for which a field has not been specified
        ## as NA by default
        if (any(na.ix <<- !is.na(params$field) & is.na(params$type)))
        {
          params[na.ix, type := 'numeric']
        }

        if (any(na.ix <<- is.na(params$field) & is.na(params$type)))
        {
          params[na.ix, type := 'interval']
        }

        ## check names to make sure not malformed, i.e. shouldn't start with number or contain spaces or special
        ## characters

        if (any(iix <<- grepl('\\W', params$name)))
        {
          warning('Replacing spaces and special characters with "_" in Covariate names')
          params$names[iix] = gsub('\\W+', '_', params$name[iix])
        }
        
        if (!is.null(params$name))
          {
            if (any(iix <<- duplicated(params$name)))
            {
              warning('Deduping covariate names')
              params$name = dedup(params$name)
            }
          }

        self$names = params$name
        self$type = params$type
        self$signature = params$signature
        self$field = params$field
        self$pad = params$pad
        self$log = params$log
        self$count = params$count
        self$na.rm = params$na.rm
    },

    ## Params:
    ## ... Other Covariates to be merged into this array, note that it can be any number of Covariates
    ## Return:
    ## A single Covariate object that contains the contents of self and all passed Covariates
    ## UI:
    ## None
    ## Notes:
    ## This is linked to the c.Covariate override and the c.Covariate override should be used preferentially over this
    merge = function(...){
        return (c(self,...))
    },


    ## Params:
    ## No params required, included arguments will be ignored.
    ## Return:
    ## returns a list of character vectors. If the respective covariate is of class GRanges, the vector will contain all of the chromosome names,
    ## if it is not of class GRanges, will return NA
    ## UI:
    ## None
    seqlevels = function(...){
        if(length(private$pCovs) == 0){
            return(NULL)
        }
        seqs = lapply(c(1:length(private$pCovs)), function(x){
            cov = private$pCovs[[x]]
            if(class(cov) == 'GRanges'){
                return(GenomeInfoDb::seqlevels(cov))
            } else{
                return(NA)
            }
        })
        return(seqs)
    },

    ## Params:
    ## range, a numeric vector of the covariates to include. e.g. if the Covariate contains the covariates (A,B,C) and the range is c(2:3),
    ## this indicates you wish to get a Covariate containing (B,C). NOTE THAT THIS DOES NOT RETURN A NEW COV_ARR, IT MODIFIES THE CURRENT.
    ## Return:
    ## None, this modifies the Covariate on which it was called
    ## UI:
    ## None
    ## Notes:
    ## If you want to create a new Covariate containing certain covariates, use the '[' operator, e.g. Covariate[2:3]
    subset = function(range, ...){

      private$pCovs = private$pCovs[range]
      private$pnames = private$pnames[range]
      private$ptype = private$ptype[range]
      private$psignature = private$psignature[range]
      private$pfield = private$pfield[range]
      private$ppad = private$ppad[range]
      private$plog = private$plog[range]
      private$pcount = private$pcount[range]
      private$pna.rm = private$pna.rm[range]
    },

    ## Params:
    ## No params required, included arguments will be ignored.
    ## Return:
    ## A list of lists where each internal list corresponds to the covariate and is for use internally in the annotate.hypotheses function
    ## The list representation of the covariate will contain the following variables: type, signature, pad, na.rm, field, grep
    ## UI:
    ## None
    toList = function(...){
        if(length(private$pCovs) == 0){
            return(list())
        }
        out = lapply(c(1:length(private$pCovs)), function(x){
            return (list(track = private$pCovs[[x]],
                         type = private$ptype[x],
                         signature = private$psignature[x],
                         pad = private$ppad[x],
                         na.rm = private$pna.rm[x],
                         field = private$pfield[x],
                         log = private$plog[x],
                         count = private$pcount[x]
                         ))
        })
        names(out) = private$pnames
        return(out)

        },

    ## Params:
    ## No params required, included arguments will be ignored.
    ## Return:
    ## Nothing
    ## UI:
    ## Prints information about the Covariate to the console with all of covariates printed in order with variables printed alongside each covariate
    print = function(...){
        if(length(private$pCovs) == 0){
            fmessage('Empty Covariate Object')
            return(NULL)
        }

        message('', length(private$pCovs), ' Covariates with features:')
        print(data.table(
          name = private$pnames,
          type = private$ptype,
          class = sapply(private$pCovs, class),
          field = private$pfield,
          signature = private$psignature,
          na.rm = private$pna.rm,
          pad = private$ppad,
          log = private$plog,
          count = private$pcount
        ))

        ## out= sapply(c(1:length(private$pCovs)),
        ##     function(x){
        ##         cat(c('Covariate Number: ' , x, '\nName: ', grepprivate$pnames[x],
        ##         '\ntype: ',private$ptype[x], '\tsignature: ', private$psignature[x],
        ##         '\nfield: ',private$pfield[x], '\tpad: ', private$ppad[x],
        ##         '\nna.rm: ', private$pna.rm[x], '\tgrep: ', private$pgrep[x],
        ##         '\nCovariate Class: ', class(private$pCovs[[x]]), '\n\n'), collapse = '', sep = '')
    }
    ),

    ## Private variables are internal variables that cannot be accessed by the user
    ## These variables will have active representations that the user can interact with the update
    ## and view these variables, all internal manipulations will be done with these private variables
    private = list(
      ## The list of covariates, each element can be of class: 'GRanges', 'character', 'RleList', 'ffTrack'
      pCovs = list(),
      ## A string vector containing the names of the covariates, the covariate will be refered to by its name in the final table
      pnames = c(),
      ## Type is a string vector of types for each covariate, can be: 'numeric','sequence', or 'interval'
      ptype = c(),
      ## A vector of signatures for use with ffTrack, se fftab
      psignature = c(),
      ## A character vector of field names for use with numeric covariates, see the Covariate class definition for more info
      pfield = c(),
      ## A numeric vector of paddings for each covariate, see the 'pad' param in Covariate class definition for more info
      ppad = c(),
      ## A logical vector for each covariate, see the 'na.rm' param in Covariate class definition for more info
      pna.rm = c(),
      ## logical vector specifying whether to log
      plog = c(),
      ## logical vector specifying whether to count
      pcount = c(),
      ##  Valid Covariate Types
      COV.TYPES = c('numeric', 'interval'),
      ##  Valid Covariate Classes
      COV.CLASSES = c('GRanges', 'RleList', 'ffTrack', 'character')
    ),

    ## The active list contains a variable for each private variable.
    ## Active variables are for user interaction,
    ## Interactions can be as such
    ## class$active will call the active variable function with the value missing
    ## class$active = value will call the active variable function with the value = value
    active = list(

            ## Covariate Names
            ## Here we check to make sure that all names are of class chracter and that they are the same length as pCovs -> the internal covariate list
            ## If the names vector is equal in length to the pCovs list length we will allow the assignment
            ## If the pCovs list length is a multiple of the names vector, will will replicate the names vector such that it matches in length
            ## to the pCovs list.
            names = function(value) {

                if(!missing(value)){

                  if(!is.character(value) && !all(is.na(value)) ){
                    stop('Error: names must be of class character')
                  }

                  if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                    stop('Error: Length of names must be of length equal to the number of Covariates or a divisor of number of covariates.')
                  }

                  if(length(private$pCovs) / length(value) != 1){
                    private$pnames = rep(value, length(private$pCovs)/length(value))
                    return(private$pnames)
                  }

                  if (any(iix <<- grepl('\\W', value)))
                  {
                    warning('Replacing spaces and special characters with "_" in provided Covariate names')
                    value[iix] = gsub('\\W+', '_', value[iix])
                  }

                  private$pnames = value
                  return(private$pnames)

                } else{
                  return(private$pnames)
                }
            },

            ## Covariate type
            ## Here we check to make sure that all types are of class chracter and that they are the same length as pCovs -> the internal covariate list
            ## We then check to make sure that each type is a valid type as defined by the COV.TYPES private parameter
            ## If the types vector is equal in length to the pCovs list length we will allow the assignment
            ## If the pCovs list length is a multiple of the types vector, will will replicate the types vector such that it matches in length
            ## to the pCovs list.
            type = function(value) {

                if(!missing(value)){
                    if(!is.character(value) && !all(is.na(value))){
                        stop('Error: type must be of class character')
                    }

                    if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                        stop('Error: Length of type must be of length equal to the number of Covariates or a divisor of number of covariates.')
                    }

                    if(!all(value %in% private$COV.TYPES)){
                        stop('"type" must be "numeric", "sequence", or "interval"')
                    }

                    if(length(private$pCovs) / length(value) != 1){
                        private$ptype = rep(value, length(private$pCovs)/length(value))
                        return(private$ptype)
                    }

                    private$ptype = value
                    return(private$ptype)

                } else{
                    return(private$ptype)
                }
            },

            ##Covariate Signature
            ## Here we check to make sure that all signatures are list within lists  and that they are the same length as pCovs -> the internal covariate list
            ## If the signature vector is equal in length to the pCovs list length we will allow the assignment
            ## If the pCovs list length is a multiple of the signature vector, will will replicate the signature vector such that it matches in length
            ## to the pCovs list.
            signature = function(value) {
                if(!missing(value)){

                    if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                        stop('Error: Length of signature must be of length equal to the number of Covariates or a divisor of number of covariates.')
                    }
                    if(length(private$pCovs) / length(value) != 1){
                        private$psignature = rep(value, length(private$pCovs)/length(value))
                        return(private$psignature)
                    }

                    private$psignature = value
                    return(private$psignature)

                } else{
                    return(private$psignature)
                }
            },

            ##Covariate Field
            ## Here we check to make sure that all fields are of class chracter and that they are the same length as pCovs -> the internal covariate list
            ## If the fields vector is equal in length to the pCovs list length we will allow the assignment
            ## If the pCovs list length is a multiple of the fields vector, will will replicate the fields vector such that it matches in length
            ## to the pCovs list.
            field = function(value) {
                if(!missing(value)){
                    if(!is.character(value) && !all(is.na(value))){
                       stop('Error: field must be of class character')
                    }
                    if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                        stop('Error: Length of field must be of length equal to the number of Covariates or a divisor of number of covariates.')
                    }

                    if(length(private$pCovs) / length(value) != 1){
                        private$pfield = rep(value, length(private$pCovs)/length(value))
                        return(private$pfield)
                    }

                    private$pfield = value
                    return(private$pfield)

                } else{
                    return(private$pfield)
                }
            },

          ## Covariate log
          ## Here we check to make sure that all pad are of class numeric and that they are the same length as pCovs -> the internal covariate list
          ## If the pad vector is equal in length to the pCovs list length we will allow the assignment
          ## If the pCovs list length is a multiple of the pad vector, will will replicate the pad vector such that it matches in length
          ## to the pCovs list.
          log = function(value) {
            if(!missing(value)){
              if(!is.logical(value) && !all(is.na(value))){
                stop("Error: log must be of class logical")
              }
              if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                stop("Error: Length of pad must be of length equal to the number of Covariates or a divisor of number of covariates.")
              }
              if(length(private$pCovs) / length(value) != 1){
                private$plog = rep(value, length(private$pCovs)/length(value))
                return(private$plog)
              }
              
              private$plog = value
                    return(private$plog)
              
            } else{
              return(private$plog)
            }
          },
          
          ## Covariate Paddinig
          ## Here we check to make sure that all pad are of class numeric and that they are the same length as pCovs -> the internal covariate list
          ## If the pad vector is equal in length to the pCovs list length we will allow the assignment
          ## If the pCovs list length is a multiple of the pad vector, will will replicate the pad vector such that it matches in length
          ## to the pCovs list.
          count = function(value) {
            if(!missing(value)){
              if(!is.logical(value) && !all(is.na(value))){
                stop("Error: count must be of class logical")
              }
              if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                stop("Error: Length of count must be of length equal to the number of Covariates or a divisor of number of covariates.")
              }
              if(length(private$pCovs) / length(value) != 1){
                private$pcount = rep(value, length(private$pCovs)/length(value))
                return(private$pcount)
              }

              private$pcount = value
              return(private$pcount)

            } else{
              return(private$pcount)
            }
          },



            ## Covariate Padding
            ## Here we check to make sure that all pad are of class numeric and that they are the same length as pCovs -> the internal covariate list
            ## If the pad vector is equal in length to the pCovs list length we will allow the assignment
            ## If the pCovs list length is a multiple of the pad vector, will will replicate the pad vector such that it matches in length
            ## to the pCovs list.
            pad = function(value) {
                if(!missing(value)){
                    if(!is.numeric(value) && !all(is.na(value))){
                        stop("Error: pad must be of class numeric")
                    }
                    if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                        stop("Error: Length of pad must be of length equal to the number of Covariates or a divisor of number of covariates.")
                    }
                    if(length(private$pCovs) / length(value) != 1){
                        private$ppad = rep(value, length(private$pCovs)/length(value))
                        return(private$ppad)
                    }

                    private$ppad = value
                    return(private$ppad)

                } else{
                    return(private$ppad)
                }
            },




            ##Covariate na.rm
            ## Here we check to make sure that all na.rms are of class logical and that they are the same length as pCovs -> the internal covariate list
            ## If the na.rms vector is equal in length to the pCovs list length we will allow the assignment
            ## If the pCovs list length is a multiple of the na.rms vector, will will replicate the na.rms vector such that it matches in length
            ## to the pCovs list.
            na.rm = function(value) {
                if(!missing(value)){
                    if(!is.logical(value) && !all(is.na(value))){
                        stop("Error: na.rm must be of class logical")
                    }
                    if(length(value) != length(private$pCovs) & length(private$pCovs) %% length(value) != 0){
                        stop("Error: Length of na.rm must be of length equal to the number of Covariates or a divisor of number of covariates.")
                    }

                    if(length(private$pCovs) / length(value) != 1){
                        private$pna.rm = rep(value, length(private$pCovs)/length(value))
                        return(private$pna.rm)
                    }

                    private$pna.rm = value
                    return(private$pna.rm)

                } else{
                    return(private$pna.rm)
                }
            },

           

            ##Covariate Covs
            data = function(value) {
                if(!missing(value)){
                    private$pCovs = value
                    return(private$pCovs)
                } else{
                    return(private$pCovs)
                }
            }
    ),
)

#' @name c.Covariate
#' @title title
#' @description
#'
#' Override the c operator for covariates so that you can merge them like a vector
#'
#' @param ... A series of Covariates, note all objects must be of type Covariate
#' @return Covariate object that can be passed directly into the Chromunity object constructor that contains all of the Covariate covariates
#' Passed in the ... param
#' @author Zoran Z. Gajic
#' @export
'c.Covariate' = function(...){

  ##Ensure that all params are of type Covariate
    Covariates = list(...)
    isc = sapply(Covariates, function(x)  class(x)[1] == 'Covariate')

    if(any(!isc)){
        stop('Error: All inputs must be of class Covariate.')
    }

    ## Merging vars of the covariates
  names  = unlist(lapply(Covariates, function(x) x$names))
  type  = unlist(lapply(Covariates, function(x) x$type))
  signature  = unlist(lapply(Covariates, function(x) x$signature))
  field  = unlist(lapply(Covariates, function(x) x$field))
  pad  = unlist(lapply(Covariates, function(x) x$pad))
  na.rm  = unlist(lapply(Covariates, function(x) x$na.rm))
  log  = unlist(lapply(Covariates, function(x) x$log))
  count  = unlist(lapply(Covariates, function(x) x$count))

  ## Merging Covariates
  covs = lapply(Covariates, function(x) x$data)
  Covs = unlist(covs, recursive = F)

  ##Creating a new Covariate and assigning all of the merged variables to it
  ret = Covariate$new(data = Covs, name = names, type = type, signature = signature, field = field, pad = pad, na.rm = na.rm, grep = grep)
  
  return(ret)
}



#' @name [.Covariate
#' @title title
#' @description
#'
#' Overrides the subset operator x[] for use with Covariate to allow for vector like subsetting
#'
#' @param obj Covariate This is the Covariate to be subset
#' @param range vector This is the range of Covariates to return, like subsetting a vector. e.g. c(1,2,3,4,5)[3:4] == c(3,4)
#' @return A new Covariate object that contains only the Covs within the given range
#' @author Zoran Z. Gajic
#' @export
'[.Covariate' = function(obj, range){
  if (any(is.na(range)))
    stop('NA in subscripts not allowed')

  if (any(range>length(obj)))
    stop('Subscript out of bounds')

  ##Clone the object so we don't mess with the original
  ret = obj$clone()
  ##Call the subset function of the Covariate class that will modify the cloned Covariate
  ret$subset(range)
  return (ret)
}


#' @name names.Covariate
#' @title title
#' @description
#'
#' Overrides the names function for Covariate object
#'
#' @param obj Covariate This is the Covariate whose names we are extracting'
#' @return names of covariates
#' @author Zoran Z. Gajic
#' @export
'names.Covariate' = function(x){
    ##Call the subset function of the Covariate class that will modify the cloned Covariate
    return (x$names)
}


#' @name length.Covariate
#' @title title
#' @description
#'
#' Overrides the length function 'length(Covariate)' for use with Covariate
#'
#' @param obj Covariate object that is passed to the length function
#' @return number of covariates contained in the Covariate object as defined by length(Covariate$data)
#' @author Zoran Z. Gajic
#' @export
'length.Covariate' = function(obj,...){
    return(length(obj$data))
}


#' @name covariate
#' @title covariate
#' @description
#'
#' function to initialize Covariates for passing to Chromunity object constructor.
#'
#' Can also be initiated by passing a vector of multiple vectors of equal length, each representing one of the internal variable names
#' You must also include a list containg all of the covariates (Granges, chracters, RLELists, ffTracks)
#'
#' Covariate serves to mask the underlieing list implemenations of Covariates in the Chromunity Object.
#' This class attempts to mimic a vector in terms of subsetting and in the future will add more vector like operations.
#'
#'
#' @param name character vector Contains names of the covariates to be created, this should not include the names of any Cov objects passed
#' @param log  logical flag whether to log output for numeric covariate after averaging [TRUE]
#' @param count logical flag whether to count the number of intervals (count = TRUE) or aggregate the width (count = FALSE) [TRUE]
#' @param pad numeric vector Indicates the width to extend each item in the covarite. e.g. if you have a GRanges covariate with two ranges (5:10) and (20:30) with a pad of 5,
#' These ranges wil become (0:15) and (15:35)
#' @param type character vector Contains the types of each covariate (numeric, interval, sequencing)
#' @param signature, see ffTrack, a vector of signatures for use with ffTrack sequence covariates
#' fftab signature: signatures is a named list that specify what is to be tallied.  Each signature (ie list element)
#' consist of an arbitrary length character vector specifying strings to %in% (grep = FALSE)
#' or length 1 character vector to grepl (if grep = TRUE)
#' or a length 1 or 2 numeric vector specifying exact value or interval to match (for numeric data)
#' Every list element of signature will become a metadata column in the output GRanges
#' specifying how many positions in the given interval match the given query
#' @param field, a chracter vector for use with numeric covariates (NA otherwise) the indicates the column containing the values of that covarites.
#' For example, if you have a covariate for replication timing and the timings are in the column 'value', the parameter field should be set to the character 'Value'
#' @param na.rm, logical vector that indicates whether or not to remove nas in the covariates
#' @param grep, a chracter vector of  grep for use with sequence covariates of class ffTrack
#' The function fftab is called during the processing of ffTrack sequence covariates grep is used to specify inexact matches (see fftab)
#' @param data, a list of covariate data that can include any of the covariate classes (GRanges, ffTrack, RleList, character)
#' @return Covariate object that can be passed directly to the Chromunity object constructor
#' @author Zoran Z. Gajic
#' @import R6
#' @export
covariate = function(data = NULL, field = as.character(NA), name = as.character(NA), log = TRUE, count = TRUE, pad = 0, type = as.character(NA),
               signature = as.character(NA), na.rm = NA, grep = NA){
  Covariate$new(name = name, data = data, pad = pad, type = type, count = count, log = log, signature = signature,
                field = field, na.rm = na.rm, grep = grep)
}



#' @name dunlist
#' @title dunlist
#' @description
#'
#' Utility function to unlist a list into a data.table
#'
#' @param x list
dunlist = function (x) 
{
    listid = rep(1:length(x), elementNROWS(x))
    if (!is.null(names(x))) 
        listid = names(x)[listid]
    xu = unlist(x, use.names = FALSE)
    if (is.null(xu)) {
        return(as.data.table(list(listid = c(), V1 = c())))
    }
    if (!(inherits(xu, "data.frame")) | inherits(xu, "data.table")) 
        xu = data.table(V1 = xu)
    out = cbind(data.table(listid = listid), xu)
    setkey(out, listid)
    return(out)
}


################################
#' @name dedup
#' @title dedup
#'
#' @description
#' relabels duplicates in a character vector with .1, .2, .3
#' (where "." can be replaced by any user specified suffix)
#'
#' @param x input vector to dedup
#' @param suffix suffix separator to use before adding integer for dups in x
#' @return length(x) vector of input + suffix separator + integer for dups and no suffix for "originals"
#' @author Marcin Imielinski
################################
dedup = function(x, suffix = '.')
{
  dup = duplicated(x);
  udup = setdiff(unique(x[dup]), NA)
  udup.ix = lapply(udup, function(y) which(x==y))
  udup.suffices = lapply(udup.ix, function(y) c('', paste(suffix, 2:length(y), sep = '')))
  out = x;
  out[unlist(udup.ix)] = paste(out[unlist(udup.ix)], unlist(udup.suffices), sep = '');
  return(out)
}


################################
#' @name glm.nb.fh
#' @title glm.nb.fh
#'
#' @description
#' modified glm.nb that takes a pre-defined theta
#' 
#' 
#' @author Marcin Imielinski
################################
glm.nb.fh = function (formula, data, weights, subset, na.action, start = NULL,
                      etastart, mustart, control = glm.control(...), method = "glm.fit",
                      model = TRUE, x = FALSE, y = TRUE, contrasts = NULL, ...,  theta = NULL,
                      init.theta, link = log)
{
    loglik <- function(n, th, mu, y, w) sum(w * (lgamma(th +
                                                        y) - lgamma(th) - lgamma(y + 1) + th * log(th) + y *
                                                 log(mu + (y == 0)) - (th + y) * log(th + mu)))
    link <- substitute(link)
    fam0 <- if (missing(init.theta))
                do.call("poisson", list(link = link))
            else do.call("negative.binomial", list(theta = init.theta,
                                                   link = link))
    mf <- Call <- match.call()
    m <- match(c("formula", "data", "subset", "weights", "na.action",
                 "etastart", "mustart", "offset"), names(mf), 0)
    mf <- mf[c(1, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- quote(stats::model.frame)
    mf <- eval.parent(mf)
    Terms <- attr(mf, "terms")
    if (method == "model.frame")
        return(mf)
    Y <- model.response(mf, "numeric")
    X <- if (!is.empty.model(Terms))
             model.matrix(Terms, mf, contrasts)
         else matrix(, NROW(Y), 0)
    w <- model.weights(mf)
    if (!length(w))
        w <- rep(1, nrow(mf))
    else if (any(w < 0))
        stop("negative weights not allowed")
    offset <- model.offset(mf)
    mustart <- model.extract(mf, "mustart")
    etastart <- model.extract(mf, "etastart")
    n <- length(Y)
    if (!missing(method)) {
        if (!exists(method, mode = "function"))
            stop(gettextf("unimplemented method: %s", sQuote(method)),
                 domain = NA)
        glm.fitter <- get(method)
    }
    else {
        method <- "glm.fit"
        glm.fitter <- stats::glm.fit
    }
    if (control$trace > 1)
        message("Initial fit:")

    fit <- glm.fitter(x = X, y = Y, w = w, start = start, etastart = etastart,
                      mustart = mustart, offset = offset, family = fam0, control = list(maxit = control$maxit,
                                                                                        epsilon = control$epsilon, trace = control$trace >
                                                                                                                       1), intercept = attr(Terms, "intercept") > 0)
    class(fit) <- c("glm", "lm")
    mu <- fit$fitted.values
    if (is.null(theta))
    {
        th <- as.vector(theta.ml(Y, mu, sum(w), w, limit = control$maxit,
                                 trace = control$trace > 2))
    }
    else
    {
        th = theta

        if (control$trace > 1)
            message(gettextf("Fixing theta value to 'theta': %f", signif(th)),
                    domain = NA)
    }

    if (control$trace > 1)
        message(gettextf("Initial value for 'theta': %f", signif(th)),
                domain = NA)
    fam <- do.call("negative.binomial", list(theta = th[1], link = link))
    iter <- 0
    d1 <- sqrt(2 * max(1, fit$df.residual))
    d2 <- del <- 1
    g <- fam$linkfun
    Lm <- loglik(n, th, mu, Y, w)
    Lm0 <- Lm + 2 * d1
    while (
    ((iter <- iter + 1) <= control$maxit) &
    ((abs(Lm0 - Lm)/d1 + abs(del)/d2) > control$epsilon)
    ){
        eta <- g(mu)
        fit <- glm.fitter(x = X, y = Y, w = w, etastart = eta,
                          offset = offset, family = fam, control = list(maxit = control$maxit,
                                                                        epsilon = control$epsilon, trace = control$trace >
                                                                                                       1), intercept = attr(Terms, "intercept") >
                                                                                                               0)
        t0 <- th
        if (is.null(theta))
        {
            th <- theta.ml(Y, mu, sum(w), w, limit = control$maxit,
                           trace = control$trace > 2)
        } else
        {
            th = theta
        }

        fam <- do.call("negative.binomial", list(theta = th[1],  ## we don't need all the thetas here if theta is vectorized
                                                 link = link))

        mu <- fit$fitted.values
        del <- t0 - th ## this is where the vectorized theta makes a difference
        Lm0 <- Lm
        Lm <- loglik(n, th, mu, Y, w) ## and here - log likelihood computation
        if (control$trace) {
            Ls <- loglik(n, th, Y, Y, w)
            Dev <- 2 * (Ls - Lm)
            message(sprintf("Theta(%d) = %f, 2(Ls - Lm) = %f",
                            iter, signif(th), signif(Dev)), domain = NA)
        }
    }
    if (!is.null(attr(th, "warn")))
        fit$th.warn <- attr(th, "warn")
    if (iter > control$maxit) {
        warning("alternation limit reached")
        fit$th.warn <- gettext("alternation limit reached")
    }
    if (length(offset) && attr(Terms, "intercept")) {
        null.deviance <- if (length(Terms))
                             glm.fitter(X[, "(Intercept)", drop = FALSE], Y, w,
                                        offset = offset, family = fam, control = list(maxit = control$maxit,
                                                                                      epsilon = control$epsilon, trace = control$trace >
                                                                                                                     1), intercept = TRUE)$deviance
        else fit$deviance
        fit$null.deviance <- null.deviance
    }
    class(fit) <- c("negbin", "glm", "lm")
    fit$terms <- Terms
    fit$formula <- as.vector(attr(Terms, "formula"))
    Call$init.theta <- signif(as.vector(th), 10)
    Call$link <- link
    fit$call <- Call
    if (model)
        fit$model <- mf
    fit$na.action <- attr(mf, "na.action")
    if (x)
        fit$x <- X
    if (!y)
        fit$y <- NULL
    fit$theta <- as.vector(th)
    fit$SE.theta <- attr(th, "SE")
    fit$twologlik <- as.vector(2 * Lm)
    fit$aic <- -fit$twologlik + 2 * fit$rank + 2
    fit$contrasts <- attr(X, "contrasts")
    fit$xlevels <- .getXlevels(Terms, mf)
    fit$method <- method
    fit$control <- control
    fit$offset <- offset
    fit
}




########
#' @name muffle
#' @title muffle
#'
#' Runs code returning NULL is there is any error
#'
#' @param code R code to eval while suppressing all errors
#' @param ... additional tryCatch arguments
#' @return output of evaluated R code or NULL if error
#' @author Marcin Imielinski
#########
muffle = function(code, ...)
{
  return(tryCatch(code, error = function(e) NULL, ...))
}



#' @name dflm
#' @title dflm
#' @description
#'
#' Formats lm, glm, or fisher.test outputs into readable data.table
#'
dflm = function(x, last = FALSE, nm = '')
{
  if (is.null(x))
    out = data.frame(name = nm, method = as.character(NA), p = as.numeric(NA), estimate = as.numeric(NA), ci.lower = as.numeric(NA),  ci.upper = as.numeric(NA), effect = as.character(NA))
  else if (any(c('lm', 'betareg') %in% class(x)))
  {
    
    coef = as.data.frame(summary(x)$coefficients)
    colnames(coef) = c('estimate', 'se', 'stat', 'p')
    if (last)
      coef = coef[nrow(coef), ]
    coef$ci.lower = coef$estimate - 1.96*coef$se
    coef$ci.upper = coef$estimate + 1.96*coef$se
    if (!is.null(summary(x)$family))
    {
      fam = summary(x)$family$family
      if (summary(x)$family$link %in% c('log', 'logit'))
      {
        coef$estimate = exp(coef$estimate)
        coef$ci.upper= exp(coef$ci.upper)
        coef$ci.lower= exp(coef$ci.lower)
      }
    }
    else
      fam = 'Unknown'
    
    if (!last)
      nm = paste(nm, rownames(coef))
    out = data.frame(name = nm, method = fam, p = signif(coef$p, 3), estimate = coef$estimate, ci.lower = coef$ci.lower, ci.upper = coef$ci.upper, effect = paste(signif(coef$estimate, 3), ' [',  signif(coef$ci.lower,3),'-', signif(coef$ci.upper, 3), ']', sep = ''))
  }
  else if (class(x) == 'htest')
  {
    if (is.null(x$estimate))
      x$estimate = x$statistic
    if (is.null(x$conf.int))
      x$conf.int = c(NA, NA)
    out = data.table(name = nm, method = x$method, estimate = x$estimate, ci.lower = x$conf.int[1], ci.upper = x$conf.int[2], effect = paste(signif(x$estimate, 3), ' [',  signif(x$conf.int[1],3),'-', signif(x$conf.int[2], 3), ']', sep = ''), p = x$p.value)
  }
  else if (class(x) == 'polr')
  {
    coef = coef(summary(x)) %>% as.data.frame
    nm = paste(nm, rownames(coef))
    coef = as.data.table(coef)
    setnames(coef, c('estimate', 'se', 't'))
    out = data.table(name = nm) %>% cbind(coef)
    out$p =  pnorm(abs(out$t), lower.tail = FALSE) * 2
    out[, ci.lower := estimate-1.96*se]
    out[, ci.upper := estimate+1.96*se]
    out[, effect := paste(signif(estimate, 3), ' [',  signif(ci.lower,3),'-', signif(ci.upper, 3), ']', sep = '')]
  }
  else
  {
    out = data.frame(name = nm, method = x$method, p = signif(x$p.value, 3), estimate = x$estimate, ci.lower = x$conf.int[1], ci.upper = x$conf.int[2], effect = paste(signif(x$estimate, 3), ' [',  signif(x$conf.int[1],3),'-', signif(x$conf.int[2], 3), ']', sep = ''))
  }
  
  out$effect = as.character(out$effect)
  out$name = as.character(out$name)
  out$method = as.character(out$method)
  rownames(out) = NULL
  return(as.data.table(out))
}




#' Find peaks in a \code{GRanges} over a given meta-data field
#'
#' Finds "peaks" in an input GRanges with value field y.
#' first piles up ranges according to field score (default = 1 for each range)
#' then finds peaks.  If peel > 0, then recursively peels segments
#' contributing to top peak, and recomputes nextpeak "peel" times
#' if peel>0, bootstrap controls whether to bootstrap peak interval nbootstrap times
#' if id.field is not NULL will peel off with respect to unique (sample) id of segment and not purely according to width
#' if FUN preovided then will complex aggregating function of piled up values in dijoint intervals prior to computing "coverage"
#' (FUN must take in a single argument and return a scalar)
#' if id.field is not NULL, AGG.FUN is a second fun to aggregate values from id.field to output interval
#'
#' @param gr \code{GRanges} with some meta-data field to find peaks on
#' @param field character field specifying metadata to find peaks on, default "score, can be NULL in which case the count is computed
#' @param minima logical flag whether to find minima or maxima
#' @param id.field character denoting field whose values specifyx individual tracks (e.g. samples)
#' @param bootstrap logical flag specifying whether to bootstrap "peel off" to statistically determine peak boundaries
#' @param na.rm remove NA from data
#' @param pbootstrap  quantile of bootstrap boundaries to include in the robust peak boundary estimate (i.e. essentially specifies confidence interval)
#' @param nboostrap   number of bootstraps to run
#' @param FUN  function to apply to compute score for a single individual
#' @param AGG.FUN function to aggregate scores across individuals
gr.peaks = function(gr, field = 'score',
                    minima = FALSE,
                    peel = 0,
                    id.field = NULL,
                    bootstrap = TRUE,
                    na.rm = TRUE,
                    pbootstrap = 0.95,
                    nbootstrap = 1e4,
                    FUN = NULL,
                    AGG.FUN = sum,
                    peel.gr = NULL, ## when peeling will use these segs instead of gr (which can just be a standard granges of scores)
                    score.only = FALSE,
                    verbose = peel>0)
{
  
  if (!is(gr, 'GRanges'))
    gr = seg2gr(gr)
  
  if (is.null(field))
    field = 'score'
  
  if (!(field %in% names(values(gr))))
    values(gr)[, field] = 1
  
  if (is.logical(values(gr)[, field]))
    values(gr)[, field] = as.numeric(values(gr)[, field])
  
  if (peel>0 & !score.only)
  {
    if (verbose)
      cat('Peeling\n')
    out = GRanges()
    
    if (bootstrap)
      pbootstrap = pmax(0, pmin(1, pmax(pbootstrap, 1-pbootstrap)))
    
    ## peel.gr are an over-ride if we have pre-computed the score and only want to match peaks to their supporting segments
    if (is.null(peel.gr))
      peel.gr = gr
    
    for (p in 1:peel)
    {
      if (verbose)
        cat('Peel', p, '\n')
      if (p == 1)
        last = gr.peaks(gr, field, minima, peel = 0, FUN = FUN, AGG.FUN = AGG.FUN, id.field = id.field)
      else
      {
        ## only need to recompute peak in region containing any in.peak intervals
        in.peak = gr.in(gr, peak.hood)
        
        tmp = NULL
        if (any(in.peak))
          tmp = gr.peaks(gr[in.peak, ], field, minima, peel = 0, FUN = FUN, AGG.FUN = AGG.FUN, id.field = id.field)
        last = grbind(last[!gr.in(last, peak.hood)], tmp)
        names(values(last)) = field
      }
      
      ## these are the regions with the maximum peak value
      mix = which(values(last)[, field] == max(values(last)[, field]))
      
      ## there can be more than one peaks with the same value
      ## and some are related since they are supported by the same gr
      ## we group these peaks and define a tmp.peak to span all the peaks that are related
      ## to the top peak
      ## the peak is the span beteween the first and last interval with the maximum
      ## peak value that are connected through at least one segment to the peak value
      
      ##
      tmp.peak = last[mix]
      
      if (length(tmp.peak)>1)
      {
        tmp.peak.gr = gr[gr.in(gr, tmp.peak)]
        ov = gr.findoverlaps(tmp.peak, tmp.peak.gr)
        ed = rbind(ov$query.id, ov$subject.id+length(tmp.peak))[1:(length(ov)*2)]
        cl = igraph::clusters(igraph::graph(ed), 'weak')$membership
        tmp = tmp.peak[cl[1:length(tmp.peak)] %in% cl[1]]
        peak = GRanges(seqnames(tmp)[1], IRanges(min(start(tmp)), max(end(tmp))))
        values(peak)[, field] = values(tmp.peak)[, field][1]
      }
      else
        peak = tmp.peak
      ## tmp.peak is the interval spanning all the top values in this region
      
      in.peak1 =  gr.in(peel.gr, gr.start(peak))
      in.peak2 = gr.in(peel.gr, gr.end(peak))
      in.peak = in.peak1 | in.peak2
      
      ## peak.gr are the gr supporting the peak
      peak.gr = peel.gr[in.peak1 & in.peak2] ## want to be more strict with segments used for peeling
      peak.hood = reduce(peak.gr) ## actual peak will be a subset of this, and we can this in further iterations to limit peak revision
      
      in.peak = rep(FALSE, length(gr))
      if (bootstrap && length(peak.gr))
      {
        ## asking across bootstrap smaples how does the intersection fluctuate
        ## among segments contributing to the peak
        
        if (!is.null(id.field))
        {
          peak.gr = seg2gr(gr2dt(peak.gr)[, list(seqnames = seqnames[1], start = min(start),
                                                 eval(parse(text = paste(field, '= sum(', field, '*(end-start))/sum(end-start)'))),end = max(end)),
                                          by = eval(id.field)])
          names(values(peak.gr))[ncol(values(peak.gr))] = field ## not sure why I need to do this line, should be done above
        }
        
        B = matrix(sample(1:length(peak.gr), nbootstrap * length(peak.gr), prob = abs(values(peak.gr)[, field]), replace = TRUE), ncol = length(peak.gr))
        ## bootstrap segment samples
        ## the intersection is tha max start and min end among the segments in each
        st = apply(matrix(start(peak.gr)[B], ncol = length(peak.gr)), 1, max)
        en = apply(matrix(end(peak.gr)[B], ncol = length(peak.gr)), 1, min)
        
        ## take the left tail of the start position as the left peak boundary
        start(peak) = quantile(st, (1-pbootstrap)/2)
        
        ## and the right tail of the end position as the right peak boundary
        end(peak) = quantile(en, pbootstrap + (1-pbootstrap)/2)
        
        in.peak =  gr.in(gr, peak)
      }
      gr = gr[!in.peak]
      peak$peeled = TRUE
      out = c(out, peak)
      if (length(gr)==0)
        return(out)
    }
    last$peeled = FALSE
    return(c(out, last[-mix]))
  }
  
  if (na.rm)
    if (any(na <- is.na(values(gr)[, field])))
      gr = gr[!na]
  
  if (!is.null(FUN))
  {
    agr = GenomicRanges::disjoin(gr)
    values(agr)[, field] = NA
    tmp.mat = cbind(as.matrix(values(gr.val(agr[, c()], gr, field, weighted = FALSE, verbose = verbose, by = id.field, FUN = FUN, default.val = 0))))
    values(agr)[, field] = apply(tmp.mat, 1, AGG.FUN)
    gr = agr
  }
  
  cov = as(GenomicRanges::coverage(gr, weight = values(gr)[, field]), 'GRanges')
  
  if (score.only)
    return(cov)
  
  dcov = diff(cov$score)
  dchrom = diff(as.integer(seqnames(cov)))
  
  if (minima)
    peak.ix = (c(0, dcov) < 0 & c(0, dchrom)==0) & (c(dcov, 0) > 0 & c(dchrom, 0)==0)
  else
    peak.ix = (c(0, dcov) > 0 & c(0, dchrom)==0) & (c(dcov, 0) < 0 & c(dchrom, 0)==0)
  
  out = cov[which(peak.ix)]
  
  if (minima)
    out = out[order(out$score)]
  else
    out = out[order(-out$score)]
  
  names(values(out))[1] = field
  
  return(out)
}






################
## Legacy code
################


## ##############################
## ## chromunity
## ##############################
## #' @name chromunity
## #'
## #' @title Discovery of communities in Pore-C concatemers
## #' 
## #' @description This function takes in a GRanges with each row as Pore-C monomer with a metadata of the corresponding concatemer id 
## #' 
## #' @export
## #' @param this.pc.gr is the GRanges with each row as Pore-C monomer and must have a cmetadata column with corresponding concatemer annotation. The column should be called "read_idx"
## #' 
## #' @param k.knn numeric threshoold on k nearest concatemer neighbors to be used to create the graph. 
## #' 
## #' @param k.min numeric the threshold to number of concatemers pairs to be considered "similar"
## #' 
## #' @param tiles GRanges object dividing the genome in a fixed size bin to be defined by user
## #' 
## #' @param which.gr the GRanges for the window of interest
## #'
## #' @param filter_local_number boolean (default == FALSE) filters out concatemers in the window that fall below filter_local_thresh length
## #' 
## #' @param filter_local_thresh (default == NULL) numeric minimum length of concatemer to be considered for downstream analyses. To be set if filter_local_number == TRUE.
## #' @param take_sub_sample take a sub sample of concatemers. A random sample of fraction frac is taken
## #' 
## #' @param frac fraction of concatemer to be subsampled. To be set if take_sub_sample == TRUE.
## #'
## #' @param seed.n numeric set a seed when doing random subsampling
## #' 
## #' @return \code{chromunity} returns input GRanges with additional columns as follows:
## #' 
## #'    \item{community}{  numeric; \cr
## #'              the community annotation for each concatemer
## #'    }
## #'    \item{num.memb}{  numeric; \cr
## #'              number of members in each community
## #' 
## #' @author Aditya Deshpande


## chromunity <- function(this.pc.gr, k.knn = 10, k.min = 1, tiles, which.gr = which.gr, filter_local_number = FALSE, filter_local_thresh = NULL, take_sub_sample = FALSE, frac = 0.25, seed.n = 154){
    
##     reads = this.pc.gr 
##     if (filter_local_number){
##         message(paste0("Filtering out reads < ", filter_local_thresh))
##         reads = gr2dt(reads)
##         setkeyv(reads, c("seqnames", "start"))
##         reads[, max.local.dist := end[.N]-start[1], by = read_idx]
##         reads = reads[max.local.dist > filter_local_thresh]
##         reads = dt2gr(reads)
##     }
    
##     reads$tix = gr.match(reads, tiles)
##     reads = as.data.table(reads)[, count := .N, by = read_idx]
##     mat = dcast.data.table(reads[count > 2 ,]  %>% gr2dt, read_idx ~ tix, value.var = "strand", fill = 0)
##     mat2 = mat[, c(list(read_idx = read_idx), lapply(.SD, function(x) x >= 1)),.SDcols = names(mat)[-1]]
##     mat2 = suppressWarnings(mat2[, "NA" := NULL])
##     reads.ids = mat2$read_idx
##     mat2 = as.data.table(lapply(mat2, as.numeric))

##     if (take_sub_sample){
##         tot.num = nrow(mat2[rowSums(mat2[, -1]) > 1, ])
##         message(paste0("Total number of rows are: ", tot.num))
##         message("taking a subsample")
##         number.to.subsample = pmax(round(tot.num*frac), 1000)
##         message(paste0("Number sampled: ", number.to.subsample))
##         set.seed(seed.n)
##         gt = mat2[rowSums(mat2[, -1]) > 1, ][sample(.N, number.to.subsample), ] 
##     }

##     else {
##         gt = mat2[rowSums(mat2[, -1]) > 1, ]
##     }
    
##     ubx = gt$read_idx
##     message("Matrices made")
##     gc()

##     ## Prepare pairs for KNN
##     pairs = t(do.call(cbind, apply(gt[,setdiff(which(colSums(gt) > 1),1), with = FALSE] %>% as.matrix, 2, function(x) combn(which(x!=0),2))))
##     p1 = gt[pairs[,1], -1]
##     p2 = gt[pairs[,2], -1]
##     matching = rowSums(p1 & p2)
##     total = rowSums(p1 | p2)
##     dt = data.table(bx1 = pairs[,1], bx2 = pairs[,2], mat = matching, tot = total)[, frac := mat/tot]
##     dt2 = copy(dt)
##     dt2$bx2 = dt$bx1
##     dt2$bx1 = dt$bx2
##     dt3 = rbind(dt, dt2)
##     dt3$nmat = dt3$mat
##     dt3$nfrac = dt3$frac
##     setkeyv(dt3, c('nfrac', 'nmat'))
##     dt3 = unique(dt3)
##     dt3.2 = dt3[order(nfrac, nmat, decreasing = T)]
##     message("Pairs made")
##     gc()

##     ## Clustering    
##     k = k.knn
##     knn.dt = dt3.2[mat > 2 & tot > 2, .(knn = bx2[1:k]), by = bx1][!is.na(knn), ]
##     setkey(knn.dt)
##     knn = sparseMatrix(knn.dt$bx1, knn.dt$knn, x = 1)
##     knn.shared = knn %*% knn
##     message("KNN done")

##     ## community find in graph where each edge weight is # nearest neighbors (up to k) shared between the two nodes
##     KMIN = k.min
##     A = knn.shared*sign(knn.shared > KMIN)
##     A[cbind(1:nrow(A), 1:nrow(A))] = 0
##     A <- as(A, "matrix")
##     A <- as(A, "sparseMatrix")
##     A = A + t(A)
##     G = graph.adjacency(A, weighted = TRUE, mode = 'undirected')
##     cl.l = cluster_fast_greedy(G)
##     cl = cl.l$membership
##     message("Communities made")
##     memb.dt = data.table(read_idx = ubx[1:nrow(A)], community = cl)
##     reads = merge(reads, memb.dt, by = "read_idx")
##     reads[, num.memb := length(unique(read_idx)), by = community]
##     reads = dt2gr(reads)
##     return(reads)
## }


## ##############################
## ## annotate_multimodal_communities
## ##############################
## #' @name  annotate_multimodal_communities 
## #' 
## #'
## #' @title Annotates communities that are very dense with respect to genomic coordinates. 
## #' 
## #' @description Using the nature of distribution of contacts on genomic coordinates, annotates community that are local  
## #' 
## #' @export
## #' @param granges GRanges output from chromunity function
## #' 
## #' @param which.gr the GRanges for the window of interest
## #'
## #' @param min.memb numeric minimum number of members needed to be in a community to be considered for further analyses
## #' 
## #' @return \code{annotate_local_communities} returns input GRanges with additional columns as follows:
## #' 
## #'    \item{multimodal}{  boolean; \cr
## #'              whether a community had multimodal contacts based on parameters set by user
## #'    }
## #'  
## #' @author Aditya Deshpande

## annotate_multimodal_communities <- function(granges, which.gr, min.memb = 50){
##     this.dt = gr2dt(granges)[num.memb > min.memb]
##     ind.int = unique(this.dt$community)
##     dt.stat = data.table()
##     for (i in 1:length(ind.int)){
##         which.int = (ind.int)[i]  
##         message(which.int)
##         dt.int = this.dt[community == which.int]
##         dt.int.tmp = gr2dt(dt2gr(dt.int) %&&% which.gr)
##         setkeyv(dt.int.tmp, c("seqnames", "start"))
##         dt.sum = suppressWarnings(gr.sum(dt2gr(dt.int.tmp)+1e3))
##         y.max.val = max(dt.sum$score)
##         peak.gr = find_multi_modes( dt.sum[-1], w =  round(0.05*length(dt.sum[-1])), distance = 0.1*width(which.gr))
##         status = unique(peak.gr$bimodal)
##         dt.stat = rbind(dt.stat, data.table(community = which.int, multimodal = status))
##     }

##     this.dt = merge(this.dt, dt.stat, by = "community", allow.cartesian = T)
##     return(dt2gr(this.dt))
## }
        
        

## ##############################
## ## find_multi_modes
## ##############################
## #' @name  find_multi_modes 
## #' 
## #'
## #' @title Determines if a community has multimodal ditribution of contacts along genome
## #' 
## #' @description Using loess smoothing, determines if a community has multimodal ditribution of contacts along genome  
## #' 
## #' @export
## #' @param granges GRanges output from chromunity function for one community
## #' 
## #' @param x.field This is the X axis along which smoothing is done
## #' 
## #' @param y.field values to be smoothed
## #'
## #' @param w window size over which to smooth the distribution
## #' 
## #' @param distance numeric genomic distance beyond which a peak is not considered local 
## #' 
## #' @return \code{find_multi_modes} returns boolean value if more than one peak is present
## #'  
## #' @author Aditya Deshpande


## find_multi_modes <- function(granges, x.field = "start", y.field = "score", w = 1,  distance = 1e5) {
##     which.chr = seqlevels(granges)
##     x = x = gr2dt(granges)$start
##     y = values(granges)[, y.field]
##     n = length(y)
##     y.smooth <- loess(y ~ x, span = 0.1)$fitted
##     y.max <- rollapply(zoo(y.smooth), 2*w+1, max, align="center")
##     delta <- y.max - y.smooth[-c(1:w, n+1-1:w)]
##     i.max <- which(delta <= 0) + w
##     max.gr <- granges[i.max]
##     if (any(gr.dist(max.gr) > distance)){
##         max.gr$bimodal = TRUE
##     } else {
##         max.gr$bimodal = FALSE
##     }
##     return(max.gr)
## }

## #' @name chromunity
## #' @description
## #'
## #' Runs genome-wide chromunity detection across a sliding or provided genomic window
## #'
## #' @param concatemers GRanges with $cid
## #' @param resolution bin size for community detection [5e4]
## #' @param region region to run on [si2gr(concatemers)]
## #' @param windows GRanges or GRangesList of windows to test, if empty will be generated by splitting region GRanges into window.size tiles with stride stride
## #' @param window.size window size to do community detection within
## #' @param tiles.k.knn KNN parameter specifying how many nearest neighbors to sample when building KNN graph
## #' @param peak.thresh peak threshold with which to call a peak
## #' @param k.min minimal number of nearest neighbors an edge in KNN graph needs to have before community detection
## #' @param pad integer pad to use when computing the footprint of each chromunity and finding peak regions which become binsets
## #' @return list with items $binset,  $support, $params: $binsets is GRanges of bins with field $bid corresponding to binset id and $support which is the concatemer community supporting the binset which are GRanges with $bid
## #' @author Aditya Deshpande, Marcin Imielinski
## #' @export
## chromunity = function(concatemers, resolution = 5e4, region = si2gr(concatemers), windows = NULL, window.size = 2e6, max.slice = 1e6, min.support = 5, stride = window.size/2, mc.cores = 20, k.knn = 25, k.min = 5, pad = 1e3, peak.thresh = 0.85, seed = 42, verbose = TRUE)
## {
##   if (is.null(windows))
##     windows = gr.start(gr.tile(region, stride))+window.size/2

##   if (inherits(windows, 'GRanges'))
##     windows = split(windows, 1:length(windows))

##   if (is.null(concatemers$cid))
##   {
##       if ('read_idx' %in% names(values(concatemers)))
##         names(values(concatemers))[match('read_idx', names(values(concatemers)))] = 'cid'
##       else
##         stop("concatemer GRanges must have metadata column $cid or $read_idx specifying the concatemer id")
##   }
  
##   params = data.table(k.knn = k.knn, k.min = k.min, seed = seed)

##   bins = gr.tile(reduce(gr.stripstrand(unlist(windows))), 5e4)[, c()]

##   if (verbose) cmessage('Generated ', length(bins), ' bins across ', length(windows), ' windows')

##   if (verbose) cmessage('Matching concatemers with bins, and bins with windows using gr.match with max.slice ', max.slice, ' and ', mc.cores, ' cores')

##   ## (batch) match up concatemers with binids
##   concatemers$binid = gr.match(concatemers, bins, max.slice = max.slice, mc.cores =  mc.cores, verbose = verbose)

##   ## match window ids and bins 
##   binmap = bins %*% grl.unlist(windows)[, c('grl.ix')] %>% as.data.table %>% setnames('query.id', 'binid') %>% setnames('grl.ix', 'winid') %>% setkeyv('winid')

##   ## cycle through (possibly complex) windows call cluster_concatemers and convert to gr.sums
##   winids = unique(binmap$winid)

##   if (verbose) cmessage('Starting concatemer community detection across ', length(winids), ' windows')

##   cc = pbmclapply(winids, mc.cores = mc.cores, function(win)
##   {
##     suppressWarnings({
##       these.bins = binmap[.(win), ]
##       cc = concatemer_communities(concatemers %Q% (binid %in% these.bins$binid), k.knn = k.knn, k.min = k.min, seed = seed, verbose = verbose>1)
##       if (length(cc))
##       {
##         cc = cc[cc$support >= min.support]
##         cc$winid = win
##       }
##     })
##     cc
##   }) %>% do.call(grbind, .)

##   if (!length(cc))
##     return(Chromunity(concatemers = cc, chromunities = GRanges(), params = params))

##   uchid = unique(cc$chid)

##   if (verbose) cmessage('Analyzing gr.sums associated with ', length(uchid), ' concatemer communities to generate binsets')

##   binsets = pbmclapply(uchid, mc.cores = mc.cores, function(this.chid)
##   {
##     suppressWarnings({
##       this.cc = cc %Q% (chid == this.chid)
##       peaks = gr.sum(this.cc + pad) %>% gr.peaks('score')
##       binset = bins[, c()] %&% (peaks[peaks$score > quantile(peaks$score, peak.thresh)])
##       if (length(binset))
##       {
##         binset$chid = this.chid
##         binset$winid = this.cc$winid[1]
##       }
##     })
##     binset
##   })  %>% do.call(grbind, .)

##   return(Chromunity(concatemers = cc[cc$chid %in% binsets$chid], binsets = binsets, meta = params))
## }


## #' @name concatemer_communities
## #' @description
## #'
## #' Low level function that labels concatemers with chromunity ids $chid using community detection on a graph. 
## #'
## #' Given a GRanges of monomers labeled by concatemer id $cid
## #'
## #' @param concatemers GRanges of monomers with field $cid indicating concatemer id and $binid represent bin id
## #' @param tiles.k.knn KNN parameter specifying how many nearest neighbors to sample when building KNN graph
## #' @param k.min minimal number of nearest neighbors an edge in KNN graph needs to have before community detection
## #' @param drop.small logical flag specifying whether to remove "small" concatemers ie those with a footprint <= small argument [FALSE]
## #' @param small integer threshold for bases that define small concatemers, only relevant if drop.small = TRUE
## #' @param subsample.frac optional arg specifying fraction of concatemers to subsample [NULL]
## #' @param seed seed for subsampling
## #' @return GRanges of concatemers labeled by $c mmunity which specifies community id
## concatemer_communities = function (concatemers, k.knn = 25, k.min = 5, 
##     drop.small = FALSE,small = 1e4, 
##     subsample.frac = NULL, seed = 42, verbose = TRUE) 
## {
##   reads = concatemers

##   if (is.null(reads$cid))
##   {
##     if ('read_idx' %in% names(values(reads)))
##       names(values(reads))[match('read_idx', names(values(reads)))] = 'cid'
##     else
##       stop("concatemer GRanges must have metadata column $cid or $read_idx specifying the concatemer id")
##   }
  
##   if (drop.small) {
##     if (verbose) cmessage(paste0("Filtering out reads < ", small))
##     reads = gr2dt(reads)
##     setkeyv(reads, c("seqnames", "start"))
##     reads[, `:=`(max.local.dist, end[.N] - start[1]), by = cid]
##     reads = reads[max.local.dist > small]
##     reads = dt2gr(reads)
##   }

##   if (verbose) cmessage("Matching reads to tiles")
##   reads = as.data.table(reads)[, `:=`(count, .N), by = cid]
##   mat = dcast.data.table(reads[count > 2, ] %>% gr2dt, cid ~  binid, value.var = "strand", fun.aggregate = length, fill = 0)                                                        
##   mat2 = mat[, c(list(cid = cid), lapply(.SD, function(x) x >= 1)), .SDcols = names(mat)[-1]]                                                          
##   mat2 = suppressWarnings(mat2[, `:=`("NA", NULL)])
##   reads.ids = mat2$cid
##   mat2 = as.data.table(lapply(mat2, as.numeric))
##   if (!is.null(subsample.frac)) {
##     if (verbose) cmessage("Subsampling concatemers")
##     tot.num = nrow(mat2[rowSums(mat2[, -1]) > 1, ])
##     if (verbose) cmessage(paste0("Total number of rows are: ", tot.num))
##     if (verbose) cmessage("taking a subsample")
##     number.to.subsample = pmax(round(tot.num * subsample.frac), 1000)
##     if (verbose) cmessage(paste0("Number sampled: ", number.to.subsample))
##     set.seed(seed)
##     concatm = mat2[rowSums(mat2[, -1]) > 1, ][sample(.N, min(c(.N, number.to.subsample))), ]
##   }
##   else {
##     concatm = mat2[rowSums(mat2[, -1]) > 1, ]
##   }
##   ubx = concatm$cid

##   if (verbose) cmessage("Matrices made")
##   gc()

##   concatm = concatm[, setdiff(which(colSums(concatm) > 1), 1), with = FALSE]

##   if (!ncol(concatm))
##   {
##     warning('No concatemers found hitting two bins, returning empty result')
##     return(reads[, chid := NA][c(), ])
##   }

##   pairs = t(do.call(cbind, apply(concatm[, setdiff(which(colSums(concatm) > 1), 1), with = FALSE] %>% as.matrix, 2, function(x) combn(which(x != 0), 2))))
                                                    
##   concatm = as(as.matrix(as.data.frame(concatm)), "sparseMatrix")    
##   p1 = concatm[pairs[, 1], -1]
##   p2 = concatm[pairs[, 2], -1]
##   matching = Matrix::rowSums(p1 & p2)
##   total = Matrix::rowSums(p1 | p2)
##   dt = data.table(bx1 = pairs[, 1], bx2 = pairs[, 2], mat = matching, 
##                   tot = total)[, `:=`(frac, mat/tot)]
##   dt2 = copy(dt)
##   dt2$bx2 = dt$bx1
##   dt2$bx1 = dt$bx2
##   dt3 = rbind(dt, dt2)
##   dt3$nmat = dt3$mat
##   dt3$nfrac = dt3$frac
##   setkeyv(dt3, c("nfrac", "nmat"))
##   dt3 = unique(dt3)
##   dt3.2 = dt3[order(nfrac, nmat, decreasing = T)]
##   if (verbose) cmessage("Pairs made")
##   gc()
##   k = k.knn
##   knn.dt = dt3.2[mat > 2 & tot > 2, .(knn = bx2[1:k]), by = bx1][!is.na(knn), ]
##   setkey(knn.dt)
##   knn = sparseMatrix(knn.dt$bx1, knn.dt$knn, x = 1)
##   knn.shared = knn %*% knn
##   if (verbose) cmessage("KNN done")
##   KMIN = k.min
##   A = knn.shared * sign(knn.shared > KMIN)
##   A[cbind(1:nrow(A), 1:nrow(A))] = 0
##   A <- as(A, "matrix")
##   A <- as(A, "sparseMatrix")
##   A = A + t(A)
##   G = graph.adjacency(A, weighted = TRUE, mode = "undirected")
##   cl.l = cluster_fast_greedy(G)
##   cl = cl.l$membership
##   if (verbose) cmessage("Communities made")
##   memb.dt = data.table(cid = ubx[1:nrow(A)], chid = cl)
##   reads = merge(reads, memb.dt, by = "cid")
##   reads[, `:=`(support, length(unique(cid))), by = chid]
##   reads = dt2gr(reads)
##   return(reads)
## }

## #' @name smessage
## #' @description
## #'
## #' @author Aditya Deshpande, Marcin Imielinski
## #' @export
## #' @private 
## smessage = function(..., pre = 'Synergy')
##   message(pre, ' ', paste0(as.character(Sys.time()), ': '), ...)

## #' @name cmessage
## #' @description
## #'
## #' @author Aditya Deshpande, Marcin Imielinski
## #' @export
## #' @private 
## cmessage = function(..., pre = 'Chromunity')
##   message(pre, ' ', paste0(as.character(Sys.time()), ': '), ...)
mskilab/chromunity documentation built on June 24, 2024, 1:50 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
mskilab/chromunity
Discovery of communities in Pore-C concatemers

R/chromunity.R
In mskilab/chromunity: Discovery of communities in Pore-C concatemers

Defines functions parquet2gr

R Package Documentation

Browse R Packages

We want your feedback!

mskilab/chromunity Discovery of communities in Pore-C concatemers

R/chromunity.R In mskilab/chromunity: Discovery of communities in Pore-C concatemers

Defines functions parquet2gr

R Package Documentation

Browse R Packages

We want your feedback!

mskilab/chromunity
Discovery of communities in Pore-C concatemers

R/chromunity.R
In mskilab/chromunity: Discovery of communities in Pore-C concatemers