R/block_functions.R

Defines functions chr_ranger

chr_ranger <- function(dt){
  dt[,.(start=min(start),end=max(end)),by=chr][order(chr)][]
}


block_maker <- function(block_size,
                         stats=backgroundDT,
                         features=NULL){
  stat.ranges <- chr_ranger(stats)
  if (is.null(features)) {
    blocks <- stat.ranges[,.(start=seq(from=start,to=end,by=block_size)),by=chr][,.(chr,start,end=start+block_size-1)]
    blocks[,id:= .I]
    return(blocks[])
  } else{
    # filter blocks so that they intersect at least one feature
    #feature.ranges <- chr_ranger(data.table::rbindlist(lapply(features, function(x) chr_ranger(x))))
    #stat.ranges <- chr_ranger(data.table::rbindlist(list(stat.ranges,feature.ranges)))
    #blocks <- stat.ranges[,.(start=seq(from=start,to=end,by=block_size)),by=chr][,.(chr,start,end=start+block_size-1)]
    blocks <- stat.ranges[,.(start=seq(from=start,to=end,by=block_size)),by=chr][,.(chr,start,end=start+block_size-1)]
    blocks[,id:= .I]
    setkey(blocks,chr,start,end)
    setkey(features,chr,start,end)
    blocks <- blocks[id %in% data.table::foverlaps(blocks,features)[!gene %in% NA][,unique(id)]]
    blocks[,id:= .I]
    return(blocks[])
  }
}

block_cleaner <- function(blocks,
                          stats=backgroundDT,
                          features=list('f1','f2')){
  data.table::setkey(blocks,chr,start,end)
  n.stats <- stats[,uniqueN(stat)]
  blocks <- blocks[id %in% foverlaps(stats, blocks,nomatch = 0)[,.(n_stat=uniqueN(stat)),by=id][n_stat==n.stats]$id]
  # lapply(stats, function(x) data.table::setkey(x,chr,start,end))
  # blocks <- blocks[id %in% Reduce(union,lapply(stats, function(x) foverlaps(x,blocks,nomatch = 0)[,unique(id)]))]
  if (is.null(features)) {
    return(blocks)
  } else {
    lapply(features, function(x) data.table::setkey(x,chr,start,end))
    blocks <- blocks[id %in% Reduce(union,lapply(features, function(x) foverlaps(x,blocks,nomatch = 0,)[,unique(id)]))]
    blocks[,]
    return(blocks[])
  }
}

block_mapper <- function(dt, blocks_for_remap) {
  #type <- names(dt)[4]
  if(!identical(data.table::key(dt), c("chr", "start", "end"))){
    data.table::setkey(dt,chr,start,end)
  }
  if(!identical(data.table::key(blocks_for_remap), c("chr", "start", "end"))){
    data.table::setkey(blocks_for_remap,chr,start,end)
  }
  # if feature type file
  dt.blocks <- data.table::foverlaps(dt,blocks_for_remap,nomatch = 0)
  dt.blocks[,block.relative_start:=ifelse(start < i.start, i.start-start+1,1)]
  dt.blocks[,block.relative_end:=ifelse(i.end > end, end-start+1, i.end-start+1 )]
  dt.blocks[,block.relative_end:=ifelse(block.relative_end < block.relative_start, block.relative_start, block.relative_end)]
  if(dim(dt)[2]==4){
    type <- names(dt)[4]
    dt.blocks <- dt.blocks[,.(chr,id,start=i.start,end=i.end,block.relative_start,block.relative_end,get(type))]
    setnames(dt.blocks,"V7",eval(type))
  } else {dt.blocks <- dt.blocks[,.(chr,id,start=i.start,end=i.end,block.relative_start,block.relative_end)]}
  return(dt.blocks[])
}


remapper <- function(blocks,
                     to_remap){
  if( inherits(to_remap, "list") ){
    remapped <- lapply(to_remap,block_mapper, blocks_for_remap=blocks)
    names(remapped) <- names(to_remap)
    }
  if( inherits(to_remap, "data.table") ){
    remapped <- block_mapper(dt=to_remap,blocks_for_remap=blocks)
  }
  return(remapped)
}

trimmed_feature_finder <- function(clean_blocks,
                                   feature) {
  feature_type <- names(feature)[4]
  cols <- c(feature_type,"id")
  if(!identical(data.table::key(clean_blocks), c("chr", "start", "end"))){
    data.table::setkey(clean_blocks,chr,start,end)
  }
  if(!identical(data.table::key(feature), c("chr", "start", "end"))){
    data.table::setkey(feature,chr,start,end)
  }
  intersect <- data.table::foverlaps(feature[,.(chr,start,end,gene,length=end-start+1)],clean_blocks)
  intersect[,row:= .I]
  intersect[!id %in% NA, `:=`(int_start=max(start,i.start),int_end=min(end,i.end)), by = row]
  intersect[,int_length:=ifelse(!int_start %in% NA,int_end-int_start+1,0)]
  intersect <- unique(intersect[,.(chr,id,length,int_sum=sum(int_length)),by=gene])
  intersect[,coverage:=int_sum/length*100]
  intersect[coverage!=100,.(gene,length,coverage,id)][]
}
joshuamschmidt/multiPermr documentation built on Oct. 12, 2020, 11:42 a.m.