# R/circPermuteBedpe.R

#' circPermuteBedpe
#'
#' Circular permutation of a BEDPE-df. Rows are grouped by the chromosome in
#' the first column; per chromosome one random offset is drawn and added to
#' all four coordinates. Any interval whose end then lies beyond the
#' chromosome length has the chromosome length subtracted from its start and
#' end until the end fits again (so a wrapped start can become negative).
#'
#' @param BEDPE A BEDPE-df with two BED-sets in the first 6 columns
#'   (chrom1, start1, end1, chrom2, start2, end2). Only pairs with both
#'   entries on the same chromosome are supported; a warning is given
#'   otherwise and the rows are still grouped by the first column.
#' @param chrom.sizes A data-frame containing two columns: chromosome-name
#'   and -length. Every chromosome in `BEDPE` must be listed with a finite,
#'   positive length.
#' @param minShuff The minimal amount of shuffle in bp (default: 0.1Mb)
#' @param maxShuff The maximal amount of shuffle in bp
#'   (default: chromosome-length)
#' @return A permuted bedpe-df (a data.table, built with
#'   `data.table::rbindlist`), with rows ordered per chromosome.
#' @details An error is raised when a chromosome of `BEDPE` has no usable
#'   entry in `chrom.sizes`, or when `minShuff` is larger than the maximal
#'   shuffle for a chromosome (e.g. a chromosome shorter than `minShuff`
#'   while `maxShuff` is left at its default).
#' @export
circPermuteBedpe <- function(BEDPE, chrom.sizes, minShuff = 1e5, maxShuff = NULL){
  # I tried to emulate the bedpe bedtools shuffle mode: function = ChoosePairedLocus.

  # The positional indexing below relies on plain data.frame semantics, so
  # data.table / tibble input is converted first.
  BEDPE <- as.data.frame(BEDPE)
  chrom.sizes <- as.data.frame(chrom.sizes)

  # Compare names as character so factor columns with different level sets
  # do not make `==` fail.
  chrom1 <- as.character(BEDPE[[1]])
  chrom2 <- as.character(BEDPE[[4]])

  # this now only works with both entries on the same chrom.
  if (any(chrom1 != chrom2, na.rm = TRUE)) {
    warning("There seem to be entries with two different chromosome. This is not supported.")
  }

  chromosomes <- unique(chrom1)

  # Look up every chromosome length up front, so that bad input is reported
  # before any random number is drawn.
  size_idx <- match(chromosomes, as.character(chrom.sizes[[1]]))
  if (anyNA(size_idx)) {
    stop(
      "No entry in chrom.sizes for chromosome(s): ",
      paste(chromosomes[is.na(size_idx)], collapse = ", "),
      call. = FALSE
    )
  }
  chrom_len <- chrom.sizes[[2]][size_idx]
  bad_len <- !is.numeric(chrom_len) | !is.finite(chrom_len) | chrom_len <= 0
  if (any(bad_len)) {
    # A non-positive length would make the wrap-around loop below endless.
    stop(
      "chrom.sizes must hold a finite, positive length for chromosome(s): ",
      paste(chromosomes[bad_len], collapse = ", "),
      call. = FALSE
    )
  }

  # If no max is given to the shuffle, use chrom-length.
  # This will lead to maximally 2 wrap-around iterations.
  if (is.null(maxShuff)) {
    shuffle_max <- chrom_len
  } else {
    shuffle_max <- rep_len(maxShuff[1], length(chromosomes))
  }

  # runif() yields NaN when min > max; report that explicitly.
  too_small <- which(minShuff[1] > shuffle_max)
  if (length(too_small) > 0) {
    stop(
      "minShuff (", minShuff[1], ") is larger than the maximal shuffle for ",
      "chromosome(s): ", paste(chromosomes[too_small], collapse = ", "),
      call. = FALSE
    )
  }

  coord_cols <- c(2, 3, 5, 6)
  permuted <- vector("list", length(chromosomes))
  names(permuted) <- chromosomes

  for (i in seq_along(chromosomes)) {
    CS <- chrom_len[i]

    # per chrom
    BPE <- BEDPE[which(chrom1 == chromosomes[i]), , drop = FALSE]

    # randomly get a shuffle-value and add it to the four coords
    s_amnt <- round(runif(1, minShuff, shuffle_max[i]))
    BPE[, coord_cols] <- BPE[, coord_cols] + s_amnt

    # check for end > chrom.size
    # if true: start & stop - chrom.size (each mate is wrapped on its own)
    repeat {
      over1 <- BPE[[3]] > CS
      over2 <- BPE[[6]] > CS
      if (!any(over1) && !any(over2)) {
        break
      }
      BPE[over1, 2:3] <- BPE[over1, 2:3] - CS
      BPE[over2, 5:6] <- BPE[over2, 5:6] - CS
    }

    permuted[[i]] <- BPE
  }

  data.table::rbindlist(permuted)
}
# robinweide/RHWlib documentation built on May 7, 2019, 8:03 a.m.