# R/filterCirBed.R

#' Filter the bed file generated by CircleMap
#'
#' This function performs filtration of the raw bed file generated by
#' CircleMap. A row is kept only when its circle score, its number of
#' discordant read pairs and its number of split reads each reach the
#' corresponding threshold, and when its span (\code{end_coord - start_coord})
#' does not exceed \code{length}. Rows for which any of these comparisons
#' evaluates to \code{NA} are removed.
#'
#' If the column names of \code{cirBed} are not exactly the names listed in
#' the return value, the columns are renamed by position before filtering.
#'
#' @param cirBed A data frame with 11 columns, e.g. obtained with
#'   \code{read.table("my_unknown_circleMap.bed", sep = "\\t",
#'   stringsAsFactors = FALSE)}.
#' @param scores Threshold of circle score. Rows with a score below the
#'   threshold are removed.
#' @param discord Threshold of the number of discordant read pairs supporting
#'   the detected circular DNA. Rows below the threshold are removed.
#' @param splitreads Threshold of the number of split reads supporting the
#'   detected circular DNA. Rows below the threshold are removed.
#' @param length Maximum length (\code{end_coord - start_coord}) of the
#'   circular DNA detected. Longer rows are removed.
#' @return a data frame with the retained rows and the columns:
#' c("Chromosome","start_coord",
#'   "end_coord",
#'   "dis_cord",
#'   "split_reads",
#'   "circle_score",
#'   "Mean_coverage",
#'   "SD",
#'   "CISC",
#'   "CIEC",
#'   "coverage_continuity")
#' @export

filter_cirBed <- function(cirBed, scores = 50, discord = 4, splitreads = 4,
                          length = 30000000) {
  expected_names <- c(
    "Chromosome", "start_coord", "end_coord", "dis_cord", "split_reads",
    "circle_score", "Mean_coverage", "SD", "CISC", "CIEC",
    "coverage_continuity"
  )

  if (!is.data.frame(cirBed)) {
    stop("`cirBed` must be a data frame.", call. = FALSE)
  }

  # The `length` argument shadows base::length() inside this function, so the
  # base function is called explicitly.
  n_expected <- base::length(expected_names)
  if (ncol(cirBed) != n_expected) {
    stop(
      "`cirBed` must have ", n_expected, " columns, not ", ncol(cirBed), ".",
      call. = FALSE
    )
  }

  # Columns are matched by position; only rename when the names differ.
  if (!identical(colnames(cirBed), expected_names)) {
    colnames(cirBed) <- expected_names
  }

  span <- cirBed$end_coord - cirBed$start_coord
  keep <- cirBed$circle_score >= scores &
    cirBed$dis_cord >= discord &
    cirBed$split_reads >= splitreads &
    span <= length

  # which() discards NA entries, so rows with a missing comparison are dropped
  # rather than returned as all-NA rows.
  cirBed[which(keep), , drop = FALSE]
}
# huangyizR/test documentation built on June 17, 2020, 12:32 a.m.