R/binBoundary.R

Defines functions mappable_data_1 mappable_data bin_boundaries

Documented in bin_boundaries

#' Determine bin boundaries
#'
#' Prepares the sorted mappable-region file and the per-chromosome mappable
#' length file, runs the `bin_boundary` Python routine shipped with the
#' package, and writes the result sorted numerically by its third column to
#' `bin.boundaries.<n>k.sorted.txt`, where `<n>` is `bincount / 1000`
#' (e.g. `bin.boundaries.5k.sorted.txt` for the default).
#'
#' @param data_dir The directory that contains the mappable region file
#'   (`mappable.regions.txt`) generated by simulation of the reference genome.
#'   The output files are written to the same directory: the bin boundary file
#'   (`bin.boundaries.<n>k.sorted.txt`), `mappable.regions.sorted.txt` and
#'   `chrom.mappable.txt`.
#' @param bincount An integer. The number of bins. Default value: 5000.
#' @return Invisibly, the status returned by [unlink()] when removing the
#'   intermediate `bin.boundaries.txt` file (0 on success). Called for its
#'   side effect of writing files into `data_dir`.
#' @export
bin_boundaries <- function(data_dir, bincount = 5000) {
  # Inputs consumed below: mappable.regions.sorted.txt and chrom.mappable.txt.
  mappable_data(data_dir)
  mappable_data_1(data_dir)

  # NOTE(review): the Python routine is presumed to write
  # <data_dir>/bin.boundaries.txt, since that is the file sorted below --
  # confirm against inst/bin_boundary.py.
  rPython::python.load(system.file("bin_boundary.py", package = "SCclust"))
  rPython::python.call("bin_boundary", data_dir, bincount)

  n_k <- bincount / 1000
  # path.expand() keeps "~" working now that the paths are shell-quoted.
  unsorted_path <- path.expand(file.path(data_dir, "bin.boundaries.txt"))
  sorted_path <- path.expand(
    file.path(data_dir, paste0("bin.boundaries.", n_k, "k.sorted.txt"))
  )

  # Quote both paths so directories containing spaces or shell
  # metacharacters are passed to `sort` verbatim.
  cmd <- paste(
    "sort -k 3,3n", shQuote(unsorted_path), ">", shQuote(sorted_path)
  )
  status <- system(cmd)
  if (status != 0) {
    # Keep the unsorted file around so the failure can be investigated.
    stop(
      "Sorting ", unsorted_path, " failed with exit status ", status,
      call. = FALSE
    )
  }

  # Remove the unsorted intermediate file without going through the shell.
  unlink(unsorted_path, force = TRUE)
}



## generate mappable.regions.sorted.txt and chrom.mappable.txt

# Sort the mappable-region file with the system `sort` utility.
#
# Reads <data_dir>/mappable.regions.txt and writes
# <data_dir>/mappable.regions.sorted.txt, ordered by the first field and then
# numerically by the second field.
#
# data_dir: directory holding mappable.regions.txt; the sorted file is
#   written next to it.
# Returns (and prints) a confirmation string. Stops with an error if the
# input file is missing or `sort` exits with a non-zero status.
mappable_data <- function(data_dir) {
  # path.expand() keeps "~" working now that the paths are shell-quoted.
  unsorted_path <- path.expand(file.path(data_dir, "mappable.regions.txt"))
  sorted_path <- path.expand(
    file.path(data_dir, "mappable.regions.sorted.txt")
  )

  if (!file.exists(unsorted_path)) {
    stop("Input file not found: ", unsorted_path, call. = FALSE)
  }

  # Quote both paths so directories containing spaces or shell
  # metacharacters are passed to `sort` verbatim.
  cmd <- paste(
    "sort -k 1,1 -k 2,2n", shQuote(unsorted_path), ">", shQuote(sorted_path)
  )
  status <- system(cmd)
  if (status != 0) {
    stop(
      "Sorting ", unsorted_path, " failed with exit status ", status,
      call. = FALSE
    )
  }

  print("sorted ... mappable.regions.sorted.txt generated!")
}

# Write the total mappable length per chromosome.
#
# Reads the tab-separated, header-less file
# <data_dir>/mappable.regions.sorted.txt, computes column 3 minus column 2
# for every row, sums those lengths within each distinct value of column 1,
# and writes the result (group, sum) as a tab-separated file without header,
# row names or quotes to <data_dir>/chrom.mappable.txt.
#
# data_dir: directory holding mappable.regions.sorted.txt; the output file is
#   written next to it.
# Returns (and prints) a confirmation string.
mappable_data_1 <- function(data_dir) {
  regions <- read.table(
    file.path(data_dir, "mappable.regions.sorted.txt"),
    header = FALSE,
    sep = "\t"
  )

  # Kept as read (integer when the input columns are integers) so that
  # write.table() does not switch to scientific notation for large sums.
  regions$length <- regions[, 3] - regions[, 2]

  chr_map <- aggregate(
    regions$length,
    by = list(regions[, 1]),
    FUN = sum,
    na.rm = TRUE
  )

  write.table(
    chr_map,
    file = file.path(data_dir, "chrom.mappable.txt"),
    sep = "\t",
    col.names = FALSE,
    row.names = FALSE,
    quote = FALSE
  )
  print("chrom.mappable.txt generated!")
}
JunyanSong/SCclust documentation built on April 16, 2022, 8:44 p.m.