#' Determine bin boundaries
#'
#' Makes the sorted bin boundary file \code{bin.boundaries.<n>k.sorted.txt}
#' inside \code{data_dir}, where \code{<n>} is \code{bincount / 1000}.
#'
#' Steps performed, in order:
#' \enumerate{
#'   \item \code{mappable_data()} sorts \code{mappable.regions.txt}.
#'   \item \code{mappable_data_1()} writes \code{chrom.mappable.txt}.
#'   \item The \code{bin_boundary} function from the \code{bin_boundary.py}
#'     script shipped with the SCclust package is called with
#'     \code{data_dir} and \code{bincount}; it is expected to leave its
#'     result in \code{bin.boundaries.txt} inside \code{data_dir}.
#'   \item That file is sorted numerically on its third column with the
#'     system \code{sort} utility, and the unsorted file is removed.
#' }
#'
#' @param data_dir The directory that contains the mappable region file
#'   (mappable.regions.txt) generated by simulation of the reference genome.
#'   The output files are written to the same directory: the bin boundary
#'   file (bin.boundaries.nk.sorted.txt), mappable.regions.sorted.txt and
#'   chrom.mappable.txt.
#' @param bincount An integer. The number of bins. Default value: 5000.
#' @return Invisibly, the integer status of removing the unsorted
#'   intermediate file (0 for success). An error is raised if the sort
#'   step fails; in that case the intermediate file is kept.
#' @export
bin_boundaries <- function(data_dir, bincount = 5000) {
  mappable_data(data_dir)
  mappable_data_1(data_dir)

  rPython::python.load(system.file("bin_boundary.py", package = "SCclust"))
  rPython::python.call("bin_boundary", data_dir, bincount)

  # File name suffix is the bin count expressed in thousands (5000 -> "5k").
  nK <- bincount / 1000
  raw_file <- paste0(data_dir, "/bin.boundaries.txt")
  sorted_file <- paste0(data_dir, "/bin.boundaries.", nK, "k.sorted.txt")

  # Quote both paths so directories containing spaces or shell
  # metacharacters work; expand "~" first because quoting disables the
  # shell's own tilde expansion.
  cmd <- paste(
    "sort -k 3,3n",
    shQuote(path.expand(raw_file)),
    ">",
    shQuote(path.expand(sorted_file))
  )
  status <- system(cmd)
  if (status != 0) {
    # Keep the unsorted file around so the Python output is not lost.
    stop(
      "sorting ", raw_file, " failed (exit status ", status, ")",
      call. = FALSE
    )
  }

  # Remove the unsorted intermediate without going through a shell.
  unlink(raw_file)
}
## Generate mappable.regions.sorted.txt from mappable.regions.txt.
##
## Sorts the mappable regions file found in `data_dir` by its first column
## (lexicographically) and then by its second column (numerically), using
## the system `sort` utility, and writes the result next to the input.
##
## data_dir: directory holding mappable.regions.txt; the sorted file is
##           written to the same directory.
## Returns (invisibly) the status message that is printed on success.
## Raises an error if the `sort` command exits with a non-zero status.
mappable_data <- function(data_dir) {
  in_file <- file.path(data_dir, "mappable.regions.txt")
  out_file <- file.path(data_dir, "mappable.regions.sorted.txt")

  # Quote the paths so directories with spaces or shell metacharacters work;
  # expand "~" first because quoting disables the shell's tilde expansion.
  cmd <- paste(
    "sort -k 1,1 -k 2,2n",
    shQuote(path.expand(in_file)),
    ">",
    shQuote(path.expand(out_file))
  )
  status <- system(cmd)
  if (status != 0) {
    # Do not report success when the sort did not actually succeed.
    stop(
      "sorting ", in_file, " failed (exit status ", status, ")",
      call. = FALSE
    )
  }

  print("sorted ... mappable.regions.sorted.txt generated!")
}
## Generate chrom.mappable.txt from mappable.regions.sorted.txt.
##
## Reads the tab-separated, header-less file mappable.regions.sorted.txt
## from `data_dir`, computes a length for every row as column 3 minus
## column 2, sums those lengths per distinct value of column 1, and writes
## the two-column result (group, total length) to chrom.mappable.txt in the
## same directory, tab-separated, without header, row names or quoting.
##
## data_dir: directory holding mappable.regions.sorted.txt.
## Returns (invisibly) the status message that is printed at the end.
mappable_data_1 <- function(data_dir) {
  regions <- read.table(
    file.path(data_dir, "mappable.regions.sorted.txt"),
    header = FALSE,
    sep = "\t"
  )
  regions$length <- regions[, 3] - regions[, 2]

  # One row per distinct value of column 1 with the summed region length.
  chr_map <- aggregate(
    regions$length,
    by = list(regions[, 1]),
    FUN = sum,
    na.rm = TRUE
  )

  out_file <- file.path(data_dir, "chrom.mappable.txt")
  # Spell out TRUE/FALSE: the T/F shorthands are ordinary variables that
  # can be reassigned by the user.
  write.table(
    chr_map,
    file = out_file,
    sep = "\t",
    col.names = FALSE,
    row.names = FALSE,
    quote = FALSE
  )
  print("chrom.mappable.txt generated!")
}
# NOTE: the two lines below are web-page boilerplate, not part of the R source.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.