R/split_pacbam_bychrom.R

Defines functions split_pacbam_bychrom

Documented in split_pacbam_bychrom

#' Split PaCBAM ouputs by chromosomes
#' @param targetbed Genomic regions in the BED tab-delimited format.
#' @param pacbamfolder Folder with original outputs generated by PaCBAM. This folder should contain both .pabs and .pileups files.
#' @param pacbamfolder_bychrom Output folder in which subfolders for each sample will be created.
#' @return Original data from PaCBAM split by chromosome in separate files, these are saved in two sub-folders 'pileup' and 'snvs', organized by sample.
#' @import data.table
#' @examples
#' targetbed <- system.file("extdata", "regions_toy.bed", package = "abemus")
#' pacbamfolder <- system.file("extdata", "pacbam_data", package = "abemus")
#' pacbamfolder_bychrom <- tempdir()
#' split_pacbam_bychrom(targetbed,pacbamfolder,pacbamfolder_bychrom)
#' @export
split_pacbam_bychrom <- function(targetbed,
                                 pacbamfolder,
                                 pacbamfolder_bychrom){

  old_wd <- getwd()
  on.exit(setwd(old_wd))

  pacbamlist = list.files(pacbamfolder,full.names = TRUE, pattern = "\\.pabs$|\\.pileup$")
  samplelist = unique(gsub(basename(pacbamlist),pattern = '.pabs|.pileup',replacement = ''))

  message(paste("[",Sys.time(),"]\tReading chromosomes from 'targetbed'"))
  CHR <- bed2positions(targetbed = targetbed,get_only_chromosomes = TRUE)[[1]]

  for(sname in samplelist){
    dirname <- sname
    message(paste("[",Sys.time(),"]\t",dirname,"writing pileup_bychrom"))
    dir.create(file.path(pacbamfolder_bychrom,dirname))
    dir.create(file.path(pacbamfolder_bychrom,dirname,"pileup"))
    setwd(file.path(pacbamfolder_bychrom,dirname,"pileup"))
    pileup_list = grep(pacbamlist,pattern = "\\.pileup$",value = TRUE)
    pileup_file = grep(pileup_list,pattern = paste(dirname,"pileup",sep="."),value = TRUE)
    m <- fread(file = pileup_file,header = TRUE,stringsAsFactors = FALSE,data.table = FALSE,showProgress = FALSE)
    out <- split(m, f = m[,1] )
    sapply(names(out),function (x) write.table(out[[x]],file=gsub(basename(pileup_file),pattern = ".pileup",replacement = paste0("_chr",gsub(x,pattern = "chr",replacement = ""),".pileup")),quote=FALSE,col.names=TRUE,row.names = FALSE,sep = "\t"))

    message(paste("[",Sys.time(),"]\t",dirname,"writing pabs_bychrom"))
    dir.create(file.path(pacbamfolder_bychrom,dirname,"snvs"))
    setwd(file.path(pacbamfolder_bychrom,dirname,"snvs"))
    pabs_list = grep(pacbamlist,pattern = "\\.pabs$",value = TRUE)
    pabs_file = grep(pabs_list,pattern = paste(dirname,"pabs",sep="."),value = TRUE)
    m <- fread(file = pabs_file,header = TRUE,stringsAsFactors = FALSE,data.table = FALSE,showProgress = FALSE)
    out <- split(m, f = m[,1] )
    sapply(names(out),function (x) write.table(out[[x]],file=gsub(basename(pabs_file),pattern = ".pabs",replacement = paste0("_chr",gsub(x,pattern = "chr",replacement = ""),".pabs")),quote=FALSE,col.names=TRUE,row.names = FALSE,sep = "\t"))
  }
}

Try the abemus package in your browser

Any scripts or data that you put into this service are public.

abemus documentation built on Dec. 19, 2019, 1:07 a.m.