R/snvPreprocessing.R

Defines functions snvPreprocessing

Documented in snvPreprocessing

#' @title Running indel relignment and quality recalibration on bam generated by bwa function NOT READY TO GO ON STABLE
#' @description This function executes the docker container snvPreprocessing. IMPORTANT: the working folder require the presence of GenomeAnalysisTK.tar.bz2 files
#'
#' @param group, a character string. Two options: \code{"sudo"} or \code{"docker"}, depending to which group the user belongs
#' @param bam.folder, a character string indicating where bam file generated by bwa is located
#' @param scratch.folder, a character string indicating the scratch folder where docker container will be mounted
#' @param genome.folder, a character string indicating the folder where the index reference genome for bwa is located
#' @param gatk.file, a character string for GenomeAnalysisTK-X.Y.tar.bz2, this file should be located in the bam.folder
#' @param threads, a number indicating the number of cores to be used by the application
#' @author Raffaele Calogero
#'
#' @return bam and bai after indel relignment and quality recalibration.
#' @examples
#'\dontrun{
#'     #downloading examples 1 million reads of mcf7 exome mixed with 1 million of mouse derived by human exome capturing
#'     system("wget http://130.192.119.59/public/hs1m_mm1m_R1.fastq.gz")
#'     system("wget http://130.192.119.59/public/hs1m_mm1m_R2.fastq.gz")
#'     
#'     #required for bwa 61Gb At the present time this is required to run mutect1
#'     system("wget http://130.192.119.59/public/hg19_exome.tar.gz")
#'     #this genome index is prepared to be used for indel realignment and quality recalibration
#'     
#'     #running snvPreprocessing
#'     snvPreprocessing(group="docker",bam.folder=getwd(), 
#'                scratch.folder="/data/scratch",
#'                genome.folder="/data/scratch/hg19_exome", 
#'                threads=24, gatk.file="GenomeAnalysisTK-3.7.tar.bz2")
#'
#' }
#' @export
snvPreprocessing <- function(group=c("sudo","docker"), bam.folder=getwd(), scratch.folder="/data/scratch", genome.folder, threads=8, gatk.file){

    home <- getwd()
    setwd(bam.folder)
  
    #initialize status
    system("echo 0 > ExitStatusFile 2>&1")
    
    #renaming gatk.file
    system(paste("mv ",gatk.file, " GenomeAnalysisTK.tar.bz2", sep=""))
    #running time 1
    ptm <- proc.time()
    #running time 1
    test <- dockerTest()
    if(!test){
      cat("\nERROR: Docker seems not to be installed in your system\n")
      system("echo 10 > ExitStatusFile 2>&1")
      setwd(home)
      return(10)
    }
    #########check scratch folder exist###########
    if (!file.exists(scratch.folder)){
      cat(paste("\nIt seems that the ",scratch.folder, "folder does not exist\n"))
      system("echo 3 > ExitStatusFile 2>&1")
      setwd(home)
      return(3)
    }
    #############################################
    tmp.folder <- gsub(":","-",gsub(" ","-",date()))
    scrat_tmp.folder=file.path(scratch.folder, tmp.folder)
    writeLines(scrat_tmp.folder,paste(bam.folder,"/tempFolderID", sep=""))
    cat("\ncreating a folder in scratch folder\n")
    dir.create(file.path(scratch.folder, tmp.folder))
    dir.create(file.path(scratch.folder, tmp.folder,"/tmp"))
    dir <- dir(path=bam.folder)
    dir.info <- dir[which(dir=="run.info")]
    if(length(dir.info)>0){
      system(paste("chmod 777 -R", file.path(scratch.folder, tmp.folder)))
      system(paste("cp ",bam.folder,"/run.info ", scratch.folder,"/",tmp.folder,"/run.info", sep=""))

    }
    dir <- dir[grep("dedup_reads.bam", dir)]
    cat("\ncopying \n")
    if(length(dir)==0){
      cat(paste("It seems that in ", bam.folder, "there is not a bam file files"))
      system("echo 1 > ExitStatusFile 2>&1")
      setwd(home)
      return(1)
    }else if(length(dir)>1){
      cat(paste("It seems that in ", bam.folder, "there is more than one bam file"))
      system("echo 2 > ExitStatusFile 2>&1")
      setwd(home)
      return(2)
    }else if(length(dir)==1){
        system(paste("chmod 777 -R", file.path(scratch.folder, tmp.folder)))
        system(paste("cp ",bam.folder,"/",dir[1], " ",scratch.folder,"/",tmp.folder,"/", sep=""))
        system(paste("cp ",bam.folder,"/",sub("bam$","bai$", dir[1]), " ",scratch.folder,"/",tmp.folder,"/", sep=""))
        system(paste("cp GenomeAnalysisTK.tar.bz2 ",scratch.folder,"/",tmp.folder,"/", sep=""))
    }            
    system(paste("chmod 777 -R", file.path(scratch.folder, tmp.folder)))

    #fastq  linking fpr docker
    docker_bam.folder=file.path("/data/scratch", tmp.folder)
    cat("\nsetting as working dir the scratch folder and running xenome docker container\n")
    

	 params <- paste("--cidfile ",bam.folder,"/dockerID -v ",docker_bam.folder,":/data/scratch -v ",genome.folder,":/genome -d docker.io/repbioinfo/snvpre.2017.01 sh /bin/gatk.sh ", threads," ",bam.folder, sep="")
	 resultRun <- runDocker(group=group, params=params)

    if(resultRun==0){
      system(paste("cp ", scrat_tmp.folder, "/* ", bam.folder, sep=""))
    }

    #running time 2
    ptm <- proc.time() - ptm
    con <- file(paste(bam.folder,"run.info", sep="/"), "r")
    tmp.run <- readLines(con)
    close(con)
    tmp.run[length(tmp.run)+1] <- paste("user run time mins ",ptm[1]/60, sep="")
    tmp.run[length(tmp.run)+1] <- paste("system run time mins ",ptm[2]/60, sep="")
    tmp.run[length(tmp.run)+1] <- paste("elapsed run time mins ",ptm[3]/60, sep="")
    writeLines(tmp.run,paste(bam.folder,"run.info", sep="/"))

    #saving log and removing docker container
    container.id <- readLines(paste(bam.folder,"/dockerID", sep=""), warn = FALSE)
 #   system(paste("docker logs ", container.id, " >& ", substr(container.id,1,12),".log", sep=""))
    system(paste("docker logs ", container.id, " >& ","snvPreprocessing_",substr(container.id,1,12),".log", sep=""))
    system(paste("docker rm ", container.id, sep=""))
    
    
    
    #removing temporary folder
    cat("\n\nRemoving the xenome temporary file ....\n")
    system(paste("rm -R ",scrat_tmp.folder))
    system(paste("rm  -f ",bam.folder,"/dockerID", sep=""))
    system(paste("rm  -f ",bam.folder,"/tempFolderID", sep=""))
    
    system(paste("cp ",paste(path.package(package="docker4seq"),"containers/containers.txt",sep="/")," ",bam.folder, sep=""))
    setwd(home)
}
kendomaniac/docker4seq documentation built on April 29, 2024, 9:16 a.m.