R/xenome.R

Defines functions xenome

Documented in xenome

#' @title Running xenome, https://github.com/data61/gossamer/
#' @description This function executes the docker container docker.io/repbioinfo/xenome.2017.01. The gzip fastq files found in \code{fastq.folder} are copied in a temporary folder created in \code{scratch.folder}, the xenome script of the container (xenome_pe.sh or xenome_se.sh) is run on them with \code{xenome.folder} mounted as /xenome, and, if the run succeeds, the content of the temporary folder is copied back in \code{fastq.folder}.
#'
#' @param group, a character string. Two options: \code{"sudo"} or \code{"docker"}, depending to which group the user belongs
#' @param fastq.folder, a character string indicating where gzip fastq files are located
#' @param scratch.folder, a character string indicating the scratch folder where docker container will be mounted
#' @param xenome.folder, a character string indicating the folder where the indexed reference genomes generated by xenome are located
#' @param seq.type, a character string indicating the type of reads to be processed. Two options: \code{"se"} or \code{"pe"} respectively for single end and pair end sequencing
#' @param threads, a number indicating the number of cores to be used from the application
#' @author Raffaele Calogero
#'
#' @return ambiguous, both, neither, hs and mm fastq.gz files. xeno_hs_R1.fastq.gz and xeno_hs_R2.fastq.gz are fastq file free of mouse reads and are used for further analysis. The function itself returns, invisibly, the value returned by the docker run, or one of the following error codes, which is also written in the ExitStatusFile of \code{fastq.folder}: 10 docker not available, 3 scratch folder missing, 1 no fastq.gz files, 2 more than two fastq.gz files, 4 number of fastq.gz files not matching \code{seq.type}.
#' @examples
#'\dontrun{
#'     #downloading examples 1 million reads of mcf7 exome mixed with 1 million of mouse derived by human exome capturing
#'     system("wget http://130.192.119.59/public/hs1m_mm1m_R1.fastq.gz")
#'     system("wget http://130.192.119.59/public/hs1m_mm1m_R2.fastq.gz")
#'     #running xenome
#'     xenome(group="docker",fastq.folder=getwd(), scratch.folder="/data/scratch",
#'     xenome.folder="/data/scratch/hg19.mm10", seq.type="pe",
#'     threads=24)
#'
#' }
#' @export
xenome <- function(group=c("sudo","docker"),fastq.folder=getwd(), scratch.folder="/data/scratch", xenome.folder, seq.type="pe", threads=1){

    # Resolve the enumerated arguments before touching the file system, so
    # that an invalid call fails without leaving anything behind.
    group <- match.arg(group)
    seq.type <- match.arg(seq.type, c("pe", "se"))

    # A relative fastq.folder would stop resolving once the working directory
    # is changed below, so make it absolute first.
    fastq.folder <- normalizePath(fastq.folder, mustWork = TRUE)

    home <- getwd()
    setwd(fastq.folder)
    # Restore the caller's working directory on every exit path, errors included.
    on.exit(setwd(home), add = TRUE)

    exit.status.file <- file.path(fastq.folder, "ExitStatusFile")
    # Records the error code in ExitStatusFile and hands it back to be returned.
    fail <- function(code) {
      writeLines(as.character(code), exit.status.file)
      code
    }

    #initialize status
    writeLines("0", exit.status.file)

    #running time 1
    ptm <- proc.time()

    test <- dockerTest()
    if (!test) {
      cat("\nERROR: Docker seems not to be installed in your system\n")
      return(fail(10))
    }

    #########check scratch folder exist###########
    if (!file.exists(scratch.folder)) {
      cat(paste("\nIt seems that the ",scratch.folder, "folder does not exist\n"))
      return(fail(3))
    }
    #############################################

    # Temporary folder named after the current date/time, mounted later as
    # /data/scratch inside the container.
    tmp.folder <- gsub(":","-",gsub(" ","-",date()))
    docker_fastq.folder <- file.path(scratch.folder, tmp.folder)
    chmod.cmd <- paste("chmod 777 -R", shQuote(docker_fastq.folder))
    writeLines(docker_fastq.folder, file.path(fastq.folder, "tempFolderID"))
    cat("\ncreating a folder in scratch folder\n")
    dir.create(docker_fastq.folder)
    dir.create(file.path(docker_fastq.folder, "tmp"))

    run.info.file <- file.path(fastq.folder, "run.info")
    if (file.exists(run.info.file)) {
      system(chmod.cmd)
      file.copy(run.info.file, file.path(docker_fastq.folder, "run.info"), overwrite = TRUE)
    }

    # Dots are escaped so that only a real ".fastq.gz" suffix matches.
    fastq.files <- list.files(fastq.folder, pattern = "\\.fastq\\.gz$")
    cat("\ncopying \n")
    n.expected <- if (seq.type == "pe") 2 else 1
    if (length(fastq.files) == 0) {
      cat(paste("It seems that in ", fastq.folder, "there are not fastq.gz files"))
      return(fail(1))
    } else if (length(fastq.files) > 2) {
      cat(paste("It seems that in ", fastq.folder, "there are more than two fastq.gz files"))
      return(fail(2))
    } else if (length(fastq.files) != n.expected) {
      # Without this guard nothing would be copied and the container would be
      # started on an empty folder.
      cat(paste("It seems that in ", fastq.folder, "the number of fastq.gz files does not match seq.type", seq.type))
      return(fail(4))
    }

    # The container expects fixed input names: data_R1.fastq.gz (and
    # data_R2.fastq.gz for pair end data).
    system(chmod.cmd)
    input.names <- c("data_R1.fastq.gz", "data_R2.fastq.gz")[seq_along(fastq.files)]
    file.copy(file.path(fastq.folder, fastq.files),
              file.path(docker_fastq.folder, input.names), overwrite = TRUE)
    system(chmod.cmd)

    cat("\nsetting as working dir the scratch folder and running xenome docker container\n")

    # The container command only differs in the script name (xenome_pe.sh or
    # xenome_se.sh); group is forwarded as it is to runDocker.
    params <- paste("--cidfile ",fastq.folder,"/dockerID -v ",docker_fastq.folder,":/data/scratch -v ",xenome.folder,":/xenome -d docker.io/repbioinfo/xenome.2017.01 sh /bin/xenome_",seq.type,".sh ", threads," ",fastq.folder, sep="")
    resultRun <- runDocker(group=group, params=params)

    if (resultRun == 0) {
      # The glob is left outside the quotes so that the shell expands it.
      system(paste0("cp -R ", shQuote(docker_fastq.folder), "/* ", shQuote(fastq.folder)))
    }

    #running time 2
    ptm <- proc.time() - ptm
    # run.info may be missing (e.g. failed run): start from an empty log then.
    tmp.run <- if (file.exists(run.info.file)) readLines(run.info.file) else character(0)
    tmp.run <- c(tmp.run,
                 paste("user run time mins ",ptm[1]/60, sep=""),
                 paste("system run time mins ",ptm[2]/60, sep=""),
                 paste("elapsed run time mins ",ptm[3]/60, sep=""))
    writeLines(tmp.run, run.info.file)

    #saving log and removing docker container
    # NOTE(review): docker is called here without sudo even when group is
    # "sudo" -- confirm this is intended.
    cid.file <- file.path(fastq.folder, "dockerID")
    if (file.exists(cid.file)) {
      container.id <- readLines(cid.file, warn = FALSE)
      # "> file 2>&1" is the POSIX form; ">&" is not understood by every /bin/sh.
      system(paste("docker logs ", container.id, " > ","xenome_",substr(container.id,1,12),".log 2>&1", sep=""))
      system(paste("docker rm ", container.id, sep=""))
    }

    #removing temporary folder
    cat("\n\nRemoving the xenome temporary file ....\n")
    system(paste("rm -R", shQuote(docker_fastq.folder)))
    unlink(cid.file)
    unlink(file.path(fastq.folder, "tempFolderID"))

    file.copy(file.path(path.package(package="docker4seq"), "containers/containers.txt"),
              fastq.folder, overwrite = TRUE)

    invisible(resultRun)
}
kendomaniac/docker4seq documentation built on Oct. 16, 2023, 2:30 a.m.