R/salmonCounts.R

Defines functions salmonCounts

Documented in salmonCounts

#' @title A function to handle a salmon docker container
#' @description This function executes a docker that produces as output the transcripts count file generated by Salmon quasi-alignment
#' @param group, a character string. Two options: sudo or docker, depending to which group the user belongs
#' @param scratch.folder, a character string indicating the path of the scratch folder
#' @param fastq.folder, a character string indicating the folder where input data are located and where output will be written
#' @param threads, a number indicating the number of cores to be used from the application
#' @param seq.type, a character string indicating the type of reads to be generated by the sequencer. Two options: \code{"se"} or \code{"pe"} respectively for single end and pair end sequencing. Strandness is inferred by salmon.
#' @param index.folder, a character string indicating the folder where transcriptome index was created with salmonIndex.
#' @param strandness, a character string indicating the type of sequencing protocol used for the analysis. Three options: \code{"none"}, \code{"forward"}, \code{"reverse"} respectively for non strand selection, reverse for Illumina strandness protocols, reverse for ACCESS Illumina protocol. Only \code{"none"} is currently implemented; the other two values make the function return 11.
#' @return On failure a numeric status code, which is also written to the file ExitStatusFile in fastq.folder when that folder exists: 1 no fastq.gz files found, 2 fastq.folder missing or more than two untrimmed fastq.gz files found, 3 scratch.folder missing, 10 docker not available, 11 requested strandness not implemented. Otherwise the (invisible) value of the final \code{setwd()} call.
#' @author Raffaele Calogero, raffaele.calogero [at] unito [dot] it, Bioinformatics and Genomics unit, University of Torino Italy
#'
#' @examples
#' \dontrun{
#' system("wget http://130.192.119.59/public/test_R1.fastq.gz")
#' system("wget http://130.192.119.59/public/test_R2.fastq.gz")
#'
#' library(docker4seq)
#' salmonCounts(group="docker", scratch.folder="/data/scratch/",
#'              fastq.folder=getwd(), index.folder="/data/genomes/hg38salmon",
#'              threads=24, seq.type="pe", strandness="none")
#' }
#'
#' @export
salmonCounts <- function(group=c("sudo","docker"), scratch.folder, fastq.folder, index.folder, threads=8, seq.type=c("se","pe"), strandness=c("none", "forward", "reverse")){

  # Collapse the choice vectors to a single validated value. Without this,
  # leaving an argument at its default makes the `==` tests below receive a
  # length > 1 condition, which is an error in recent R versions.
  group <- match.arg(group)
  seq.type <- match.arg(seq.type)
  strandness <- match.arg(strandness)

  #storing the position of the home folder
  home <- getwd()
  # Restore the caller's working directory on every exit path, including
  # early returns and errors.
  on.exit(setwd(home), add = TRUE)
  #running time 1
  ptm <- proc.time()
  #setting the data.folder as working folder
  if (!file.exists(fastq.folder)){
    cat(paste("\nIt seems that the ",fastq.folder, " folder does not exist\n"))
    return(2)
  }
  setwd(fastq.folder)
  #initialize status
  system("echo 0 > ExitStatusFile 2>&1")

  #testing if docker is running
  if(!dockerTest()){
    cat("\nERROR: Docker seems not to be installed in your system\n")
    system("echo 10 > ExitStatusFile 2>&1")
    return(10)
  }

  #check  if scratch folder exist
  if (!file.exists(scratch.folder)){
    cat(paste("\nIt seems that the ",scratch.folder, " folder does not exist\n"))
    system("echo 3 > ExitStatusFile 2>&1")
    return(3)
  }
  # A per-run temporary folder named after the current date/time
  tmp.folder <- gsub(":","-",gsub(" ","-",date()))
  scrat_tmp.folder <- file.path(scratch.folder, tmp.folder)
  writeLines(scrat_tmp.folder,paste(fastq.folder,"/tempFolderID", sep=""))
  cat("\ncreating a folder in scratch folder\n")
  dir.create(file.path(scrat_tmp.folder))

  # Select the input files: trimmed fastq.gz files take precedence; otherwise
  # at most two fastq.gz files are accepted.
  fastq.files <- dir()
  fastq.files <- fastq.files[grep("\\.fastq\\.gz$", fastq.files)]
  trimmed.files <- fastq.files[grep("trimmed", fastq.files)]
  cat("\ncopying \n")
  if(length(fastq.files)==0){
    cat(paste("It seems that in ", fastq.folder, "there are not fastq.gz files"))
    system("echo 1 > ExitStatusFile 2>&1")
    return(1)
  }
  if(length(trimmed.files)>0){
    fastq.files <- trimmed.files
  }else if(length(fastq.files)>2){
    cat(paste("It seems that in ", fastq.folder, "there are more than two fastq.gz files"))
    system("echo 2 > ExitStatusFile 2>&1")
    return(2)
  }
  # Copy the selected files into the scratch temporary folder
  system(paste("chmod 777 -R", file.path(scratch.folder, tmp.folder)))
  for(i in fastq.files){
    system(paste("cp ",fastq.folder,"/",i, " ",scratch.folder,"/",tmp.folder,"/",i, sep=""))
  }
  system(paste("chmod 777 -R", file.path(scratch.folder, tmp.folder)))

  #Trimmed fastq  linking for docker
  docker_fastq.folder <- scrat_tmp.folder
  # The copies are decompressed in place, so the names handed to the
  # container are the file names without the .gz suffix.
  system(paste("gzip -d ",docker_fastq.folder,"/*.gz", sep=""))
  fastq <- sub("\\.gz$", "", fastq.files)
  cat("\nsetting as working dir the scratch folder and running  docker container\n")
  #executing the docker job
  if(strandness!="none"){
    cat("\nNot implemented, yet\n")
    system("echo 11 > ExitStatusFile 2>&1")
    return(11)
  }
  # The sudo and docker groups use the same parameters; only the script
  # (paired end vs single end) and its fastq arguments differ.
  if(seq.type=="pe"){
    params <- paste("--cidfile ",fastq.folder,"/dockerID -v ",docker_fastq.folder,":/data/scratch -v ",index.folder,":/index -d docker.io/repbioinfo/salmon.2017.01 sh /bin/salmon_countsPE.sh ", threads," ", fastq[1]," ", fastq[2]," ", fastq.folder, sep="")
  }else{
    params <- paste("--cidfile ",fastq.folder,"/dockerID -v ",docker_fastq.folder,":/data/scratch -v ",index.folder,":/index -d docker.io/repbioinfo/salmon.2017.01 sh /bin/salmon_countsSE.sh ", threads," ", fastq[1]," ", fastq.folder, sep="")
  }
  resultRun <- runDocker(group=group, params=params)

  #waiting for the end of the container work
  if(resultRun==0){
    #not saving fastq files
    dir.tmp <- dir(scrat_tmp.folder)
    dir.tmp <- setdiff(dir.tmp, dir.tmp[grep("fastq",dir.tmp)])
    for(i in dir.tmp){
      system(paste("cp ", scrat_tmp.folder, "/", i, " " , fastq.folder, sep=""))
    }

    #saving log and removing docker container
    container.id <- readLines(paste(fastq.folder,"/dockerID", sep=""), warn = FALSE)
    short.id <- substr(container.id,1,12)
    # system() runs the command through sh, where the bash-only "&>" would
    # background the command instead of redirecting; use the portable form.
    system(paste("docker logs ", short.id, " > ","salmonCounts_",short.id,".log 2>&1", sep=""))
    system(paste("docker rm ", container.id, sep=""))
    #removing temporary folder
    cat("\n\nRemoving the temporary file ....\n")
    system(paste("rm -R ",scrat_tmp.folder))
    system("rm -fR dockerID")
    system("rm  -fR tempFolderID")
    system(paste("cp ",paste(path.package(package="docker4seq"),"containers/containers.txt",sep="/")," ",fastq.folder, sep=""))
  }
  #running time 2
  ptm <- proc.time() - ptm
  # Append the timings to run.info in the working folder when it already
  # exists, otherwise create it. The label of the first line differs between
  # the two cases and is kept as it was.
  if(file.exists("run.info")){
    tmp.run <- readLines("run.info")
    first.label <- "user run time mins "
  }else{
    tmp.run <- character(0)
    first.label <- "run time mins "
  }
  tmp.run <- c(tmp.run,
               paste(first.label,ptm[1]/60, sep=""),
               paste("system run time mins ",ptm[2]/60, sep=""),
               paste("elapsed run time mins ",ptm[3]/60, sep=""))
  writeLines(tmp.run,"run.info")

  # Kept as the final expression so the function's (invisible) return value
  # on normal completion is unchanged.
  setwd(home)
}
kendomaniac/docker4seq documentation built on July 15, 2024, 12:02 a.m.