R/rnaseqCounts.R

Defines functions rnaseqCounts

Documented in rnaseqCounts

#' @title Running RNAseq counting workflow for a single sample
#' @description This function executes a set of docker containers allowing the generation of gene and isoforms counts for a single sample. The steps are run in this order: \code{fastqc}, \code{skewer} (adapter trimming), \code{rsemstar} (mapping and counting) and, for \code{annotation.type="gtfENSEMBL"}, \code{rsemannoByGtf} (annotation). The working directory of the caller is restored when the function exits, also if one of the steps stops with an error.
#' #params skewer
#' @param group, a character string. Two options: \code{"sudo"} or \code{"docker"}, depending to which group the user belongs. The value is passed to every step of the workflow, FastQC included
#' @param fastq.folder, a character string indicating where gzip fastq files are located
#' @param scratch.folder, a character string indicating the scratch folder where docker container will be mounted
#' @param adapter5, a character string indicating the fwd adapter
#' @param adapter3, a character string indicating the rev adapter
#' @param seq.type, a character string indicating the type of reads to be generated by the sequencer. Two options: \code{"se"} or \code{"pe"} respectively for single end and pair end sequencing.
#' @param threads, a number indicating the number of cores to be used from the application
#' @param min.length, a number indicating minimal length required to return a trimmed read
#' #params rsemstar
#' @param genome.folder, a character string indicating the folder where the indexed reference genome is located. IMPORTANT the present function only supports genomic indexes made using ensembl genome and the corresponding gtf
#' @param strandness, a character string indicating the type of sequencing protocol used for the analysis. Three options: \code{"none"}, \code{"forward"}, \code{"reverse"} respectively for non strand selection, reverse for Illumina strandness protocols, reverse for ACCESS Illumina protocol
#' @param save.bam, a boolean value, TRUE or FALSE, to save also BAM files generated by STAR and RSEM
#' #params rsemanno
#' @param org, a character string indicating the genome assembly used for mapping and counting with \code{"rsemstar"} function. It is currently not used by this function: the Bioconductor based annotation that required it is disabled
#' @param annotation.type, a character string. \code{"gtfENSEMBL"} runs \code{rsemannoByGtf} using \code{genome.folder}. \code{"biocUCSC"} is accepted but the corresponding annotation is currently disabled, so no annotation is executed. Any other value, \code{"biocENSEMBL"} included, is reported as an error after trimming and counting have been executed
#' @author Raffaele Calogero
#'
#' @return Returns 0 when the workflow is completed and 1 when \code{annotation.type} is not supported. The same value is written in the file ExitStatusFile located in \code{fastq.folder}. The output files are generated by the \code{skewer}, \code{rsemstar} and \code{rsemannoByGtf} functions
#' @examples
#'\dontrun{
#' system("wget http://130.192.119.59/public/test_R1.fastq.gz")
#' library(docker4seq)
#' rnaseqCounts(group="docker",fastq.folder=getwd(), scratch.folder="/data/scratch/",
#'             adapter5="AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
#'             adapter3="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
#'             seq.type="se", threads=24,  min.length=40,
#'             genome.folder="/data/genomes/hg38star", strandness="none", save.bam=FALSE,
#'             org="hg38", annotation.type="gtfENSEMBL")
#' }
#' @export
rnaseqCounts<- function( group="sudo",fastq.folder=getwd(), scratch.folder="/data/scratch", threads=4,
      adapter5,adapter3,seq.type="pe",   min.length=40, genome.folder="/data/genomes/hg38star",
      strandness="none", save.bam=TRUE, org="hg38", annotation.type="gtfENSEMBL"){

  #storing the position of the home folder and restoring it on every exit path,
  #errors raised by one of the workflow steps included
  home <- getwd()
  on.exit(setwd(home), add=TRUE)

  #FastQC, executed with the same group used by all the other steps
  fastqc(group=group, data.folder=fastq.folder)
  setwd(fastq.folder)

  #initialize status
  system("echo 0 > ExitStatusFile 2>&1")

  #trimming adapter
  skewer(group=group,fastq.folder=fastq.folder, scratch.folder=scratch.folder,adapter5=adapter5, adapter3=adapter3, seq.type=seq.type, threads=threads,  min.length=min.length)
  #running rsemstar
  rsemstar(group=group,fastq.folder=fastq.folder, scratch.folder=scratch.folder, genome.folder=genome.folder, seq.type=seq.type, strandness=strandness,threads=threads, save.bam=save.bam)
  #running annotation
  if(annotation.type=="biocUCSC"){
    #the Bioconductor based annotation is disabled, the call is kept here as reference
#    rsemanno(group=group,rsem.folder=fastq.folder, scratch.folder=scratch.folder, org=org, truncating.expected.counts=truncating.expected.counts, protein.anno=FALSE)
    #telling the user that no annotation is produced instead of skipping it silently
    cat("\nNOTE: biocUCSC annotation is disabled, no annotation was executed\n")
  }else if(annotation.type=="gtfENSEMBL"){
    rsemannoByGtf(group=group, rsem.folder=fastq.folder, genome.folder=genome.folder)
  }else{
    cat("\nERROR: an annotation function not implemented was selected\n")

    system("echo 1 > ExitStatusFile 2>&1")
    #the home folder is restored by on.exit
    return(1)
  }
  #the called functions might have moved the working directory
  setwd(fastq.folder)
  #copying the list of the used containers without passing through the shell,
  #thus paths containing spaces or shell special characters are handled
  file.copy(from=file.path(path.package(package="docker4seq"),"containers","containers.txt"), to=fastq.folder, overwrite=TRUE)
  #removing uncompressed and trimmed fastq files, nothing is done if no file is found
  unlink(Sys.glob("*.fastq"))
  unlink(Sys.glob("*trimmed-pair*"))

  system("echo 0 > ExitStatusFile 2>&1")
  return(0)

}
kendomaniac/docker4seq documentation built on April 8, 2024, 5:39 p.m.