R/wrapperPdx.R

Defines functions wrapperPdx

Documented in wrapperPdx

#' @title Running PDX data preprocessing TO BE REVISED
#' @description This function runs, in sequence, xenome, to remove mouse data, skewer, to trim adapters, and bwa, to map reads to hg19 and to mark duplicates. After xenome, every gzip file found in \code{fastq.folder} is moved into the sub-folder \code{xenome.files}; only the human reads (\code{xeno_hs_R1.fastq.gz} and \code{xeno_hs_R2.fastq.gz}) are moved back, so that skewer and bwa work on human reads only. IMPORTANT to prepare data for mutect v1 analysis it is mandatory to download the hg19 index archive indicated in the example.
#'
#' @param group, a character string. Two options: \code{"sudo"} or \code{"docker"}, depending on which group the user belongs to. When omitted, the first option, \code{"sudo"}, is used
#' @param fastq.folder, a character string indicating where gzip fastq files are located
#' @param scratch.folder, a character string indicating the scratch folder where docker container will be mounted
#' @param xenome.folder, a character string indicating the folder where the indexed reference genomes generated by xenome are located
#' @param seq.type, a character string indicating the type of reads to be trimmed. Two options: \code{"se"} or \code{"pe"} respectively for single end and paired end sequencing
#' @param threads, a number indicating the number of cores to be used by the application
#'
#' @param adapter5, a character string indicating the fwd adapter
#' @param adapter3, a character string indicating the rev adapter
#' @param min.length, a number indicating minimal length required to return a trimmed read
#'
#' @param genome.folder, a character string indicating the folder where the indexed reference genome for bwa is located
#' @param sample.id, a character string indicating the unique id to be associated to the bam that will be created. IMPORTANT it is necessary to have a sample.id for each sample for further analysis.
#' @author Raffaele Calogero
#'
#'
#' @return three files: dedup_reads.bam, which is sorted and duplicates marked bam file, dedup_reads.bai, which is the index of the dedup_reads.bam, and dedup_reads.stats, which provides mapping statistics. The value returned to the caller is whatever \code{bwa} returns.
#' @examples
#'\dontrun{
#'     #downloading examples 1 million reads of mcf7 exome mixed with 1 million of mouse derived by human exome capturing
#'     system("wget http://130.192.119.59/public/hs1m_mm1m_R1.fastq.gz")
#'     system("wget http://130.192.119.59/public/hs1m_mm1m_R2.fastq.gz")
#'
#'     #required for bwa 61Gb At the present time this is required to run mutect1
#'     system("wget http://130.192.119.59/public/hg19_exome.tar.gz")
#'
#'     #running wrapperPdx
#'     wrapperPdx(group="docker",fastq.folder=getwd(), scratch.folder="/data/scratch",
#'     xenome.folder="/data/scratch/hg19.mm10", seq.type="pe", threads=24,
#'     adapter5="AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
#'     adapter3="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
#'     min.length=40, genome.folder="/data/scratch/hg19_exome", sample.id="sampleX")
#'
#' }
#' @export
wrapperPdx <- function(group=c("sudo","docker"),fastq.folder, scratch.folder, xenome.folder, seq.type, threads,
                       adapter5="AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
                       adapter3="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
                       min.length=40,
                       genome.folder="/data/scratch/hg19_exome", sample.id="sampleX"){

  # Collapse the default c("sudo","docker") to a single value so the
  # downstream steps never receive a length-2 vector.
  group <- match.arg(group)

  cat("\nrunning xenome\n")
  xenome(group=group,fastq.folder=fastq.folder, scratch.folder=scratch.folder,
         xenome.folder=xenome.folder, seq.type=seq.type,threads=threads)

  # Park every gzip file of fastq.folder in the xenome.files sub-folder.
  # Files are looked up in fastq.folder itself (not in the current working
  # directory) and moved without a shell, so the result does not depend on
  # where R was started nor on special characters in the paths.
  xenome.files <- file.path(fastq.folder, "xenome.files")
  dir.create(xenome.files, showWarnings=FALSE)
  gz.files <- list.files(fastq.folder, pattern="\\.gz$", full.names=TRUE)
  file.rename(gz.files, file.path(xenome.files, basename(gz.files)))

  # Bring back only the human reads, which are the input of skewer and bwa.
  human.reads <- c("xeno_hs_R1.fastq.gz", "xeno_hs_R2.fastq.gz")
  parked <- file.path(xenome.files, human.reads)
  available <- file.exists(parked)
  restored <- file.rename(parked[available],
                          file.path(fastq.folder, human.reads[available]))
  # Single end runs are not expected to provide an R2 file.
  expected <- if (identical(seq.type, "se")) human.reads[1] else human.reads
  missing.reads <- setdiff(expected, human.reads[available][restored])
  if (length(missing.reads) > 0) {
    warning("human reads not found after xenome: ",
            paste(missing.reads, collapse=", "), call.=FALSE)
  }

  cat("\nrunning skewer\n")
  skewer(group=group,fastq.folder=fastq.folder, scratch.folder=scratch.folder,
         adapter5=adapter5, adapter3=adapter3, seq.type=seq.type, threads=threads,
         min.length=min.length)

  cat("\nrunning bwa\n")
  bwa(group=group, fastq.folder=fastq.folder, scratch.folder=scratch.folder,
        genome.folder=genome.folder, seq.type=seq.type, threads=threads, sample.id=sample.id)
}
kendomaniac/docker4seq documentation built on April 8, 2024, 5:39 p.m.