The goal of pipelineTools is to streamline the NGS analysis pipelines and result reporting within RStudio. PipelineTools provides packages to run standard open source NGS tools
Can be installed using devtools directly from GitHub using the following commands.
# Install devtools
install.packages("devtools")
library("devtools")
#Install package from GitHub
install_github("GrahamHamilton/pipelineTools")
Load the required libraries ```{r load libraries} library("pipelineTools") library("biomaRt") library("tximport")
Set the paths to the software installed on the system
```{r software paths, echo = FALSE}
fastq.screen.path <- "/software/fastq_screen_v0.13.0/fastq_screen"
fastp.path <- "/software/bin/fastp"
kallisto.path <- "/software/kallisto_v0.45.1/kallisto"
hisat.path <- "/software/hisat_v2-2.1.0/hisat2"
multiqc.path <- "/usr/local/bin/multiqc"
samtools.path <- "/software/samtools_v1.9/samtools"
stringtie.path <- "/software/stringtie-1.3.6/stringtie"
Version numbers for the software used ```{r versions, warning=FALSE, echo = FALSE} fastqscreen.version <- run_fastq_screen(fastq_screen = fastq.screen.path, version = TRUE) fastp.version <- run_fastp(fastp = fastp.path, version = TRUE) kallisto.version <- run_kallisto(kallisto = kallisto.path, version = TRUE) hisat.version <- run_hisat2(hisat2 = hisat.path, version = TRUE) multiqc.version <- run_multiqc(multiqc = multiqc.path, version = TRUE) samtools.version <- run_samtools(samtools = samtools.path, version = TRUE) stringtie.version <- run_stringtie(stringtie = stringtie.path, version = TRUE) r.version <- getRversion() pipelineTools.version <- packageDescription("pipelineTools")$Version deseq2.version <- packageDescription("DESeq2")$Version
Create the results directories
```{r results directories}
# Trimmed reads directory
trimmed.reads.dir <- "trimmed_reads"
#Create the directory for the trimmed reads
dir.create(trimmed.reads.dir, showWarnings = FALSE)
# FastQScreen results directory
fastq.screen.dir <- "Screen"
# Create the directory for the fastq screen results
dir.create(fastq.screen.dir, showWarnings = FALSE)
# FastP results directory
fastp.results.dir <- "FastpQC"
# Create the directory for the FastP results
dir.create(fastp.results.dir, showWarnings = FALSE)
# Kallisto results directory
kalisto.results.dir <- "kallisto"
#Create the directory for the Kallisto results
dir.create(kalisto.results.dir, showWarnings = FALSE)
# Hisat2 alignment results directory
hisat2.alignments.dir <- "hisat2_alignments"
#Create the directory for the HiSat2 results
dir.create(hisat2.alignments.dir, showWarnings = FALSE)
# Stringtie results directory
stringtie.dir <- "stringtie"
#Create the directory for the HiSat2 results
dir.create(stringtie.dir, showWarnings = FALSE)
Read in the fastq file paths to lists and then create sample names lists and trimmed read file paths ```{r setup files} reads.path <- "raw_reads"
reads.patt.1 <- "_S\d{1,2}\_L001_R1_001.fastq.gz$" reads.patt.2 <- "_S\d{1,2}\_L001_R2_001.fastq.gz$"
sample.dataframe <- prepare_samples(reads.path, c(reads.patt.1,reads.patt.2),trimmed.reads.dir)
mate1 <- as.character(sample.dataframe$reads.path.1) mate1.trim <- as.character(sample.dataframe$trimmed.reads.path.1)
mate2 <- as.character(sample.dataframe$reads.path.2) mate2.trim <- as.character(sample.dataframe$trimmed.reads.path.2)
sample.names <- as.character(sample.dataframe$sample.names)
Sequence adapters
```{r sequence adapters}
adapter1 <- "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
adapter2 <- "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT"
The full paths to the reference genome/transcriptome indexes and annotation files ```{r references}
transcriptome <- "/path/to/kallisto/index"
genome <- "/path/to/hisat2/index"
gtf <- "/path/to/gtf/file"
### Fastq Screen
Results are stored in the Screen folder
```{r fastqscreen, echo = FALSE, eval = eval}
fastq.screen.conf <- "/export/jessie3/gmh5n/PipelineTest/fastq_screen.conf"
fastq.screen.cmds <- run_fastq_screen(fq.files = mate1,
out.dir = fastq.screen.dir,
conf = fastq.screen.conf,
fastq_screen = fastq.screen.path)
write.table(fastq.screen.cmds,"FastqScreen_commands.sh", quote = FALSE, row.names = FALSE, col.names = FALSE)
Adapter and quality trimmed reads are stored in the r trimmed.reads.dir
directory, QC files are stored in the FastpQC folder
```{r fastp, echo = FALSE, eval = eval}
fastp.cmds <- run_fastp(mate1 = mate1,
mate2 = mate2,
mate1.out = mate1.out,
mate2.out = mate2.out,
adapter1 = adapter1,
adapter2 = adapter2,
sample.name = sample.names,
out.dir = fastp.results.dir,
fastp = fastp.path)
write.table(fastp.cmds,"FastP_commands.sh", quote = FALSE, row.names = FALSE, col.names = FALSE)
### Kallisto
Pseudo align the reads to the reference transcriptome with Kallisto
```{r kallisto, echo = FALSE, eval = eval}
strandedness <- "second"
# Paired end
kallisto.cmds <- run_kallisto(mate1 = mate1,
mate2 = mate2,
index = transcriptome,
sample.name = sample.names,
strandedness = strandedness,
out.dir = kalisto.results.dir,
kallisto = kallisto.path)
write.table(kallisto.cmds,"Kallisto_commands.sh", quote = FALSE, row.names = FALSE, col.names = FALSE)
```{r include=FALSE} mart<-"ensembl" db<-"mmusculus_gene_ensembl" # Change to organism in study filt<-"ensembl_gene_id"
ensembl = useEnsembl(biomart=mart, dataset=db)
att<-c("ensembl_transcript_id","ensembl_gene_id") txTable<-getBM(attributes=att,mart=ensembl)
colnames(txTable)<-c("tx_id","gene_id")
sampleTable <- read.csv("SampleDescription.txt", sep="\t", row.names=1)
dir <- getwd() files <- file.path(dir,kalisto.results.dir,row.names(sampleTable),"abundance.h5", fsep = .Platform$file.sep) names(files)<-row.names(sampleTable)
txi <- tximport(files, type = "kallisto", tx2gene = txTable,ignoreTxVersion = TRUE, ignoreAfterBar = TRUE) ```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.