## ----message=FALSE,warning=FALSE,results='hide',echo=FALSE--------------------
options(digits=2)
## ----child = "Chapters/01-Convention.Rmd"-------------------------------------
## -----------------------------------------------------------------------------
a <- 1
## -----------------------------------------------------------------------------
(a <- 1)
## ----child = "Chapters/02-Introduction.Rmd"-----------------------------------
## ----child = "Chapters/04-Preprocessing.Rmd"----------------------------------
## # First, make an output directory called "qa" and a sub-directory of
## # that called "raw"
## mkdir -p ~/results/qa/raw
##
## # We can also make a directory to hold the raw data
## mkdir ~/results/raw
##
## # Then link the files in our home folder. Always try to
## # work on links or copies of the raw source data to keep them safe!
## cd ~/results/raw
## ln -s share/Day1/fastq/*.fq.gz .
##
## # Then you can run fastqc on the file(s) you have selected. FastQC can run
## # a number of files in parallel (see the -t option)
## fastqc -o ~/results/qa/raw -t 16 ~/results/raw/*.fq.gz
##
## # Finally, also run multiqc to summarize all data in one plot
## cd ~/results/qa/raw
## multiqc .
## # THIS IS AN EXAMPLE OF THE COMMANDS! GO ON READING, do NOT execute them!
##
## # First uncompress your forward and reverse reads:
## gunzip read_1.fq.gz read_2.fq.gz
##
## # Merge forward and reverse reads into a single file (this is a requirement
## # of the sortmerna software)
## merge-paired-reads.sh read_1.fq read_2.fq read-interleaved.fq
##
## # Run sortmerna
## sortmerna --ref $SORTMERNA_DB --reads read-interleaved.fq --aligned \
## read-rRNA-hits --other read-sortmerna --log -a 16 -v --paired_in --fastx
##
## # Split the file again in two separate files, one for forward, one for reverse
## # orientation
## unmerge-paired-reads.sh read-sortmerna.fq read-sortmerna_1.fq read-sortmerna_2.fq
##
## # Delete the interleaved files and compress the result files to save space
## rm read-interleaved.fq read-sortmerna.fq
## gzip read-sortmerna_1.fq read-sortmerna_2.fq
## #!/bin/bash
##
## READ_FW="$1"
## READ_RV="$2"
##
## FILEBASE=$(basename "${READ_FW/_1.fq.gz/}")
##
## echo "Uncompressing FASTQ data of $FILEBASE"
## gunzip -c "$READ_FW" > ${FILEBASE}_1.fq
## gunzip -c "$READ_RV" > ${FILEBASE}_2.fq
##
## READ_FW="${READ_FW%.gz}"
## READ_RV="${READ_RV%.gz}"
##
## echo "Merging pairs of $FILEBASE"
## merge-paired-reads.sh "$READ_FW" "$READ_RV" "${FILEBASE}_interleaved.fq"
##
## echo "Running SortMeRNA for $FILEBASE"
## sortmerna --ref $SORTMERNA_DB --reads "${FILEBASE}_interleaved.fq" --aligned \
## "${FILEBASE}-rRNA-hits" --other "${FILEBASE}-sortmerna" --log -a 16 \
## -v --paired_in --fastx
##
## echo "Unmerging SortMeRNA filtered pairs for $FILEBASE"
## unmerge-paired-reads.sh "${FILEBASE}-sortmerna.fq" \
## "${FILEBASE}-sortmerna_1.fq" "${FILEBASE}-sortmerna_2.fq"
##
## echo "Doing cleanup for $FILEBASE"
## gzip "$READ_FW" "$READ_RV" "${FILEBASE}-sortmerna_1.fq" \
## "${FILEBASE}-sortmerna_2.fq" "${FILEBASE}-rRNA-hits.fq"
## rm "${FILEBASE}_interleaved.fq" "${FILEBASE}-sortmerna.fq"
## mkdir ~/results/sortmerna
## cd ~/results/sortmerna
## find ~/results/raw -name "*.fq.gz" | sort | head -n 4 | while read READ_FW
## do
## read READ_RV
## bash ~/results/runSortMeRNA.sh $READ_FW $READ_RV
## done
## ln -s ~/share/Day1/sortmerna/* .
## cd ~/results/sortmerna
## multiqc .
## mkdir ~/results/qa/sortmerna
## fastqc -o ~/results/qa/sortmerna -t 16 ~/results/sortmerna/*sortmerna*.fq.gz
## mkdir ~/results/trimmomatic
## cd ~/results/trimmomatic
## find ~/results/sortmerna -name "*sortmerna_[12].fq.gz" | sort | head -n 4 | while read FW_READ
## do
## read RV_READ
## FILEBASE=$(basename "${FW_READ%_1.fq.gz}")
## trimmomatic PE -threads 16 -phred64 "$FW_READ" "$RV_READ" \
## "$FILEBASE-trimmomatic_1.fq.gz" "$FILEBASE-trimmomatic-unpaired_1.fq.gz" \
## "$FILEBASE-trimmomatic_2.fq.gz" "$FILEBASE-trimmomatic-unpaired_2.fq.gz" \
## ILLUMINACLIP:"/usr/share/Trimmomatic-0.38/adapters/TruSeq3-PE-2.fa":2:30:10 \
## SLIDINGWINDOW:5:20 MINLEN:50 2> "$FILEBASE-timmomatic.log"
## done
## cd ~/results/trimmomatic
## ln -s ~/share/Day1/trimmomatic/* .
## multiqc .
## mkdir ~/results/qa/trimmomatic
## fastqc -o ~/results/qa/trimmomatic -t 16 ~/results/trimmomatic/*trimmomatic_[12].fq.gz
## mkdir ~/results/qc_report
## ln -s ~/results/qa/raw/*zip ~/results/qc_report
## ln -s ~/results/qa/sortmerna/*zip ~/results/qc_report
## ln -s ~/results/qa/trimmomatic/*zip ~/results/qc_report
## ln -s ~/results/sortmerna/*.log ~/results/qc_report
## ln -s ~/results/trimmomatic/*.log ~/results/qc_report
## multiqc -o ~/results/qc_report ~/results/qc_report
## ----child = "Chapters/05-Pseudo-Alignment.Rmd"-------------------------------
## cd ~/results
## kallisto index -i Potra01-mRNA.idx \
## ~/share/Day1/reference/fasta/Potra01-mRNA.fa.gz
## mkdir -p ~/results/kallisto
## cd ~/results/kallisto
## find ../trimmomatic -name "*trimmomatic_[12].fq.gz" | sort | head -n 4 | while read FW_READ
## do
## read RV_READ
## FILEBASE=$(basename "${FW_READ%_1.fq.gz}")
## kallisto quant -i ~/results/Potra01-mRNA.idx -b 100 \
## -o . -t 16 "$FW_READ" "$RV_READ" | tee $FILEBASE.log
## # Kallisto doesn't let us specify an output filename so we rename all
## # output files
## mv "abundance.tsv" $FILEBASE"_abundance.tsv"
## mv "abundance.h5" $FILEBASE"_abundance.h5"
## mv "run_info.json" $FILEBASE"_run_info.json"
## done
## ~/results/kallisto
## multiqc.
## mkdir ~/results/qc_report
## multiqc -o ~/results/qc_report ~/results
## ----child = "Chapters/12-R-Biological-QA.Rmd"--------------------------------
## ----message=FALSE,warning=FALSE,results='hide'-------------------------------
library(RnaSeqTutorial)
## ----a1, eval=FALSE-----------------------------------------------------------
## library(tximport)
## files <- list.files("~/results/kallisto",
## pattern = ".*_abundance.tsv",
## full.names = TRUE)
## tx <- suppressMessages(tximport(files = files,
## type = "kallisto",
## txOut = TRUE))
## ----a2, eval=FALSE-----------------------------------------------------------
## tx.counts <- round(tx$counts)
## colnames(tx.counts) <- sub("_.*","",basename(files))
## ----a3,eval=FALSE------------------------------------------------------------
## tx2gene <- data.frame(
## TX=rownames(tx.counts),
## GENEID=sub("\\.\\d+$","",rownames(tx.counts)))
## ----a4,eval=FALSE------------------------------------------------------------
## count.table <- round(summarizeToGene(tx,tx2gene)$counts)
## colnames(count.table) <- sub("_.*","",basename(files))
## ----a5,eval=FALSE------------------------------------------------------------
## sel <- rowSums(count.table) == 0
## sprintf("%s percent of %s genes are not expressed",round(sum(sel) * 100/ nrow(count.table),digits=1),nrow(count.table))
## ----a6,eval=FALSE------------------------------------------------------------
## sel <- rowSums(tx.counts) == 0
## sprintf("%s percent of %s transcripts are not expressed",round(sum(sel) * 100/ nrow(tx.counts),digits=1),nrow(tx.counts))
## ----a7,eval=FALSE------------------------------------------------------------
## library(RColorBrewer)
## pal <- brewer.pal(8,"Dark2")
## mar <- par("mar")
## plot(density(log10(rowMeans(count.table))),col=pal[1],
## main="gene mean raw counts distribution",
## xlab="mean raw counts (log10)")
## ----a8,eval=FALSE------------------------------------------------------------
## plot.multidensity(log10(count.table),col=rep(pal,each=3),
## legend.x="topright",legend.cex=0.5,
## main="sample raw counts distribution",
## xlab="per gene raw counts (log10)")
## ----9,eval=FALSE-------------------------------------------------------------
## plot.multidensity(log10(tx.counts),col=rep(pal,each=3),
## legend.x="topright",legend.cex=0.5,
## main="sample raw counts distribution",
## xlab="per gene raw counts (log10)")
## ----a10,eval=FALSE-----------------------------------------------------------
## samples <- read.csv(file.path(extdata(),"sex-samples.csv"))
## sex <- samples$sex[match(colnames(count.table),samples$sample)]
## date <- factor(samples$date[match(colnames(count.table),samples$sample)])
## ----a11,eval=FALSE-----------------------------------------------------------
## dds <- DESeqDataSetFromMatrix(
## countData = count.table,
## colData = data.frame(sex=sex, date=date),
## design = ~ date + sex)
## ----a12,eval=FALSE-----------------------------------------------------------
## dds <- estimateSizeFactors(dds)
## sizes <- sizeFactors(dds)
## sizes
## boxplot(sizes,main="relative library sizes",ylab="scaling factor")
## ----a13,eval=FALSE-----------------------------------------------------------
## vsd <- varianceStabilizingTransformation(dds, blind=TRUE)
## vst <- assay(vsd)
## colnames(vst) <- colnames(count.table)
## ----a14,eval=FALSE-----------------------------------------------------------
## vst <- vst - min(vst)
## ----a15,eval=FALSE-----------------------------------------------------------
## meanSdPlot(assay(vsd)[rowSums(count.table)>0,])
## ----a16,eval=FALSE-----------------------------------------------------------
## meanSdPlot(log2(as.matrix(count.table[rowSums(count.table)>0,])))
## ----a17,eval=FALSE-----------------------------------------------------------
## meanSdPlot(log2(counts(dds,normalized=TRUE)[rowSums(count.table)>0,]))
## ----a18,eval=FALSE-----------------------------------------------------------
## pc <- prcomp(t(vst))
## percent <- round(summary(pc)$importance[2,]*100)
## smpls <- conditions
## ----a19,eval=FALSE-----------------------------------------------------------
## sex.cols<-c("pink","lightblue")
## sex.names<-c(F="Female",M="Male")
## ----a20,eval=FALSE-----------------------------------------------------------
## scatterplot3d(pc$x[,1],
## pc$x[,2],
## pc$x[,3],
## xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
## ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
## zlab=paste("Comp. 3 (",percent[3],"%)",sep=""),
## color=sex.cols[as.integer(factor(sex))],
## pch=19)
## legend("bottomright",pch=c(NA,15,15),col=c(NA,sex.cols[1:2]),
## legend=c("Color:",sex.names[levels(factor(sex))]))
## par(mar=mar)
## ----a21,eval=FALSE-----------------------------------------------------------
## dat <- date
## scatterplot3d(pc$x[,1],
## pc$x[,2],
## pc$x[,3],
## xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
## ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
## zlab=paste("Comp. 3 (",percent[3],"%)",sep=""),
## color=pal[as.integer(factor(dat))],
## pch=19)
## legend("topleft",pch=rep(c(19,23),each=10),col=rep(pal,2),legend=levels(factor(dat)),bty="n")
## par(mar=mar)
## ----a22,eval=FALSE-----------------------------------------------------------
## scatterplot3d(pc$x[,1],
## pc$x[,2],
## pc$x[,3],
## xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
## ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
## zlab=paste("Comp. 3 (",percent[3],"%)",sep=""),
## color=sex.cols[as.integer(factor(sex))],
## pch=c(19,17)[as.integer(factor(dat))])
## legend("bottomright",pch=c(NA,15,15),col=c(NA,sex.cols[1:2]),
## legend=c("Color:",sex.names[levels(factor(sex))]))
## legend("topleft",pch=c(NA,21,24),col=c(NA,1,1),
## legend=c("Symbol:",levels(factor(dat))),cex=0.85)
## par(mar=mar)
## ----a23,eval=FALSE-----------------------------------------------------------
## plot(pc$x[,1],
## pc$x[,2],
## xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
## ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
## col=sex.cols[as.integer(factor(sex))],
## pch=c(19,17)[as.integer(factor(dat))],
## main="Principal Component Analysis",sub="variance stabilized counts")
## legend("bottomleft",pch=c(NA,15,15),col=c(NA,sex.cols[1:2]),
## legend=c("Color:",sex.names[levels(factor(sex))]))
## legend("topright",pch=c(NA,21,24),col=c(NA,1,1),
## legend=c("Symbol:",levels(factor(dat))),cex=0.85)
## text(pc$x[,1],
## pc$x[,2],
## labels=colnames(count.table),cex=.5,adj=-1)
## ----a24,eval=FALSE-----------------------------------------------------------
## plot(pc$x[,2],
## pc$x[,3],
## xlab=paste("Comp. 2 (",percent[2],"%)",sep=""),
## ylab=paste("Comp. 3 (",percent[3],"%)",sep=""),
## col=sex.cols[as.integer(factor(sex))],
## pch=c(19,17)[as.integer(factor(dat))],
## main="Principal Component Analysis",sub="variance stabilized counts")
## legend("bottomleft",pch=c(NA,15,15),col=c(NA,sex.cols[1:2]),
## legend=c("Color:",sex.names[levels(factor(sex))]))
## legend("topright",pch=c(NA,21,24),col=c(NA,1,1),
## legend=c("Symbol:",levels(factor(dat))),cex=0.85)
## ----child = "Commons/17-SessionInfo.Rmd"-------------------------------------
## ----session info, echo=FALSE-------------------------------------------------
sessionInfo()
## ----child = "Commons/14-Acknowledgments.Rmd"---------------------------------
## ----child = "Commons/15-Footnotes.Rmd"---------------------------------------
## ----child = "Commons/16-Images.Rmd"------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.