knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) knitr::opts_chunk$set(dev = "png")
#load packages library(RADstackshelpR) library(gridExtra)
```{bash, eval = FALSE}
files="SRR5344602 SRR5344603 SRR5344604 SRR5344605 SRR5344606 SRR5344607 SRR5344608 SRR5344609 SRR5344610 SRR5344611 SRR5344612 SRR5344613 SRR5344614 SRR5344615 SRR5344616 SRR5344617"
module load sratoolkit
for sample in files do fastq-dump $sample done
#### Note: If too many of your samples contain low-data, it may be difficult to determine an appropriate 'R80' cutoff, as there will be very few (even 0) SNPs shared at an 80% completeness threshold. Here, all samples contain a large amount of sequencing data, and were used to optimize de novo assembly in [Lost in Parameter Space](https://doi.org/10.1111/2041-210X.12775). Because of these unique circumstances, we will not perform quality control at this point. Typically, I would use this [RMarkdown script](https://github.com/DevonDeRaad/RADstackshelpR/blob/master/inst/extdata/fastqcr.Rmd) that uses the R package [fastqcr](https://github.com/kassambara/fastqcr) to generate a report visualizing the quality and quantity of sequencing for each sample, and recommend a subset of samples to be immediately dropped before parameter optimization. The only modification necessary for this script is the path to the folder containing the input .fastq.gz files and the path to your desired output folder. An example report generated using this script can be seen [here](https://devonderaad.github.io/RADstackshelpR/articles/quality.control.vignette.html). ## Step 2: Iterate over values of 'm' ranging from 3-7, while leaving all other parameters at default values. ###Bash code to execute this: ```{bash, eval = FALSE} # Build loci de novo in each sample for the single-end reads only. # -M — Maximum distance (in nucleotides) allowed between stacks (default 2). # -m — Minimum depth of coverage required to create a stack (default 3). #here, we will vary m from 3-7, and leave all other paramaters default for i in {3..7} do #create a directory to hold this unique iteration: mkdir stacks_m$i #run ustacks with m equal to the current iteration (3-7) for each sample id=1 for sample in $files do /home/d669d153/work/stacks-2.41/ustacks -f fastq/${sample}.fq.gz -o stacks_m$i -i $id -m $i -p 15 let "id+=1" done ## Run cstacks to compile stacks between samples. Popmap is a file in working directory called 'pipeline_popmap.txt' /home/d669d153/work/stacks-2.41/cstacks -P stacks_m$i -M pipeline_popmap.txt -p 15 ## Run sstacks. Match all samples supplied in the population map against the catalog. /home/d669d153/work/stacks-2.41/sstacks -P stacks_m$i -M pipeline_popmap.txt -p 15 ## Run tsv2bam to transpose the data so it is stored by locus, instead of by sample. /home/d669d153/work/stacks-2.41/tsv2bam -P stacks_m$i -M pipeline_popmap.txt -t 15 ## Run gstacks: build a paired-end contig from the metapopulation data (if paired-reads provided), ## align reads per sample, call variant sites in the population, genotypes in each individual. /home/d669d153/work/stacks-2.41/gstacks -P stacks_m$i -M pipeline_popmap.txt -t 15 ## Run populations completely unfiltered and output unfiltered vcf, for input to the RADstackshelpR package /home/d669d153/work/stacks-2.41/populations -P stacks_m$i -M pipeline_popmap.txt --vcf -t 15 done
#optimize_m function will generate summary stats on your 5 iterative runs #input can be full path to each file or just the file name if your working directory contains the files m.out<-optimize_m(m3="/Users/devder/Desktop/brown.trout/m3.vcf", m4="/Users/devder/Desktop/brown.trout/m4.vcf", m5="/Users/devder/Desktop/brown.trout/m5.vcf", m6="/Users/devder/Desktop/brown.trout/m6.vcf", m7="/Users/devder/Desktop/brown.trout/m7.vcf") #visualize the effect of varying m on the depth of each sample vis_depth(output = m.out) #visualize the effect of varying m on the number of SNPs retained vis_snps(output = m.out, stacks_param = "m") #visualize the effect of varying m on the number of loci retained vis_loci(output = m.out, stacks_param = "m") #3 is the optimal m value, and will be used next to optimize M
knitr::include_graphics("trt.m.png")
```{bash, eval = FALSE}
for i in {1..8} do
mkdir stacks_bigM$i
id=1 for sample in $files do /home/d669d153/work/stacks-2.41/ustacks -f fastq/${sample}.fq.gz -o stacks_bigM$i -i $id -m 3 -M $i -p 15 let "id+=1" done /home/d669d153/work/stacks-2.41/cstacks -P stacks_bigM$i -M pipeline_popmap.txt -p 15 /home/d669d153/work/stacks-2.41/sstacks -P stacks_bigM$i -M pipeline_popmap.txt -p 15 /home/d669d153/work/stacks-2.41/tsv2bam -P stacks_bigM$i -M pipeline_popmap.txt -t 15 /home/d669d153/work/stacks-2.41/gstacks -P stacks_bigM$i -M pipeline_popmap.txt -t 15 /home/d669d153/work/stacks-2.41/populations -P stacks_bigM$i -M pipeline_popmap.txt --vcf -t 15 done
## Step 5: Visualize the output of these 8 runs and determine the optimal value for M. ```r #optimize M M.out<-optimize_bigM(M1="/Users/devder/Desktop/brown.trout/bigM1.vcf", M2="/Users/devder/Desktop/brown.trout/bigM2.vcf", M3="/Users/devder/Desktop/brown.trout/bigM3.vcf", M4="/Users/devder/Desktop/brown.trout/bigM4.vcf", M5="/Users/devder/Desktop/brown.trout/bigM5.vcf", M6="/Users/devder/Desktop/brown.trout/bigM6.vcf", M7="/Users/devder/Desktop/brown.trout/bigM7.vcf", M8="/Users/devder/Desktop/brown.trout/bigM8.vcf") #visualize the effect of varying M on the number of SNPs retained vis_snps(output = M.out, stacks_param = "M") #visualize the effect of varying M on the number of polymorphic loci retained vis_loci(output = M.out, stacks_param = "M")
knitr::include_graphics("trt.bigM.png")
```{bash, eval = FALSE}
for i in {1..3} do
mkdir stacks_n$i
id=1 for sample in $files do /home/d669d153/work/stacks-2.41/ustacks -f fastq/${sample}.fq.gz -o stacks_n$i -i $id -m 3 -M 2 -p 15 let "id+=1" done /home/d669d153/work/stacks-2.41/cstacks -n $i -P stacks_n$i -M pipeline_popmap.txt -p 15 /home/d669d153/work/stacks-2.41/sstacks -P stacks_n$i -M pipeline_popmap.txt -p 15 /home/d669d153/work/stacks-2.41/tsv2bam -P stacks_n$i -M pipeline_popmap.txt -t 15 /home/d669d153/work/stacks-2.41/gstacks -P stacks_n$i -M pipeline_popmap.txt -t 15 /home/d669d153/work/stacks-2.41/populations -P stacks_n$i -M pipeline_popmap.txt --vcf -t 15 done
## Step 7: Visualize the output of these 3 runs and determine the optimal value for n. ```r #optimize n n.out<-optimize_n(nequalsMminus1="/Users/devder/Desktop/brown.trout/n1.vcf", nequalsM="/Users/devder/Desktop/brown.trout/n2.vcf", nequalsMplus1="/Users/devder/Desktop/brown.trout/n3.vcf") #visualize the effect of varying n on the number of SNPs retained vis_snps(output = n.out, stacks_param = "n") #visualize the effect of varying n on the number of polymorphic loci retained vis_loci(output = n.out, stacks_param = "n")
gl<-list() gl[[1]]<-vis_depth(output = m.out) gl[[2]]<-vis_snps(output = m.out, stacks_param = "m") gl[[3]]<-vis_loci(output = m.out, stacks_param = "m") gl[[4]]<-vis_snps(output = M.out, stacks_param = "M") gl[[5]]<-vis_loci(output = M.out, stacks_param = "M") gl[[6]]<-vis_snps(output = n.out, stacks_param = "n") gl[[7]]<-vis_loci(output = n.out, stacks_param = "n") grid.arrange( grobs = gl, widths = c(1,1,1,1,1,1), layout_matrix = rbind(c(1,1,2,2,3,3), c(4,4,4,5,5,5), c(6,6,6,7,7,7)) )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.