#' Remove primers using cutadapt as per the DADA2 ITS workflow
#' @param sequence_directory directory containing paired-end reads
#' @param cutadapt path of cutadapt on your machine
#' @export
cutadaptPrimers <- function(sequence_directory, cutadapt){
path <- sequence_directory
list.files(path)
fnFs <- sort(list.files(path, pattern = "_R1_001.fastq", full.names = TRUE)) #Create character vector of locations + names of forward seqs
fnRs <- sort(list.files(path, pattern = "_R2_001.fastq", full.names = TRUE)) #and same for reverse
# NC1 and NC2 primers
FWD <- "ACGTCTGGTTCAGGGTTGTT"
REV <- "TTAGTTTCTTTTCCTCCGCT"
# Function that returns all orientations (forward, reverse, their complements and reverse complements) of primers
allOrients <- function(primer) {
require(Biostrings)
dna <- DNAString(primer)
orients <- c(Forward = dna, Complement = complement(dna),
Reverse = reverse(dna), RevComp = reverseComplement(dna))
return(sapply(orients, toString))
}
# Get all of our orientation
FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)
# Specify location + name of reads filtered for ambiguous bases (Ns)
fnFs.filtN <- file.path(path, "filtN", basename(fnFs))
fnRs.filtN <- file.path(path, "filtN", basename(fnRs))
# Filter out all sequences containing Ns
filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN=0, compress = FALSE, multithread = TRUE)
#Function to count the number of primer hits
primerHits <- function(primer, fn) {
nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE)
return(sum(nhits > 0))
}
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.filtN[[1]]),
FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = fnRs.filtN[[1]]),
REV.ForwardReads = sapply(REV.orients, primerHits, fn = fnFs.filtN[[1]]),
REV.ReverseReads = sapply(REV.orients, primerHits, fn = fnRs.filtN[[1]]))
#For some reason cutadapt is not detecting our reverse primer...
system(paste(cutadapt, "--version")) # ensure that cutadapt is working
path.cut <- file.path(path, "cutadapt") # This will be the directory containing reads with primers removed
if(!dir.exists(path.cut)) dir.create(path.cut) # Make the directory
fnFs.cut <- file.path(path.cut, basename(fnFs)) # Specify directory + names of reads with primers removed
fnRs.cut <- file.path(path.cut, basename(fnRs))
FWD.RC <- dada2:::rc(FWD)
REV.RC <- dada2:::rc(REV)
# Specify cutadapt parameters
# g stands for front of the read,
# a stands for the adaptor (primer) to remove
# capitalized parameters correspond to the R2 read
R1.flags <- paste("-g", FWD, "-a", REV.RC)
R2.flags <- paste("-G", REV, "-A", FWD.RC)
# Run cutadapt from r
# n is the number of rounds to search and remove a primer,
# set to 5 in case primers appear more than once
# o refers to output name
# and p indicates that outputs consist of paired-end reads
for (i in seq_along(fnFs)) {system(paste(cutadapt, R1.flags, R2.flags, "-n", 5,
"-o", fnFs.cut[i], "-p", fnRs.cut[i],
fnFs.filtN[i], fnRs.filtN[i]))}
#Now count primer hits in our sequences, should all be 0
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.cut[[1]]),
FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = fnRs.cut[[1]]),
REV.ForwardReads = sapply(REV.orients, primerHits, fn = fnFs.cut[[1]]),
REV.ReverseReads = sapply(REV.orients, primerHits, fn = fnRs.cut[[1]]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.