HaplotypR is a program for analysis of Amplicon-Seq genotyping experiments. HaplotypR provides a Shiny interface for simpler data analysis.
The HaplotypR project was developed by Anita Lerch. A paper with more details about the program is available from:
HaplotypR is distributed under the GNU General Public License, version 3.
Note: * For Windows computer: install the precomplied NGmergeR package from https://github.com/lerch-a/NGmergeR/releases) and install.packages("PathToPackage/NGmergeR_0.99.1-win_R-v3.6.zip", repos = NULL).
To install HaplotypR start R and first install ShortRead and dada2 by typing:
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("ShortRead","dada2")
Then install devtools by typing
install.packages("devtools")
install.packages("git2r")
and install NGmergeR and HaplotypR by typing
library(devtools)
library(git2r)
path <- file.path(tempfile(pattern="NGmergeR-"), "NGmergeR")
dir.create(path, recursive=TRUE)
repo <- clone("https://github.com/lerch-a/NGmergeR.git", path)
clone("https://github.com/jsh58/NGmerge", file.path(path, "src", "NGmerge"))
install(path)
detach("package:HaplotypR", unload=TRUE)
devtools::install_github("lerch-a/HaplotypR")
library("HaplotypR")
library("ShortRead")
Copy example files to a working directory 'outputDir':
# Define output directory
outputDir <- "exampleHaplotypR"
# Create output directoy
if(!dir.exists(outputDir))
dir.create(outputDir, recursive=T)
# Copy example files to output directory
file.copy(from=system.file(package="HaplotypR", "extdata/ex2"), to=".", recursive = T)
# List files example files in output direcoty
dir(file.path("ex2"))
The following files should be listed with the last R command: "barcode_F.fasta", "barcode_R.fasta", "marker_file.txt", "reads2_F.fastq.gz", "reads2_R.fastq.gz", "sample_file.txt".
Run demultiplexing by sample and rename output files
# set input file path
primerFile <- "ex2/marker_file.txt"
sampleFile <- "ex2/sample_file.txt"
fnBarcodeF <- "ex2/barcode_F.fasta"
fnBarcodeR <- "ex2/barcode_R.fasta"
reads <- list.files("ex2", pattern="reads", full.names = T)
# create output subdirectory
outDeplexSample <- file.path(outputDir, "dePlexSample")
dir.create(outDeplexSample)
# demultiplex by samples
dePlexSample <- demultiplexReads(reads[1], reads[2], fnBarcodeF, fnBarcodeR, outDeplexSample)
# rename output files to sample files
sampleTab <- read.delim(sampleFile, stringsAsFactors=F)
dePlexSample <- renameDemultiplexedFiles(sampleTab, dePlexSample)
# save summary table
write.table(dePlexSample, file.path(outputDir, "demultiplexSampleSummary.txt"), sep="\t", row.names=F)
Run demultiplex by marker and truncate primer sequence
# create output subdirectory
outDeplexMarker <- file.path(outputDir, "dePlexMarker")
dir.create(outDeplexMarker)
# process each marker
markerTab <- read.delim(primerFile, stringsAsFactors=F)
dePlexMarker <- demultiplexByMarker(dePlexSample, markerTab, outDeplexMarker)
# save summary table
write.table(dePlexMarker, file.path(outputDir, "demultiplexMarkerSummary.txt"), sep="\t", row.names=F)
Fuse paired reads. Two methods are provided for fusing paired reads. First method works for non overlapping sequence read pairs. Trim to a fixed length (removes low quality bases) and then concatenate forward and reverse read.
# create output subdirectory
outProcFiles <- file.path(outputDir, "processedReads")
dir.create(outProcFiles)
# Trim options
numNtF <- 190
numNtR <- 120
postfix <- sprintf("_bind%.0f_%.0f", numNtF, numNtR)
# Adjust reference to trim options and save as fasta file
refSeq <- as.character(markerTab$ReferenceSequence)
refSeq <- DNAStringSet(paste(substr(refSeq, 1,numNtF), substr(refSeq, nchar(refSeq)+1-numNtR, nchar(refSeq)), sep=""))
names(refSeq) <- markerTab$MarkerID
lapply(seq_along(refSeq), function(i){
writeFasta(refSeq[i], file.path(outputDir, paste(names(refSeq)[i], postfix, ".fasta", sep="")))
})
# Fuse paired read
procReads <- bindAmpliconReads(as.character(dePlexMarker$FileR1), as.character(dePlexMarker$FileR2), outProcFiles,
read1Length=numNtF, read2Length=numNtR)
procReads <- cbind(dePlexMarker[,c("SampleID", "SampleName","BarcodePair", "MarkerID")], procReads)
write.table(procReads, file.path(outputDir, sprintf("processedReadSummary%s.txt", postfix)), sep="\t", row.names=F)
Second method work only for overlapping sequence read pair by merging the overlap of the forward and reverse read (using vsearch wrapper).
# create output subdirectory
outProcFiles <- file.path(outputDir, "processedReads")
dir.create(outProcFiles)
postfix <- "_merge"
refSeq <- DNAStringSet(markerTab$ReferenceSequence)
names(refSeq) <- markerTab$MarkerID
lapply(seq_along(refSeq), function(i){
writeFasta(refSeq[i], file.path(outputDir, paste(names(refSeq)[i], postfix, ".fasta", sep="")))
})
procReadsMerge <- mergeAmpliconReads(as.character(dePlexMarker$FileR1), as.character(dePlexMarker$FileR2), outProcFiles, method="NGmerge")
procReads <- cbind(dePlexMarker[,c("SampleID", "SampleName","BarcodePair", "MarkerID")], procReadsMerge)
write.table(procReads, file.path(outputDir, sprintf("processedReadSummary%s.txt", postfix)), sep="\t", row.names=F, quote=F)
# subset: remove markers without reads
procReads <- procReads[procReads$numRead>10,]
Call Haplotypes
# call haplotype options
minCov <- 3
detectionLimit <- 1/100
minOccHap <- 1
minCovSample <- 25
# remove samples without reads
procReads <- procReads[procReads$numRead>0,]
# call final haplotypes
finalTab <- createFinalHaplotypTableDADA2(
outputDir = outputDir, sampleTable = procReads, markerTable = markerTab,
referenceSequence=refSeq, filterIndel=T,
minHaplotypCoverage = minCov, minReplicate = minOccHap,
detectability = detectionLimit, minSampleCoverage = minCovSample,
multithread=FALSE, pool="pseudo", OMEGA_A=1e-120)
write.csv(finalTab, file=file.path(outputDir, "finalHaplotypList_vDADA2.csv"), row.names=F)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.