dimsum | R Documentation |
This function runs the DiMSum pipeline.
dimsum(
runDemo = F,
fastqFileDir = NULL,
fastqFileExtension = ".fastq",
gzipped = T,
stranded = T,
paired = T,
barcodeDesignPath = NULL,
barcodeErrorRate = 0.25,
experimentDesignPath = NULL,
experimentDesignPairDuplicates = F,
barcodeIdentityPath = NULL,
countPath = NULL,
synonymSequencePath = NULL,
cutadaptCut5First = NULL,
cutadaptCut5Second = NULL,
cutadaptCut3First = NULL,
cutadaptCut3Second = NULL,
cutadapt5First = NULL,
cutadapt5Second = NULL,
cutadapt3First = NULL,
cutadapt3Second = NULL,
cutadaptMinLength = 50,
cutadaptErrorRate = 0.2,
cutadaptOverlap = 3,
vsearchMinQual = 30,
vsearchMaxQual = 41,
vsearchMaxee = 0.5,
vsearchMinovlen = 10,
outputPath = "./",
projectName = "DiMSum_Project",
wildtypeSequence = NULL,
permittedSequences = NULL,
reverseComplement = F,
sequenceType = "auto",
mutagenesisType = "random",
transLibrary = F,
transLibraryReverseComplement = F,
bayesianDoubleFitness = F,
bayesianDoubleFitnessLamD = 0.025,
fitnessMinInputCountAll = "0",
fitnessMinInputCountAny = "0",
fitnessMinOutputCountAll = "0",
fitnessMinOutputCountAny = "0",
fitnessHighConfidenceCount = 10,
fitnessDoubleHighConfidenceCount = 50,
fitnessNormalise = T,
fitnessErrorModel = T,
indels = "none",
maxSubstitutions = 2,
mixedSubstitutions = F,
retainIntermediateFiles = F,
splitChunkSize = 3758096384,
retainedReplicates = "all",
startStage = 0,
stopStage = 5,
numCores = 1
)
runDemo |
Run the DiMSum demo (default:F) |
fastqFileDir |
Path to directory containing input FASTQ files (required for WRAP) |
fastqFileExtension |
FASTQ file extension (default:'.fastq') |
gzipped |
Are FASTQ files gzipped? (default:T) |
stranded |
Is the library design stranded? (default:T) |
paired |
Is the library design paired-end? (default:T) |
barcodeDesignPath |
Path to barcode design file (tab-separated plain text file with barcode design) |
barcodeErrorRate |
Maximum allowed error rate for barcode to be matched (default:0.25) |
experimentDesignPath |
Path to Experimental Design File (required if '--runDemo'=F) |
experimentDesignPairDuplicates |
Are multiple instances of FASTQ files in the Experimental Design File permitted? (default:F) |
barcodeIdentityPath |
Path to Variant Identity File (tab-separated plain text file mapping barcodes to variants) |
countPath |
Path to Variant Count File for analysis with STEAM only (tab-separated plain text file with sample counts for all variants) |
synonymSequencePath |
Path to Synonym Sequences File with coding sequences for which synonymous variant fitness should be quantified (default: plain text file with one coding nucleotide sequence per line) |
cutadaptCut5First |
Remove fixed number of bases from start (5') of first (or only) read before constant region trimming (optional) |
cutadaptCut5Second |
Remove fixed number of bases from start (5') of second read in pair before constant region trimming (optional) |
cutadaptCut3First |
Remove fixed number of bases from end (3') of first (or only) read before constant region trimming (optional) |
cutadaptCut3Second |
Remove fixed number of bases from end (3') of second read in pair before constant region trimming (optional) |
cutadapt5First |
Sequence of 5' constant region to be trimmed from first (or only) read (optional) |
cutadapt5Second |
Sequence of 5' constant region to be trimmed from second read in pair (optional) |
cutadapt3First |
Sequence of 3' constant region to be trimmed from first (or only) read (default: reverse complement of '--cutadapt5Second') |
cutadapt3Second |
Sequence of 3' constant region to be trimmed from second read in pair (default: reverse complement of '--cutadapt5First') |
cutadaptMinLength |
Discard reads shorter than LENGTH after trimming (default:50) |
cutadaptErrorRate |
Maximum allowed error rate for trimming constant regions (default:0.2) |
cutadaptOverlap |
Minimum overlap between read and constant region for trimming (default:3) |
vsearchMinQual |
Minimum Phred base quality score required to retain read or read pair (default:30) |
vsearchMaxQual |
Maximum Phred base quality score accepted when reading (and used when writing) FASTQ files; cannot be greater than 93 (default:41) |
vsearchMaxee |
Maximum number of expected errors tolerated to retain read or read pair (default:0.5) |
vsearchMinovlen |
Discard read pair if the alignment length is shorter than this (default:10) |
outputPath |
Path to directory to use for output files (default:'./' i.e. current working directory) |
projectName |
Project name and directory where results are to be saved (default:'DiMSum_Project') |
wildtypeSequence |
Wild-type nucleotide sequence (A/C/G/T). Lower-case bases (a/c/g/t) indicate internal constant regions to be removed (required if '--runDemo'=F) |
permittedSequences |
Nucleotide sequence of IUPAC ambiguity codes (A/C/G/T/R/Y/S/W/K/M/B/D/H/V/N) with length matching the number of mutated positions (i.e. upper-case letters) in '--wildtypeSequence' (default:N i.e. any substitution mutation allowed) |
reverseComplement |
Reverse complement sequence (default:F) |
sequenceType |
Coding potential of sequence: one of 'noncoding', 'coding' or 'auto'. If 'auto' and the specified wild-type nucleotide sequence ('--wildtypeSequence') has a valid translation without a premature STOP codon, it is assumed to be 'coding' (default:'auto') |
mutagenesisType |
Whether mutagenesis was performed at the nucleotide or codon/amino acid level; either 'random' or 'codon' (default:'random') |
transLibrary |
Paired-end reads correspond to distinct molecules? (default:F) |
transLibraryReverseComplement |
Reverse complement second read in pair (default:F) |
bayesianDoubleFitness |
In development: improve double mutant fitness estimates using Bayesian framework (DISABLED: still in development) |
bayesianDoubleFitnessLamD |
In development: Poisson distribution for score likelihood (default:0.025) |
fitnessMinInputCountAll |
Minimum input read count (in all replicates) to be retained during fitness calculations (default:0) |
fitnessMinInputCountAny |
Minimum input read count (in any replicate) to be retained during fitness calculations (default:0) |
fitnessMinOutputCountAll |
Minimum output read count (in all replicates) to be retained during fitness calculations (default:0) |
fitnessMinOutputCountAny |
Minimum output read count (in any replicate) to be retained during fitness calculations (default:0) |
fitnessHighConfidenceCount |
In development: minimum mean input read count for high confidence variants (default:10) |
fitnessDoubleHighConfidenceCount |
In development: minimum input replicate read count for doubles used to derive prior for Bayesian doubles correction (default:50) |
fitnessNormalise |
Normalise fitness values to minimise inter-replicate differences (default:T) |
fitnessErrorModel |
Fit fitness error model (default:T) |
indels |
Indel variants to be retained: either 'all', 'none' or a comma-separated list of sequence lengths (default:'none') |
maxSubstitutions |
Maximum number of nucleotide or amino acid substitutions for non-coding or coding sequences respectively (default:2) |
mixedSubstitutions |
For coding sequences, are nonsynonymous variants with silent/synonymous substitutions in other codons allowed? (default:F) |
retainIntermediateFiles |
Should intermediate files be retained? Intermediate files can be many gigabytes, but are required to rerun DiMSum starting at intermediate pipeline stages (default:F) |
splitChunkSize |
Internal: FASTQ file split chunk size in bytes (default:3758096384) |
retainedReplicates |
Comma-separated list of (integer) experiment replicates to retain or 'all' (default:'all') |
startStage |
(Re-)Start DiMSum at a specific pipeline stage (default:0) |
stopStage |
Stop DiMSum at a specific pipeline stage (default:5) |
numCores |
Number of available CPU cores. All pipeline stages make use of parallel computing to decrease runtime if multiple cores are available (default:1) |
Nothing
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.