#' Build a pipeline configuration for overlapping or non-overlapping reads
#'
#' Thin dispatcher that forwards its arguments, by name, to
#' \code{buildConfig_ol_prod} when \code{overlapping} is true and to
#' \code{buildConfig_nol_test} otherwise, returning whatever the selected
#' builder returns.
#'
#' Note that the default for \code{merged_read_length} here (100) differs from
#' the default declared by \code{buildConfig_ol_prod} itself (1000); because
#' the value is always forwarded explicitly, the default of this function wins
#' when it is called without \code{merged_read_length}.
#'
#' @param overlapping Used as the condition of an \code{if}: a true value
#'   selects the overlapping-reads builder, a false value the
#'   non-overlapping-reads builder.
#' @param fwd_file,fwd_primer_seq,fwd_primer_lens,fwd_min_score Forwarded
#'   unchanged to the selected builder (forward-read settings).
#' @param rev_file,rev_primer_seq,rev_primer_lens,rev_min_score Forwarded
#'   unchanged to the selected builder (reverse-read settings).
#' @param fwd_pid_in_which_fragment,rev_pid_in_which_fragment Forwarded
#'   unchanged to the selected builder.
#' @param max_seq,min_read_length,pattern_to_chop_from_names Forwarded
#'   unchanged to the selected builder.
#' @param output_dir,base_for_names,intermediate_reports,erase_history
#'   Forwarded unchanged to the selected builder.
#' @param verbosity,report_type,ncpu,bins_to_process,header_format
#'   Forwarded unchanged to the selected builder.
#' @param merged_read_length Forwarded only to the overlapping-reads builder;
#'   it is not used when \code{overlapping} is false.
#' @param front_gaps_allowed Forwarded unchanged to the selected builder.
#' @return The value returned by the selected builder.
#' @export
buildConfig <- function(overlapping,
                        fwd_file, fwd_primer_seq, fwd_primer_lens, fwd_min_score,
                        rev_file, rev_primer_seq, rev_primer_lens, rev_min_score,
                        fwd_pid_in_which_fragment, rev_pid_in_which_fragment,
                        max_seq = NULL,
                        min_read_length = 295,
                        pattern_to_chop_from_names = ' [0-9]:N:[0-9]*:[0-9]*$',
                        output_dir = "/fridge/data/MotifBinner2_test",
                        base_for_names = "CAP129_2040_009wpi_C2C3",
                        intermediate_reports = TRUE,
                        erase_history = FALSE,
                        verbosity = 3,
                        report_type = c('html'),
                        ncpu = 4,
                        bins_to_process = Inf,
                        merged_read_length = 100,
                        header_format = "MiSeq",
                        front_gaps_allowed = 0) {
  # Pick the builder first; only the overlapping builder accepts
  # merged_read_length, so it travels in a builder-specific extra list.
  if (overlapping) {
    builder_name <- "buildConfig_ol_prod"
    builder_only_args <- list(merged_read_length = merged_read_length)
  } else {
    builder_name <- "buildConfig_nol_test"
    builder_only_args <- list()
  }

  # Arguments accepted by both builders. list() keeps NULL-valued entries
  # (e.g. max_seq = NULL), so they are still passed explicitly.
  shared_args <- list(
    fwd_file = fwd_file,
    fwd_primer_seq = fwd_primer_seq,
    fwd_primer_lens = fwd_primer_lens,
    fwd_min_score = fwd_min_score,
    rev_file = rev_file,
    rev_primer_seq = rev_primer_seq,
    rev_primer_lens = rev_primer_lens,
    rev_min_score = rev_min_score,
    fwd_pid_in_which_fragment = fwd_pid_in_which_fragment,
    rev_pid_in_which_fragment = rev_pid_in_which_fragment,
    max_seq = max_seq,
    min_read_length = min_read_length,
    pattern_to_chop_from_names = pattern_to_chop_from_names,
    output_dir = output_dir,
    base_for_names = base_for_names,
    intermediate_reports = intermediate_reports,
    erase_history = erase_history,
    verbosity = verbosity,
    report_type = report_type,
    ncpu = ncpu,
    bins_to_process = bins_to_process,
    header_format = header_format,
    front_gaps_allowed = front_gaps_allowed
  )

  do.call(builder_name, c(shared_args, builder_only_args))
}
#' Builds the config for overlapping reads - stable version
#'
#' Assembles the named list of pipeline steps (\code{operation_list}) for
#' overlapping paired-end reads and bundles it with the run-level settings.
#' Steps refer to each other through their list names (\code{"n001"}, ...)
#' in their \code{data_source} entries.
#'
#' @param fwd_file,rev_file Used as \code{data_source} of the forward /
#'   reverse \code{loadData} steps.
#' @param fwd_primer_seq,fwd_primer_lens,fwd_min_score Passed to the forward
#'   \code{trimAffixes} step as \code{primer_seq}, \code{primer_lens} and
#'   \code{min_score}.
#' @param rev_primer_seq,rev_primer_lens,rev_min_score Same, for the reverse
#'   \code{trimAffixes} step.
#' @param fwd_pid_in_which_fragment,rev_pid_in_which_fragment Passed as
#'   \code{pid_in_which_fragment} to the forward / reverse
#'   \code{extractPIDs} steps. Either a real \code{NULL} or the string
#'   \code{"NULL"} results in \code{NULL} being stored.
#' @param max_seq Passed to both \code{loadData} steps.
#' @param min_read_length Threshold of the forward and reverse
#'   \code{seqLength} steps.
#' @param pattern_to_chop_from_names Passed to both \code{extractPIDs} steps.
#' @param output_dir,base_for_names,intermediate_reports,erase_history
#'   Copied unchanged into the returned list.
#' @param verbosity,report_type,ncpu,header_format Copied unchanged into the
#'   returned list.
#' @param bins_to_process Passed to the \code{alignBinsMSA} step.
#' @param merged_read_length Threshold of the \code{cons_seqLength} step.
#' @param front_gaps_allowed Passed to both \code{trimAffixes} steps.
#' @return A list with elements \code{operation_list}, \code{output_dir},
#'   \code{base_for_names}, \code{header_format},
#'   \code{intermediate_reports}, \code{verbosity}, \code{erase_history},
#'   \code{report_type} and \code{ncpu}.
#' @export
buildConfig_ol_prod <- function(fwd_file, fwd_primer_seq, fwd_primer_lens, fwd_min_score,
                                rev_file, rev_primer_seq, rev_primer_lens, rev_min_score,
                                fwd_pid_in_which_fragment, rev_pid_in_which_fragment,
                                max_seq = NULL,
                                min_read_length = 295,
                                pattern_to_chop_from_names = ' [0-9]:N:[0-9]*:[0-9]*$',
                                output_dir = "/fridge/data/MotifBinner2_test",
                                base_for_names = "CAP129_2040_009wpi_C2C3",
                                intermediate_reports = TRUE,
                                erase_history = FALSE,
                                verbosity = 3,
                                report_type = c('html'),
                                ncpu = 4,
                                bins_to_process = Inf,
                                merged_read_length = 1000,
                                header_format = 'MiSeq',
                                front_gaps_allowed = 0) {
  # Map the string "NULL" to a real NULL. The is.null() guard is required:
  # `NULL == "NULL"` is logical(0), which `if` rejects with an error, so an
  # unguarded comparison made it impossible to pass a real NULL.
  # NOTE(review): presumably "NULL" arrives from a text-based caller - confirm.
  if (!is.null(fwd_pid_in_which_fragment) &&
      fwd_pid_in_which_fragment == "NULL") {
    fwd_pid_in_which_fragment <- NULL
  }
  if (!is.null(rev_pid_in_which_fragment) &&
      rev_pid_in_which_fragment == "NULL") {
    rev_pid_in_which_fragment <- NULL
  }

  operation_list <- list(
    # ---- forward reads: load, QC, filter, trim ----
    'n001' = list(name = 'fwd_loadData',
                  op = 'loadData',
                  data_source = fwd_file,
                  max_seq = max_seq,
                  cache_data = TRUE),
    'n002' = list(name = 'fwd_basicQC',
                  op = 'basicQC',
                  data_source = "n001",
                  cache_data = FALSE),
    'n003' = list(name = 'fwd_ambigSeqs',
                  op = 'ambigSeqs',
                  data_source = "n001",
                  threshold = 0.02,
                  cache_data = TRUE),
    'n004' = list(name = 'fwd_primerDimer',
                  op = 'primerDimer',
                  data_source = "n003",
                  threshold = 80,
                  cache_data = TRUE),
    'n005' = list(name = 'fwd_seqLength',
                  op = 'seqLength',
                  data_source = "n004",
                  threshold = min_read_length,
                  cache_data = TRUE),
    'n006' = list(name = 'fwd_qualTrim',
                  op = 'qualTrim',
                  data_source = "n005",
                  avg_qual = 20,
                  bad_base_threshold = 10,
                  max_bad_bases = 0.15,
                  cache_data = TRUE),
    'n007' = list(name = 'fwd_trimAffixes',
                  op = 'trimAffixes',
                  data_source = "n006",
                  primer_seq = fwd_primer_seq,
                  primer_lens = fwd_primer_lens,
                  min_score = fwd_min_score,
                  primer_location = 'front',
                  front_gaps_allowed = front_gaps_allowed,
                  cache_data = TRUE),
    # ---- reverse reads: same chain as the forward reads ----
    'n008' = list(name = 'rev_loadData',
                  op = 'loadData',
                  data_source = rev_file,
                  max_seq = max_seq,
                  cache_data = TRUE),
    'n009' = list(name = 'rev_basicQC',
                  op = 'basicQC',
                  data_source = "n008",
                  cache_data = FALSE),
    'n010' = list(name = 'rev_ambigSeqs',
                  op = 'ambigSeqs',
                  data_source = "n008",
                  threshold = 0.02,
                  cache_data = TRUE),
    'n011' = list(name = 'rev_primerDimer',
                  op = 'primerDimer',
                  data_source = "n010",
                  threshold = 80,
                  cache_data = TRUE),
    'n012' = list(name = 'rev_seqLength',
                  op = 'seqLength',
                  data_source = "n011",
                  threshold = min_read_length,
                  cache_data = TRUE),
    'n013' = list(name = 'rev_qualTrim',
                  op = 'qualTrim',
                  data_source = "n012",
                  avg_qual = 20,
                  bad_base_threshold = 10,
                  max_bad_bases = 0.15,
                  cache_data = TRUE),
    'n014' = list(name = 'rev_trimAffixes',
                  op = 'trimAffixes',
                  data_source = "n013",
                  primer_seq = rev_primer_seq,
                  primer_lens = rev_primer_lens,
                  min_score = rev_min_score,
                  primer_location = 'front',
                  front_gaps_allowed = front_gaps_allowed,
                  cache_data = TRUE),
    # ---- PID extraction and pairing ----
    # list() keeps the pid_in_which_fragment entry even when its value is NULL.
    'n015' = list(name = 'fwd_extractPIDs',
                  op = 'extractPIDs',
                  data_source = "n007",
                  pid_in_which_fragment = fwd_pid_in_which_fragment,
                  pattern_to_chop_from_names = pattern_to_chop_from_names,
                  pid_gaps_allowed = 0,
                  cache_data = TRUE),
    'n016' = list(name = 'rev_extractPIDs',
                  op = 'extractPIDs',
                  data_source = "n014",
                  pid_in_which_fragment = rev_pid_in_which_fragment,
                  pattern_to_chop_from_names = pattern_to_chop_from_names,
                  pid_gaps_allowed = 0,
                  cache_data = TRUE),
    'n017' = list(name = 'matchPairs',
                  op = 'matchPairs',
                  data_source = c("fwd" = "n015", "rev" = "n016"),
                  # header_format is deliberately not set on this step (it was
                  # commented out); it is only stored in the returned list.
                  #header_format = header_format,
                  cache_data = TRUE),
    'n018' = list(name = 'processBadPIDs',
                  op = 'processBadPIDs',
                  data_source = "n017",
                  cache_data = TRUE),
    # ---- merge pairs, bin, align, build consensus ----
    'n019' = list(name = 'mergePEAR',
                  op = 'mergePEAR',
                  data_source = "n018",
                  cache_data = TRUE),
    'n020' = list(name = 'merge_qualTrim',
                  op = 'qualTrim',
                  data_source = "n019",
                  avg_qual = 20,
                  bad_base_threshold = 10,
                  max_bad_bases = 0.05,
                  cache_data = TRUE),
    'n021' = list(name = 'binSizeCheck',
                  op = 'binSizeCheck',
                  data_source = "n020",
                  min_bin_size = 3,
                  cache_data = TRUE),
    'n022' = list(name = 'alignBinsMSA',
                  op = 'alignBinsMSA',
                  bins_to_process = bins_to_process,
                  data_source = "n021",
                  cache_data = TRUE),
    'n023' = list(name = 'buildConsensus',
                  op = 'buildConsensus',
                  data_source = "n022",
                  cache_data = TRUE),
    # ---- error-rate steps ----
    'n024' = list(name = 'primerSeqErr',
                  op = 'primerSeqErr',
                  data_source = c("fwd" = "n007", "rev" = "n014"),
                  cache_data = FALSE),
    'n025' = list(name = 'binSeqErr',
                  op = 'binSeqErr',
                  data_source = c("bin_msa_merged" = "n022",
                                  "cons_merged" = "n023",
                                  "primer_err" = "n024"),
                  cache_data = FALSE),
    # ---- consensus clean-up and clustering ----
    'n026' = list(name = 'removeGaps',
                  op = 'removeChars',
                  data_source = "n023",
                  char_to_remove = "-",
                  cache_data = TRUE),
    'n027' = list(name = 'cons_ambigSeqs',
                  op = 'ambigSeqs',
                  data_source = "n026",
                  threshold = 0,
                  cache_data = TRUE),
    'n028' = list(name = 'cons_seqLength',
                  op = 'seqLength',
                  data_source = "n027",
                  threshold = merged_read_length,
                  cache_data = TRUE),
    'n029' = list(name = 'vsearchCluster98',
                  op = 'vsearchCluster',
                  data_source = "n028",
                  id = 0.98,
                  min_clus_size = 1,
                  cache_data = TRUE),
    'n030' = list(name = 'vsearchCluster95',
                  op = 'vsearchCluster',
                  data_source = "n029",
                  id = 0.95,
                  min_clus_size = 1,
                  cache_data = TRUE),
    # ---- reporting ----
    'n100' = list(name = 'dataTracing',
                  op = 'dataTracing',
                  data_source = c(
                    "fwdReads.1" = "n001", "fwdReads.2" = "n003", "fwdReads.3" = "n004",
                    "fwdReads.4" = "n005", "fwdReads.5" = "n006", "fwdReads.6" = "n007",
                    "revReads.1" = "n008", "revReads.2" = "n010", "revReads.3" = "n011",
                    "revReads.4" = "n012", "revReads.5" = "n013", "revReads.6" = "n014",
                    "mergeReads.01" = "n017", "mergeReads.02" = "n018", "mergeReads.03" = "n019",
                    "mergeReads.04" = "n020", "mergeReads.05" = "n021", "mergeReads.06" = "n022",
                    "mergeReads.07" = "n023", "mergeReads.08" = "n027", "mergeReads.09" = "n028",
                    "mergeReads.10" = "n029", "mergeReads.11" = "n030"),
                  cache_data = FALSE),
    'n101' = list(name = 'prepConfig',
                  op = 'prepConfig',
                  cache_data = FALSE)
  )

  list(operation_list = operation_list,
       output_dir = output_dir,
       base_for_names = base_for_names,
       header_format = header_format,
       intermediate_reports = intermediate_reports,
       verbosity = verbosity,
       erase_history = erase_history,
       report_type = report_type,
       ncpu = ncpu)
}
#' Test config builder for non-overlapping reads
#'
#' Assembles the named list of pipeline steps (\code{operation_list}) for
#' non-overlapping paired-end reads: forward and reverse reads share the same
#' load / QC / trim / PID steps and are then aligned and turned into consensus
#' sequences separately. Steps refer to each other through their list names
#' (\code{"n001"}, ...) in their \code{data_source} entries.
#'
#' @param fwd_file,rev_file Used as \code{data_source} of the forward /
#'   reverse \code{loadData} steps.
#' @param fwd_primer_seq,fwd_primer_lens,fwd_min_score Passed to the forward
#'   \code{trimAffixes} step as \code{primer_seq}, \code{primer_lens} and
#'   \code{min_score}.
#' @param rev_primer_seq,rev_primer_lens,rev_min_score Same, for the reverse
#'   \code{trimAffixes} step.
#' @param fwd_pid_in_which_fragment,rev_pid_in_which_fragment Passed as
#'   \code{pid_in_which_fragment} to the forward / reverse
#'   \code{extractPIDs} steps. Either a real \code{NULL} or the string
#'   \code{"NULL"} results in \code{NULL} being stored.
#' @param max_seq Passed to both \code{loadData} steps.
#' @param min_read_length Threshold of the forward and reverse
#'   \code{seqLength} steps.
#' @param pattern_to_chop_from_names Passed to both \code{extractPIDs} steps.
#' @param output_dir,base_for_names,intermediate_reports Copied unchanged
#'   into the returned list.
#' @param erase_history Copied unchanged into the returned list. Defaults to
#'   \code{TRUE} in this builder.
#' @param verbosity,report_type,ncpu,header_format Copied unchanged into the
#'   returned list.
#' @param bins_to_process Passed to both \code{alignBinsMSA} steps.
#' @param front_gaps_allowed Passed to both \code{trimAffixes} steps.
#' @return A list with elements \code{operation_list}, \code{output_dir},
#'   \code{base_for_names}, \code{header_format},
#'   \code{intermediate_reports}, \code{verbosity}, \code{erase_history},
#'   \code{report_type} and \code{ncpu}.
#' @export
buildConfig_nol_test <- function(fwd_file, fwd_primer_seq, fwd_primer_lens, fwd_min_score,
                                 rev_file, rev_primer_seq, rev_primer_lens, rev_min_score,
                                 fwd_pid_in_which_fragment, rev_pid_in_which_fragment,
                                 max_seq = NULL,
                                 min_read_length = 295,
                                 pattern_to_chop_from_names = ' [0-9]:N:[0-9]*:[0-9]*$',
                                 output_dir = "/fridge/data/MotifBinner2_test",
                                 base_for_names = "CAP129_2040_009wpi_C2C3",
                                 intermediate_reports = TRUE,
                                 erase_history = TRUE,
                                 verbosity = 3,
                                 report_type = c('html'),
                                 ncpu = 4,
                                 bins_to_process = Inf,
                                 header_format = 'MiSeq',
                                 front_gaps_allowed = 0) {
  # Map the string "NULL" to a real NULL, independently for each direction.
  # Each check is guarded by is.null() on ITS OWN argument: guarding the
  # reverse check with the forward argument left rev = "NULL" unconverted
  # whenever fwd was NULL/"NULL", and errored when only rev was a real NULL.
  if (!is.null(fwd_pid_in_which_fragment) &&
      fwd_pid_in_which_fragment == "NULL") {
    fwd_pid_in_which_fragment <- NULL
  }
  if (!is.null(rev_pid_in_which_fragment) &&
      rev_pid_in_which_fragment == "NULL") {
    rev_pid_in_which_fragment <- NULL
  }

  operation_list <- list(
    # ---- forward reads: load, QC, filter, trim ----
    'n001' = list(name = 'fwd_loadData',
                  op = 'loadData',
                  data_source = fwd_file,
                  max_seq = max_seq,
                  cache_data = TRUE),
    'n002' = list(name = 'fwd_basicQC',
                  op = 'basicQC',
                  data_source = "n001",
                  cache_data = FALSE),
    'n003' = list(name = 'fwd_ambigSeqs',
                  op = 'ambigSeqs',
                  data_source = "n001",
                  threshold = 0.02,
                  cache_data = TRUE),
    'n004' = list(name = 'fwd_primerDimer',
                  op = 'primerDimer',
                  data_source = "n003",
                  threshold = 80,
                  cache_data = TRUE),
    'n005' = list(name = 'fwd_seqLength',
                  op = 'seqLength',
                  data_source = "n004",
                  threshold = min_read_length,
                  cache_data = TRUE),
    'n006' = list(name = 'fwd_qualTrim',
                  op = 'qualTrim',
                  data_source = "n005",
                  avg_qual = 20,
                  bad_base_threshold = 10,
                  max_bad_bases = 0.15,
                  cache_data = TRUE),
    'n007' = list(name = 'fwd_trimAffixes',
                  op = 'trimAffixes',
                  data_source = "n006",
                  primer_seq = fwd_primer_seq,
                  primer_lens = fwd_primer_lens,
                  min_score = fwd_min_score,
                  primer_location = 'front',
                  front_gaps_allowed = front_gaps_allowed,
                  cache_data = TRUE),
    # ---- reverse reads: same chain as the forward reads ----
    'n008' = list(name = 'rev_loadData',
                  op = 'loadData',
                  data_source = rev_file,
                  max_seq = max_seq,
                  cache_data = TRUE),
    'n009' = list(name = 'rev_basicQC',
                  op = 'basicQC',
                  data_source = "n008",
                  cache_data = FALSE),
    'n010' = list(name = 'rev_ambigSeqs',
                  op = 'ambigSeqs',
                  data_source = "n008",
                  threshold = 0.02,
                  cache_data = TRUE),
    'n011' = list(name = 'rev_primerDimer',
                  op = 'primerDimer',
                  data_source = "n010",
                  threshold = 80,
                  cache_data = TRUE),
    'n012' = list(name = 'rev_seqLength',
                  op = 'seqLength',
                  data_source = "n011",
                  threshold = min_read_length,
                  cache_data = TRUE),
    'n013' = list(name = 'rev_qualTrim',
                  op = 'qualTrim',
                  data_source = "n012",
                  avg_qual = 20,
                  bad_base_threshold = 10,
                  max_bad_bases = 0.15,
                  cache_data = TRUE),
    'n014' = list(name = 'rev_trimAffixes',
                  op = 'trimAffixes',
                  data_source = "n013",
                  primer_seq = rev_primer_seq,
                  primer_lens = rev_primer_lens,
                  min_score = rev_min_score,
                  primer_location = 'front',
                  front_gaps_allowed = front_gaps_allowed,
                  cache_data = TRUE),
    # ---- PID extraction and pairing ----
    # list() keeps the pid_in_which_fragment entry even when its value is NULL.
    'n015' = list(name = 'fwd_extractPIDs',
                  op = 'extractPIDs',
                  data_source = "n007",
                  pid_in_which_fragment = fwd_pid_in_which_fragment,
                  pattern_to_chop_from_names = pattern_to_chop_from_names,
                  pid_gaps_allowed = 0,
                  cache_data = TRUE),
    'n016' = list(name = 'rev_extractPIDs',
                  op = 'extractPIDs',
                  data_source = "n014",
                  pid_in_which_fragment = rev_pid_in_which_fragment,
                  pattern_to_chop_from_names = pattern_to_chop_from_names,
                  pid_gaps_allowed = 0,
                  cache_data = TRUE),
    'n017' = list(name = 'matchPairs',
                  op = 'matchPairs',
                  data_source = c("fwd" = "n015", "rev" = "n016"),
                  # header_format is deliberately not set on this step (it was
                  # commented out); it is only stored in the returned list.
                  #header_format = header_format,
                  cache_data = TRUE),
    'n018' = list(name = 'processBadPIDs',
                  op = 'processBadPIDs',
                  data_source = "n017",
                  cache_data = TRUE),
    # ---- forward reads: extract, align, consensus ----
    'n019' = list(name = 'fwd_extractReads',
                  op = 'extractData',
                  data_source = "n018",
                  extract_levels = c("seq_dat", "fwd"),
                  cache_data = TRUE),
    'n020' = list(name = 'fwd_alignBinsMSA',
                  op = 'alignBinsMSA',
                  bins_to_process = bins_to_process,
                  data_source = "n019",
                  cache_data = TRUE),
    'n021' = list(name = 'fwd_buildConsensus',
                  op = 'buildConsensus',
                  data_source = "n020",
                  cache_data = TRUE),
    'n022' = list(name = 'fwd_removeGaps',
                  op = 'removeChars',
                  data_source = "n021",
                  char_to_remove = "-",
                  cache_data = TRUE),
    # ---- reverse reads: extract, align, consensus ----
    'n023' = list(name = 'rev_extractReads',
                  op = 'extractData',
                  data_source = "n018",
                  extract_levels = c("seq_dat", "rev"),
                  cache_data = TRUE),
    'n024' = list(name = 'rev_alignBinsMSA',
                  op = 'alignBinsMSA',
                  bins_to_process = bins_to_process,
                  data_source = "n023",
                  cache_data = TRUE),
    'n025' = list(name = 'rev_buildConsensus',
                  op = 'buildConsensus',
                  data_source = "n024",
                  cache_data = TRUE),
    'n026' = list(name = 'rev_removeGaps',
                  op = 'removeChars',
                  data_source = "n025",
                  char_to_remove = "-",
                  cache_data = TRUE),
    # ---- error-rate steps (step names jump from n026 to n030) ----
    'n030' = list(name = 'primerSeqErr',
                  op = 'primerSeqErr',
                  data_source = c("fwd" = "n007", "rev" = "n014"),
                  cache_data = FALSE),
    'n031' = list(name = 'binSeqErr',
                  op = 'binSeqErr',
                  data_source = c("bin_msa_fwd" = "n020", "bin_msa_rev" = "n024",
                                  "cons_fwd" = "n021", "cons_rev" = "n025",
                                  "primer_err" = "n030"),
                  cache_data = FALSE),
    # TODO: binSeqErr for non-overlapping reads
    # ---- reporting ----
    'n100' = list(name = 'dataTracing',
                  op = 'dataTracing',
                  data_source = c(
                    "fwdReads.01" = "n001", "fwdReads.02" = "n003", "fwdReads.03" = "n004",
                    "fwdReads.04" = "n005", "fwdReads.05" = "n006", "fwdReads.06" = "n007",
                    "fwdReads.07" = "n017", "fwdReads.08" = "n018", "fwdReads.09" = "n020",
                    "fwdReads.10" = "n021",
                    "revReads.01" = "n008", "revReads.02" = "n010", "revReads.03" = "n011",
                    "revReads.04" = "n012", "revReads.05" = "n013", "revReads.06" = "n014",
                    "revReads.07" = "n017", "revReads.08" = "n018", "revReads.09" = "n024",
                    "revReads.10" = "n025"),
                  cache_data = FALSE),
    'n101' = list(name = 'prepConfig',
                  op = 'prepConfig',
                  cache_data = FALSE)
  )

  # Return the list as the final expression (not as an assignment) so the
  # result is visible, matching the overlapping-reads builder.
  list(operation_list = operation_list,
       output_dir = output_dir,
       base_for_names = base_for_names,
       header_format = header_format,
       intermediate_reports = intermediate_reports,
       verbosity = verbosity,
       erase_history = erase_history,
       report_type = report_type,
       ncpu = ncpu)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.