## Requirement: "tibble" + "dplyr"
multiple_evaluations = function(eco_taxo_data, bin_data, metabarcode_data,
environment_list, climate_list, order_list,
intra_bin_multi_list, inter_bin_multi_list,
specific_pcr_fish, unspecific_pcr_all,
specific_pcr_fish_reference,
similarity_reference, reference_data,
save_taxo_analysis = F, save_size_subset = F){
score_list = vector(mode = "list", length = length(similarity_reference$THRESHOLD))
names(score_list) = similarity_reference$THRESHOLD
score_list = lapply(score_list, function(x) {x = vector(mode = "list", length = length(environment_list)) })
score_list = lapply(score_list, function(x) lapply(x, function(y) y = vector(mode = "list", length = length(order_list))))
taxo_list = vector(mode = "list", length = length(similarity_reference$THRESHOLD))
names(taxo_list) = similarity_reference$THRESHOLD
taxo_list = lapply(taxo_list, function(x) {x = vector(mode = "list", length = length(environment_list)) })
taxo_list = lapply(taxo_list, function(x) lapply(x, function(y) y = vector(mode = "list", length = length(order_list))))
for(similarity in similarity_reference$THRESHOLD){
names(score_list)[100 - similarity] = similarity
names(taxo_list)[100 - similarity] = similarity
for(i in 1:length(environment_list)){
names(score_list[[100 - similarity]])[i] = paste0(environment_list[[i]], "_", climate_list[[i]])
names(taxo_list[[100 - similarity]])[i] = paste0(environment_list[[i]], "_", climate_list[[i]])
for(j in 1:length(order_list)){
subset_found = F
if(order_list[[j]] == "All"){
if(all(environment_list[[i]] == "both") && all(climate_list[[i]] == "both")){
subset_data = eco_taxo_data
subset_specific_pcr_fish = specific_pcr_fish
subset_specific_pcr_fish_reference = specific_pcr_fish_reference
subset_unspecific_pcr_all = unspecific_pcr_all
}
else {
subset_data = subset(eco_taxo_data, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]])
subset_specific_pcr_fish = subset(specific_pcr_fish, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]])
subset_specific_pcr_fish_reference = subset(specific_pcr_fish_reference, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]])
subset_unspecific_pcr_all = subset(unspecific_pcr_all, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]])
}
}
else if(order_list[[j]] == "Others"){
if(all(environment_list[[i]] == "both") && all(climate_list[[i]] == "both")){
subset_data = subset(eco_taxo_data, !(ORDER %in% unlist(order_list)))
subset_specific_pcr_fish = subset(specific_pcr_fish, !(ORDER %in% unlist(order_list)))
subset_specific_pcr_fish_reference = subset(specific_pcr_fish_reference, !(ORDER %in% unlist(order_list)))
subset_unspecific_pcr_all = unspecific_pcr_all
}
else {
subset_data = subset(eco_taxo_data, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
!(ORDER %in% unlist(order_list)))
subset_specific_pcr_fish = subset(specific_pcr_fish, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
!(ORDER %in% unlist(order_list)))
subset_specific_pcr_fish_reference = subset(specific_pcr_fish_reference, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
!(ORDER %in% unlist(order_list)))
subset_unspecific_pcr_all = subset(unspecific_pcr_all, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]])
}
}
else {
if(all(environment_list[[i]] == "both") && all(climate_list[[i]] == "both")){
subset_data = subset(eco_taxo_data, ORDER == order_list[[j]])
subset_specific_pcr_fish = subset(specific_pcr_fish, ORDER == order_list[[j]])
subset_specific_pcr_fish_reference = subset(specific_pcr_fish_reference, ORDER == order_list[[j]])
subset_unspecific_pcr_all = unspecific_pcr_all
}
else {
subset_data = subset(eco_taxo_data, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
ORDER == order_list[[j]])
subset_specific_pcr_fish = subset(specific_pcr_fish, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
ORDER == order_list[[j]])
subset_specific_pcr_fish_reference = subset(specific_pcr_fish_reference, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
ORDER == order_list[[j]])
subset_unspecific_pcr_all = subset(unspecific_pcr_all, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]])
}
}
subset_metabarcodes = subset(metabarcode_data, ACCESSION %in% subset_data$ACCESSION)
if(length(climate_list) == 1 && length(environment_list) == 1 && length(order_list) == 1){
subset_inter_bin = inter_bin_multi_list
subset_intra_bin = intra_bin_multi_list
}
else{
subset_inter_bin = inter_bin_multi_list[[paste0(names(environment_list)[i], "_", order_list[[j]])]]
subset_intra_bin = intra_bin_multi_list[[paste0(names(environment_list)[i], "_", order_list[[j]])]]
}
if(!is.null(subset_inter_bin) && !is.null(subset_intra_bin) && all(class(subset_inter_bin) != "numeric") &&
all(class(subset_intra_bin) != "numeric")){
subset_found = T
if(length(subset_inter_bin) == 1) subset_inter_bin_list = subset_inter_bin[[1]]
else subset_inter_bin_list = subset_inter_bin
if(length(subset_intra_bin) == 1) subset_intra_bin_list = subset_intra_bin[[1]]
else subset_intra_bin_list = subset_intra_bin
subset_intra_bin_list = list(SIMILARITY = subset_intra_bin_list$SIMILARITY,
THRESHOLD = subset_intra_bin_list$THRESHOLD[[
similarity_reference[which(similarity_reference$THRESHOLD == similarity), ]$INDEX]])
cat(paste0("Performing primers evaluation for ", order_list[[j]], " in the region ", names(environment_list)[i],
" with a similarity threshold of ", similarity, "%."))
cat("\n")
cat("\n")
if(length(subset_inter_bin_list) != length(unique(metabarcode_data$PRIMERS))) stop("Missing element in subset_inter_bin_list. Modify function!")
if(length(subset_inter_bin_list) != length(unique(metabarcode_data$PRIMERS))) stop("Missing element in subset_intra_bin_list. Modify function!")
score = evaluate_primers(specific_pcr_fish = subset_specific_pcr_fish,
unspecific_pcr_all = subset_unspecific_pcr_all,
specific_pcr_fish_reference = subset_specific_pcr_fish_reference,
metabarcode_data = subset_metabarcodes,
intra_bin_list = subset_intra_bin_list,
inter_bin_list = subset_inter_bin_list,
reference_data = reference_data,
taxo_bin_data = subset_data,
similarity_threshold = similarity,
plot = F)
if(save_taxo_analysis) {
subset_metabarcodes_taxo = tibble(merge(subset_metabarcodes, subset_data))
number_cases_intra_bin = subset(subset_metabarcodes_taxo, PRIMERS == unique(subset_metabarcodes_taxo$PRIMERS)[1]) %>% group_by(BIN) %>% summarise(N = n())
number_cases_intra_bin = sapply(split(number_cases_intra_bin, number_cases_intra_bin$BIN),
function(x) length(matrix(ncol = x$N, nrow = x$N)[lower.tri(matrix(ncol = x$N, nrow = x$N), diag = F)]))
total_number_cases_intra_bin = sum(number_cases_intra_bin)
number_cases_inter_bin = sapply(split(subset_data, subset_data$BIN),
function(x) length(unique(x$ACCESSION)))
number_cases_inter_bin = tibble(data.frame(BIN_INDEX = 1:length(number_cases_inter_bin),
BIN = names(number_cases_inter_bin),
NUMBER_ACCESSION = number_cases_inter_bin))
number_cases_inter_bin = number_cases_inter_bin$NUMBER_ACCESSION %o% number_cases_inter_bin$NUMBER_ACCESSION
total_number_cases_inter_bin = sum(number_cases_inter_bin[lower.tri(number_cases_inter_bin, diag = F)])
taxonomic_resolution_summary = tibble(merge(score$INTRA_BIN_ANALYSIS[,1:3], score$INTER_BIN_ANALYSIS[,1:3]))
taxonomic_resolution_summary$INTRA_BIN_NUMBER_CASES = total_number_cases_intra_bin
taxonomic_resolution_summary$INTER_BIN_NUMBER_CASES = total_number_cases_inter_bin
taxonomic_resolution_summary$TOTAL_NUMBER_CASES = total_number_cases_intra_bin + total_number_cases_inter_bin
taxonomic_resolution_summary$NUMBER_MISIDENTIFIED_CASES = taxonomic_resolution_summary$NUMBER_WRONG_DISCRIMINED_CASES +
taxonomic_resolution_summary$NUMBER_WRONG_UNDISCRIMINED_CASES
taxonomic_resolution_summary$PERCENT_MISIDENTIFIED_CASES = (taxonomic_resolution_summary$NUMBER_MISIDENTIFIED_CASES /
(taxonomic_resolution_summary$TOTAL_NUMBER_CASES)) * 100
taxonomic_resolution_summary$PERCENT_MISIDENTIFIED_CASES = (taxonomic_resolution_summary$NUMBER_MISIDENTIFIED_CASES /
(taxonomic_resolution_summary$TOTAL_NUMBER_CASES)) * 100
taxonomic_resolution_summary$PERCENT_MISIDENTIFIED_CASES = (taxonomic_resolution_summary$NUMBER_MISIDENTIFIED_CASES /
(taxonomic_resolution_summary$TOTAL_NUMBER_CASES)) * 100
taxonomic_resolution_summary$PERCENT_INTRA_BIN_ERRORS_IN_TOTAL_MISIDENTIFIED_CASES = (taxonomic_resolution_summary$NUMBER_WRONG_DISCRIMINED_CASES /
(taxonomic_resolution_summary$TOTAL_NUMBER_CASES)) * 100
taxonomic_resolution_summary$PERCENT_INTER_BIN_ERRORS_IN_TOTAL_MISIDENTIFIED_CASES = (taxonomic_resolution_summary$NUMBER_WRONG_UNDISCRIMINED_CASES /
(taxonomic_resolution_summary$TOTAL_NUMBER_CASES)) * 100
colnames(taxonomic_resolution_summary)[c(3,5)] = c("PERCENT_INTRA_BIN_ERRORS", "PERCENT_INTER_BIN_ERRORS")
taxonomic_resolution_summary$PERCENT_MISIDENTIFIED_CASES_PER_ANALYSIS = taxonomic_resolution_summary$PERCENT_INTRA_BIN_ERRORS +
taxonomic_resolution_summary$PERCENT_INTER_BIN_ERRORS
taxonomic_resolution_summary = tibble(cbind(taxonomic_resolution_summary[,1:2], taxonomic_resolution_summary[,6], taxonomic_resolution_summary[,3:4],
taxonomic_resolution_summary[,7], taxonomic_resolution_summary[,5], taxonomic_resolution_summary[,13],
taxonomic_resolution_summary[,9], taxonomic_resolution_summary[,8], taxonomic_resolution_summary[,10],
taxonomic_resolution_summary[,11:12]))
taxo = taxonomic_resolution_summary
}
if(save_size_subset) score = tibble(cbind(ORDER = order_list[[j]], FISH_DNA = nrow(subset_metabarcodes),
FISH_PCR = nrow(subset_specific_pcr_fish),
NON_FISH_PCR = nrow(subset_unspecific_pcr_all), score$SCORES_LONG))
else score = tibble(cbind(ORDER = rep(order_list[[j]], nrow(score$SCORES_LONG)), score$SCORES_LONG))
if(subset_found) {
score_list[[100 - similarity]][[i]][[j]] = score
names(score_list[[100 - similarity]][[i]])[j] = order_list[[j]]
if(save_taxo_analysis){
taxo_list[[100 - similarity]][[i]][[j]] = taxo
names(taxo_list[[100 - similarity]][[i]])[j] = order_list[[j]]
}
}
}
else if(similarity == max(similarity_reference$THRESHOLD))
warning(paste0("No sequences of ", order_list[[j]], " exist for the region ", names(environment_list)[i]))
}
}
}
final_scores = dplyr::bind_rows(lapply(score_list, function(x) bind_rows(lapply(x, function(y)
bind_rows(y, .id = "ORDER")), .id = "CLIMATE")), .id = "SIMILARITY")
final_scores$ENVIRONMENT = word(final_scores$CLIMATE, 1, sep = fixed("_"))
final_scores$CLIMATE = word(final_scores$CLIMATE, 2, sep = fixed("_"))
if(save_size_subset) final_scores = tibble(cbind(final_scores[,1:2], final_scores[,10], final_scores[,3:9]))
else final_scores = tibble(cbind(final_scores[,1:2], final_scores[,7], final_scores[,3:6]))
if(save_taxo_analysis){
final_taxo = dplyr::bind_rows(lapply(taxo_list, function(x) bind_rows(lapply(x, function(y)
bind_rows(y, .id = "ORDER")), .id = "CLIMATE")), .id = "SIMILARITY")
final_taxo$ENVIRONMENT = word(final_taxo$CLIMATE, 1, sep = fixed("_"))
final_taxo$CLIMATE = word(final_taxo$CLIMATE, 2, sep = fixed("_"))
final_taxo = tibble(cbind(final_taxo[,1:2], final_taxo[,17], final_taxo[,3:16]))
}
if(!save_taxo_analysis) return(final_scores)
else return(list(SCORES = final_scores, TAXO = final_taxo))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.