## Requirement: "dplyr"
multiple_resolution_analysis = function(eco_taxo_data, bin_data, metabarcode_data, type, environment_list, climate_list, order_list,
max_ratio_mean_length = 0.5, min_similarity_threshold = 90, max_similarity_threshold = 99){
result_list = list()
for(i in 1:length(environment_list)){
for(j in 1:length(order_list)){
cat("-------------------------------------------------------------------\n")
cat("-------------------------------------------------------------------\n")
cat(paste0("Environment: ", paste0(environment_list[[i]], collapse = "/"), " - Climate: ",
paste0(climate_list[[i]], collapse = "/"), " - Order: ", order_list[[j]]))
cat("\n")
cat("\n")
if(order_list[[j]] == "All"){
if(all(environment_list[[i]] == "both") && all(climate_list[[i]] == "both"))
subset_data = eco_taxo_data
else subset_data = subset(eco_taxo_data, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]])
}
else if(order_list[[j]] == "Others"){
if(all(environment_list[[i]] == "both") && all(climate_list[[i]] == "both"))
subset_data = subset(eco_taxo_data, !(ORDER %in% unlist(order_list)))
else subset_data = subset(eco_taxo_data, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
!(ORDER %in% unlist(order_list)))
}
else {
if(all(environment_list[[i]] == "both") && all(climate_list[[i]] == "both"))
subset_data = subset(eco_taxo_data, ORDER == order_list[[j]])
else subset_data = subset(eco_taxo_data, ENVIRONMENT %in% environment_list[[i]] & CLIMATE %in% climate_list[[i]] &
ORDER == order_list[[j]])
}
subset_metabarcodes = subset(metabarcode_data, ACCESSION %in% subset_data$ACCESSION)
subset_bin_data = subset(bin_data, ACCESSION %in% subset_data$ACCESSION)
if(nrow(subset_metabarcodes) != 0){
subset_metabarcodes = subset(metabarcode_data, ACCESSION %in% subset_data$ACCESSION)
if(type == "intra") {
intra_bin_comparisons = intra_taxa_similarity(subset_metabarcodes, subset_bin_data, subset_data, taxa = "BIN")
number_sequences_compared_per_bin = subset_bin_data %>% group_by(BIN) %>% summarise(N = n())
number_comparisons_per_bin = sapply(split(number_sequences_compared_per_bin, number_sequences_compared_per_bin$BIN),
function(x) length(matrix(ncol = x$N, nrow = x$N)[lower.tri(matrix(ncol = x$N, nrow = x$N), diag = F)]))
reference_bin = tibble(BIN = names(number_comparisons_per_bin), NUMBER_ACCESSION = number_sequences_compared_per_bin$N,
TOTAL_NUMBER_CASES = c(number_comparisons_per_bin))
intra_bin_similarity = bind_rows(lapply(intra_bin_comparisons, function(y) bind_rows(lapply(split(y, y$BIN1), function(x)
tibble(MIN_SIMILARITY = summary(x$SIMILARITY)[1], Q1_SIMILARITY = summary(x$SIMILARITY)[2],
MEAN_SIMILARITY = summary(x$SIMILARITY)[3], Q3_SIMILARITY = summary(x$SIMILARITY)[4],
MAX_SIMILARITY = summary(x$SIMILARITY)[5],
SD_SIMILARITY = sd(x$SIMILARITY))), .id = "BIN")), .id = "PRIMERS")
intra_bin_similarity = tibble(merge(intra_bin_similarity, reference_bin))
intra_bin_similarity = tibble(cbind(intra_bin_similarity[,2], intra_bin_similarity[,1], intra_bin_similarity[,10], intra_bin_similarity[,3:9]))
intra_bin_threshold = list()
for(similarity in min_similarity_threshold:max_similarity_threshold){
intra_bin_threshold[[100 - similarity]] = bind_rows(lapply(intra_bin_comparisons, function(y) bind_rows(lapply(split(y, y$BIN1), function(x)
tibble(NUMBER_ACCESSION = length(unique(c(x$ACCESSION1, x$ACCESSION2))),
NUMBER_DISCRIMINED_CASES = nrow(subset(x, SIMILARITY < similarity))/2,
TOTAL_NUMBER_CASES = nrow(x)/2,
PERCENT_DISCRIMINED_CASES = nrow(subset(x, SIMILARITY < similarity)) / nrow(x) * 100,
MEAN_SIMILARITY_DISCRIMINED_CASES = mean(subset(x, SIMILARITY < similarity)$SIMILARITY),
SD_SIMILARITY_DISCRIMINED_CASES = sd(subset(x, SIMILARITY < similarity)$SIMILARITY))
), .id = "BIN")), .id = "PRIMERS")
names(intra_bin_threshold)[100 - similarity] = similarity
}
result = list(SIMILARITY = intra_bin_similarity, THRESHOLD = intra_bin_threshold)
}
else if(type == "inter") {
similarity_comparisons_list = pairwise_similarity(subset_metabarcodes, subset_bin_data, subset_data,
min_similarity_threshold = min_similarity_threshold,
max_similarity_threshold = max_similarity_threshold,
number_hits_considered = c(0, 0))
result = lapply(similarity_comparisons_list, function(x) x[which(x$BIN1 != x$BIN2),])
}
else stop('The argument "type" must be in "intra" or "inter" for intra-BIN or inter-BIN resolution analysis.')
result_list[[paste0(names(environment_list[i]), "_", order_list[[j]])]] = result
}
else result_list[[paste0(names(environment_list[i]), "_", order_list[[j]])]] = 0
}
}
return(result_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.