## Requirement: "tidyverse"
inter_bin_analysis = function(inter_bin_data_list, bin_data, similarity_threshold){
## Initialisation
options(dplyr.summarise.inform = F)
## Making a dataframe with the number of accession per BIN
cat("\n")
cat("Computing reference matrix:")
number_accession_per_bin = sapply(split(bin_data, bin_data$BIN),
function(x) length(unique(x$ACCESSION)))
number_accession_per_bin = tibble(data.frame(BIN_INDEX = 1:length(number_accession_per_bin),
BIN = names(number_accession_per_bin),
NUMBER_ACCESSION = number_accession_per_bin))
## Making a matrix of all combinations
multiplied_combinations = number_accession_per_bin$NUMBER_ACCESSION %o% number_accession_per_bin$NUMBER_ACCESSION
multiplied_combinations_all = multiplied_combinations
multiplied_combinations[upper.tri(multiplied_combinations, diag = T)] = NA
cat(" DONE")
cat("\n")
cat("\n")
## Loop to compute the resolution analysis for each metabarcode
cat("Computing detailed analysis for each metabarcode: ")
found = 0
for(i in 1:length(inter_bin_data_list)){
cat(paste0(i, "/", length(inter_bin_data_list), " ... "))
## Making a list with the number of cases per BIN
bin_threshold = subset(inter_bin_data_list[[i]], as.numeric(SIMILARITY) >= similarity_threshold)
bin_list = split(bin_threshold, bin_threshold$BIN1)
undiscrimined_cases_list = list()
undiscrimined_cases_list = lapply(bin_list, function(x) sapply(split(x, x$BIN2), nrow))
if(length(undiscrimined_cases_list) > 0){
## Making a dataframe with the mean similarity and the standard error for each comparison
bin_similarity = bin_threshold %>% group_by(BIN1, BIN2) %>%
summarise(MEAN_SIMILARITY_UNDISCRIMINED_CASES = mean(SIMILARITY),
SD_SIMILARITY_UNDISCRIMINED_CASES = sd(SIMILARITY))
## Making a matrix to remove duplicated cases (i.e. BIN1 - BIN2 & BIN2 - BIN1 = same combination)
matrix_inter_bin = xtabs(unlist(undiscrimined_cases_list) ~
rep(names(undiscrimined_cases_list), times = lengths(undiscrimined_cases_list)) +
unlist(lapply(undiscrimined_cases_list, names)))
matrix_inter_bin[upper.tri(matrix_inter_bin, diag = T)] = NA
inter_bin = as.data.frame(matrix_inter_bin)
colnames(inter_bin) = c("BIN1", "BIN2", "NUMBER_UNDISCRIMINED_CASES")
inter_bin = tibble(subset(inter_bin, as.numeric(NUMBER_UNDISCRIMINED_CASES) > 0))
## Adding BIN index for a fast subset of the matrix
inter_bin_index = merge(inter_bin, number_accession_per_bin[,1:2], by.x = "BIN1", by.y = "BIN", suffixes = c("1","2"))
inter_bin_index = merge(inter_bin_index, number_accession_per_bin[,1:2], by.x = "BIN2", by.y = "BIN", suffixes = c("1","2"))
inter_bin_index = left_join(inter_bin, inter_bin_index, by = c("BIN1", "BIN2"))
## Searching informations for each BIN combination
total_cases_undiscrimined = list()
for(j in 1:nrow(inter_bin_index)){
total_cases_undiscrimined[j] = multiplied_combinations_all[inter_bin_index[j,]$BIN_INDEX1, inter_bin_index[j,]$BIN_INDEX2]
}
## Creating and ordering the final detailed table
inter_bin_primer = tibble(cbind(PRIMERS = names(inter_bin_data_list)[i], inter_bin,
TOTAL_NUMBER_CASES = unlist(total_cases_undiscrimined),
PERCENT_UNDISCRIMINED_CASES = inter_bin$NUMBER_UNDISCRIMINED_CASES /
unlist(total_cases_undiscrimined) * 100))
inter_bin_primer = tibble(merge(inter_bin_primer, bin_similarity))
inter_bin_primer = tibble(cbind(inter_bin_primer[,3], inter_bin_primer[,1:2], inter_bin_primer[,4:8]))
inter_bin_primer = inter_bin_primer[order(-inter_bin_primer$TOTAL_NUMBER_CASES,
-inter_bin_primer$PERCENT_UNDISCRIMINED_CASES),]
if(nrow(inter_bin_primer) > 0) found = found + 1
else found = 0
}
if(found == 1) inter_bin_final = inter_bin_primer
else if(as.numeric(found) > 1) inter_bin_final = rbind(inter_bin_final, inter_bin_primer)
}
cat("\n")
cat("\n")
cat("Computing summary for each metabarcode:")
## Computing the summary table
total_number_cases = sum(multiplied_combinations[lower.tri(multiplied_combinations, diag = F)])
total_number_bin_pair = length(multiplied_combinations[lower.tri(multiplied_combinations, diag = F)])
if(found != 0){
inter_bin_final_list = split(inter_bin_final, inter_bin_final$PRIMERS)
inter_bin_final_list = lapply(inter_bin_final_list, function(x) x[!duplicated(x),])
primers = names(inter_bin_final_list)
undiscrimined_cases = sapply(inter_bin_final_list, function(x) sum(x$NUMBER_UNDISCRIMINED_CASES))
undiscrimined_bin_pair = sapply(inter_bin_final_list, nrow)
discrimined_bin_pair = total_number_bin_pair - undiscrimined_bin_pair
mean_undiscrimined_cases_per_bin = list()
for(m in 1:length(inter_bin_final_list)) { mean_undiscrimined_cases_per_bin[m] = mean(c(inter_bin_final_list[[m]]$NUMBER_UNDISCRIMINED_CASES,
rep(0, discrimined_bin_pair[m]))) }
mean_percent_undiscrimined_cases_per_bin = list()
for(n in 1:length(inter_bin_final_list)) { mean_percent_undiscrimined_cases_per_bin[n] = mean(c(inter_bin_final_list[[n]]$PERCENT_UNDISCRIMINED_CASES,
rep(0, discrimined_bin_pair[n]))) }
}
else{
undiscrimined_cases = rep(0, length(inter_bin_data_list))
undiscrimined_bin_pair = rep(0, length(inter_bin_data_list))
mean_undiscrimined_cases_per_bin = as.list(rep(0, length(inter_bin_data_list)))
mean_percent_undiscrimined_cases_per_bin = as.list(rep(0, length(inter_bin_data_list)))
primers = names(inter_bin_data_list)
}
summary_inter_bin = tibble(data.frame(PRIMERS = primers,
NUMBER_WRONG_UNDISCRIMINED_CASES = undiscrimined_cases,
PERCENT_WRONG_UNDISCRIMINED_CASES = round(undiscrimined_cases / total_number_cases * 100, 3),
NUMBER_BIN_PAIRS_WITH_WRONG_UNDISCRIMINED_CASES = undiscrimined_bin_pair,
PERCENT_BIN_PAIRS_WITH_WRONG_UNDISCRIMINED_CASES = round(undiscrimined_bin_pair / total_number_bin_pair * 100, 3),
MEAN_NUMBER_WRONG_UNDISCRIMINED_CASES_PER_BIN_PAIRS = round(unlist(mean_undiscrimined_cases_per_bin), 3),
MEAN_PERCENT_WRONG_UNDISCRIMINED_CASES_PER_BIN_PAIRS = round(unlist(mean_percent_undiscrimined_cases_per_bin), 3)))
summary_inter_bin = tibble(merge(tibble(PRIMERS = names(inter_bin_data_list)), summary_inter_bin, all = T))
summary_inter_bin = summary_inter_bin[order(summary_inter_bin$PRIMERS),]
summary_inter_bin[is.na(summary_inter_bin)] = 0
cat(" DONE")
cat("\n")
if(found != 0) {
inter_bin_final = inter_bin_final[!duplicated(inter_bin_final),]
return(list(SUMMARY = summary_inter_bin, DETAILS_UNDISCRIMINED_PAIRS = inter_bin_final))
}
else return(list(SUMMARY = summary_inter_bin))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.