## Requirement: "tidyverse"
evaluate_primers = function(specific_pcr_fish, unspecific_pcr_all, specific_pcr_fish_reference,
metabarcode_data, intra_bin_list, inter_bin_list,
reference_data, taxo_bin_data,
similarity_threshold, plot = T, plot_name = NULL, main_title,
order_genes_graph = c("COI", "12S", "16S", "CytB"),
colours_genes_graph = c("#AD7000", "#C500C1", "#00C4C5", "#CBD400"),
gene_name_space_major = c(1,10,21,22), gene_name_space_minor = c(0,0,15,98),
font_size_title = 22, padding_rect_title = c(25, 5)){
##### INTERNAL FUNCTIONS #####
normalize_scores = function(x, order) {
if(var(x, na.rm = T) != 0 && !all(is.na(c(x)))){
initial = c(x)
if(class(initial) == "list") initial = initial[[1]]
pos_na = which(is.na(x))
pos_no_na = which(!is.na(x))
x = x[!is.na(x)]
x = as.matrix(as.numeric(x))
minAttr = apply(x, 2, min)
maxAttr = apply(x, 2, max)
x = sweep(x, 2, minAttr, FUN = "-")
x = sweep(x, 2, maxAttr-minAttr, "/")
final = replace(initial, pos_no_na, x)
if(order == "asc") return(final)
else if(order == "dsc") return(1 - final)
else stop('The argument "order" can only be "asc" or "dsc".')
}
else{
if(all(x == 0) && order == "dsc") return(rep(1, length(x)))
else if(all(x == 0) && order == "asc") return(rep(0, length(x)))
else if(all(x == 1) && order == "asc") return(rep(1, length(x)))
else if(all(x == 1) && order == "dsc") return(rep(0, length(x)))
else if(all(is.na(x))) return(rep(NA, length(x)))
else {
warning("Impossible to normalize since all values are equal and they are not 0 or 1.")
return(rep(NA, length(x)))
}
}
}
##### INITIALISATION ####
options(dplyr.summarise.inform = F)
reference_primers_df = subset(reference_data, select = PRIMERS)
reference_primers = reference_data$PRIMERS
bin_selected = length(unique(taxo_bin_data$BIN))
##### STABILITY ANALYSIS #####
cat("\n")
cat("----------------STABILITY ANALYSIS---------------")
cat("\n")
start = Sys.time()
## Length of fragments
mean_length = metabarcode_data %>% group_by(PRIMERS, .drop = FALSE) %>%
dplyr::summarise(MEAN_LENGTH = mean(LENGTH), SD_LENGTH = sd(LENGTH))
## Informativness (i.e. concentration of diagnostic nucleotides)
aligned_sequences_splitted = split(metabarcode_data, metabarcode_data$PRIMERS)
consensus_sequences_splitted = lapply(aligned_sequences_splitted, function(x)
ConsensusSequence(DNAStringSet(x$FRAGMENT)))
diagnostic_nucleotides_vector = sapply(consensus_sequences_splitted, function(x)
sum(str_count(paste(x), pattern = names(IUPAC_CODE_MAP)[5:15])) / width(x)[1])
diagnostic_nucleotides = tibble(data.frame(PRIMERS = names(diagnostic_nucleotides_vector),
RATIO_DIAGNOSTIC_NUCLEOTIDES = diagnostic_nucleotides_vector))
## Combining all stability results together
if(nrow(mean_length) != length(reference_primers)) mean_length = tibble(merge(reference_primers_df, mean_length, all = T))
if(nrow(diagnostic_nucleotides) != length(reference_primers)) diagnostic_nucleotides = tibble(merge(reference_primers_df, diagnostic_nucleotides, all = T))
stability_analysis = tibble(cbind(mean_length, diagnostic_nucleotides[,2]))
end = Sys.time()
duration = difftime(end, start)
cat("\n")
cat(paste("Time taken:", round(duration[[1]], 2), units(duration), "\n"))
cat("\n")
cat("-------------------------------------------------\n")
cat("\n")
##### TAXONOMIC RESOLUTION ANALYSIS #####
cat("\n")
cat("----------------TAXONOMIC ANALYSIS---------------")
cat("\n")
## Intra-BIN resolution
intra_bin_list$SIMILARITY$NUMBER_ACCESSION = as.numeric(intra_bin_list$SIMILARITY$NUMBER_ACCESSION)
intra_bin_list$SIMILARITY$MEAN_SIMILARITY = as.numeric(intra_bin_list$SIMILARITY$MEAN_SIMILARITY)
intra_bin_list$SIMILARITY$SD_SIMILARITY = as.numeric(intra_bin_list$SIMILARITY$SD_SIMILARITY)
intra_bin_list$THRESHOLD$PERCENT_DISCRIMINED_CASES = as.numeric(intra_bin_list$THRESHOLD$PERCENT_DISCRIMINED_CASES)
intra_bin_list$THRESHOLD$MEAN_SIMILARITY_DISCRIMINED_CASES = as.numeric(intra_bin_list$THRESHOLD$MEAN_SIMILARITY_DISCRIMINED_CASES)
intra_bin_list$THRESHOLD$SD_SIMILARITY_DISCRIMINED_CASES = as.numeric(intra_bin_list$THRESHOLD$SD_SIMILARITY_DISCRIMINED_CASES)
intra_bin_similarity = split(intra_bin_list$SIMILARITY, intra_bin_list$SIMILARITY$PRIMERS)
intra_bin_threshold = split(intra_bin_list$THRESHOLD, intra_bin_list$THRESHOLD$PRIMERS)
number_wrong_discrimined_cases = sapply(intra_bin_threshold, function(x) sum(x$NUMBER_DISCRIMINED_CASES))
percent_wrong_discrimined_cases = sapply(intra_bin_threshold, function(x)
sum(x$NUMBER_DISCRIMINED_CASES) / sum(x$TOTAL_NUMBER_CASES) * 100)
number_bin_with_wrong_discrimined_cases = sapply(intra_bin_similarity, function(x)
nrow(subset(x, as.numeric(NUMBER_ACCESSION) > 1 & as.numeric(MEAN_SIMILARITY) < similarity_threshold)))
percent_bin_with_wrong_discrimined_cases = sapply(intra_bin_similarity, function(x)
nrow(subset(x, as.numeric(NUMBER_ACCESSION) > 1 & as.numeric(MEAN_SIMILARITY) < similarity_threshold)) /
nrow(subset(x, NUMBER_ACCESSION > 1)) * 100)
mean_number_wrong_discrimined_cases_per_bin = sapply(intra_bin_threshold, function(x)
mean(x$NUMBER_DISCRIMINED_CASES, na.rm = T))
percent_number_wrong_discrimined_cases_per_bin = sapply(intra_bin_threshold, function(x)
mean(x$PERCENT_DISCRIMINED_CASES, na.rm = T))
mean_similarity_wrong_discrimined_cases_per_bin = sapply(intra_bin_threshold, function(x)
mean(x$MEAN_SIMILARITY_DISCRIMINED_CASES, na.rm = T))
sd_similarity_wrong_discrimined_cases_per_bin = sapply(intra_bin_threshold, function(x)
mean(x$SD_SIMILARITY_DISCRIMINED_CASES, na.rm = T))
overall_mean_similarity = sapply(intra_bin_similarity, function(x) mean(as.numeric(x$MEAN_SIMILARITY), na.rm = T))
overall_sd_similarity = sapply(intra_bin_similarity, function(x) sd(as.numeric(x$MEAN_SIMILARITY)))
intra_bin_resolution = tibble(PRIMERS = names(intra_bin_similarity),
NUMBER_WRONG_DISCRIMINED_CASES = number_wrong_discrimined_cases,
PERCENT_WRONG_DISCRIMINED_CASES = round(percent_wrong_discrimined_cases, 3),
NUMBER_BIN_WITH_WRONG_DISCRIMINED_CASES = number_bin_with_wrong_discrimined_cases,
PERCENT_BIN_WITH_WRONG_DISCRIMINED_CASES = round(percent_bin_with_wrong_discrimined_cases, 3),
MEAN_NUMBER_WRONG_DISCRIMINED_CASES_PER_BIN = round(mean_number_wrong_discrimined_cases_per_bin, 3),
MEAN_PERCENT_WRONG_DISCRIMINED_CASES_PER_BIN = round(percent_number_wrong_discrimined_cases_per_bin, 3),
MEAN_SIMILARITY_WRONG_DISCRIMINED_CASES_PER_BIN = round(mean_similarity_wrong_discrimined_cases_per_bin, 3),
SD_SIMILARITY_WRONG_DISCRIMINED_CASES_PER_BIN = round(sd_similarity_wrong_discrimined_cases_per_bin, 3),
OVERALL_MEAN_SIMILARITY = round(overall_mean_similarity, 3),
OVERALL_SD_SIMILARITY = round(overall_sd_similarity, 3))
# percent_dissimilar_bin = round(sapply(intra_bin_similarity,
# function(x) nrow(subset(x, as.numeric(NUMBER_ACCESSION) > 1 & as.numeric(MEAN_SIMILARITY) < similarity_threshold)) /
# nrow(subset(x, NUMBER_ACCESSION > 1)) * 100), 3)
#
# mean_similarity_similarity = round(sapply(intra_bin_similarity,
# function(x) mean(as.numeric(x$MEAN_SIMILARITY), na.rm = T)), 3)
#
# se_similarity_similarity = round(sapply(intra_bin_similarity, function(x) mean(as.numeric(x$SE_SIMILARITY), na.rm = T)), 3)
#
# intra_bin_similarity = tibble(data.frame(PRIMERS = names(mean_similarity_similarity),
# PERCENT_BIN_WITH_DISSIMILAR_SEQUENCES = percent_dissimilar_bin,
# MEAN_OVERALL_SIMILARITY = mean_similarity_similarity,
# SE_OVERALL_SIMILARITY = se_similarity_similarity))
#
# percent_dissimilar_cases = round(sapply(intra_bin_threshold,
# function(x) mean(x$PERCENT_DISCRIMINED_CASES, na.rm = T)), 3)
#
# mean_similarity_cases = round(sapply(intra_bin_threshold, function(x)
# mean(as.numeric(x$MEAN_SIMILARITY_DISCRIMINED_CASES), na.rm = T)), 3)
#
# se_similarity_cases = round(sapply(intra_bin_threshold, function(x)
# mean(as.numeric(x$SE_SIMILARITY_DISCRIMINED_CASES), na.rm = T)), 3)
#
# intra_bin_threshold = tibble(data.frame(PRIMERS = names(mean_similarity_cases),
# MEAN_PERCENT_DISSIMILAR_CASES_PER_BIN = percent_dissimilar_cases,
# MEAN_DISCRIMINED_SIMILARITY = mean_similarity_cases,
# SE_DISCRIMINED_SIMILARITY = se_similarity_cases))
#
# intra_bin_resolution = tibble(merge(intra_bin_threshold, intra_bin_similarity))
intra_bin_resolution = intra_bin_resolution[order(intra_bin_resolution$PRIMERS),]
intra_bin_resolution$NUMBER_WRONG_DISCRIMINED_CASES[is.nan(intra_bin_resolution$NUMBER_WRONG_DISCRIMINED_CASES)] = 0
intra_bin_resolution$PERCENT_WRONG_DISCRIMINED_CASES[is.nan(intra_bin_resolution$PERCENT_WRONG_DISCRIMINED_CASES)] = 0
intra_bin_resolution$NUMBER_BIN_WITH_WRONG_DISCRIMINED_CASES[is.nan(intra_bin_resolution$NUMBER_BIN_WITH_WRONG_DISCRIMINED_CASES)] = 0
intra_bin_resolution$PERCENT_BIN_WITH_WRONG_DISCRIMINED_CASES[is.nan(intra_bin_resolution$PERCENT_BIN_WITH_WRONG_DISCRIMINED_CASES)] = 0
intra_bin_resolution$MEAN_NUMBER_WRONG_DISCRIMINED_CASES_PER_BIN[is.nan(intra_bin_resolution$MEAN_NUMBER_WRONG_DISCRIMINED_CASES_PER_BIN)] = 0
intra_bin_resolution$MEAN_PERCENT_WRONG_DISCRIMINED_CASES_PER_BIN[is.nan(intra_bin_resolution$MEAN_PERCENT_WRONG_DISCRIMINED_CASES_PER_BIN)] = 0
intra_bin_resolution$MEAN_SIMILARITY_WRONG_DISCRIMINED_CASES_PER_BIN[is.nan(intra_bin_resolution$MEAN_SIMILARITY_WRONG_DISCRIMINED_CASES_PER_BIN)] = 0
intra_bin_resolution$SD_SIMILARITY_WRONG_DISCRIMINED_CASES_PER_BIN[is.nan(intra_bin_resolution$SD_SIMILARITY_WRONG_DISCRIMINED_CASES_PER_BIN)] = 0
intra_bin_resolution$OVERALL_MEAN_SIMILARITY[is.nan(intra_bin_resolution$OVERALL_MEAN_SIMILARITY)] = 0
intra_bin_resolution$OVERALL_SD_SIMILARITY[is.nan(intra_bin_resolution$OVERALL_SD_SIMILARITY)] = 0
## Inter-BIN resolution
# if(nrow(inter_bin_list) == 0){
#
# na_details_inter_bin = data.frame(t(matrix(rep(NA, 13))))
#
# colnames(na_details_inter_bin) = c("SIMILARITY", "CLIMATE", "ENVIRONMENT", "ORDER", "PRIMERS", "MEAN_NUMBER_UNDISCRIMINED_CASES_PER_BIN",
# "MEAN_PERCENT_UNDISCRIMINED_CASES_PER_BIN", "MEAN_PERCENT_DISSIMILAR_CASES_PER_BIN", "MEAN_DISCRIMINED_SIMILARITY",
# "SE_DISCRIMINED_SIMILARITY", "PERCENT_BIN_WITH_DISSIMILAR_SEQUENCES", "MEAN_OVERALL_SIMILARITY", "SE_OVERALL_SIMILARITY")
#
# na_summary_inter_bin = data.frame(PRIMERS = reference_data$PRIMERS,
# MEAN_NUMBER_UNDISCRIMINED_CASES_PER_BIN = 0,
# MEAN_PERCENT_UNDISCRIMINED_CASES_PER_BIN = 0)
#
# inter_bin_output = list(SUMMARY = na_summary_inter_bin, DETAILS = na_details_inter_bin)
#
# }
#
# else
#inter_bin_output = suppressWarnings(inter_bin_analysis(inter_bin_data_list = inter_bin_list, bin_data = taxo_bin_data,
# similarity_threshold = similarity_threshold))
inter_bin_output = inter_bin_analysis(inter_bin_data_list = inter_bin_list, bin_data = taxo_bin_data,
similarity_threshold = similarity_threshold)
inter_bin_resolution = inter_bin_output[[1]][order(inter_bin_output[[1]]$PRIMERS),]
## Combining all taxonomic resolution results together
if(nrow(inter_bin_resolution) != length(reference_primers)){
inter_bin_resolution = tibble(merge(reference_primers_df, inter_bin_resolution, all = T))
inter_bin_resolution[is.na(inter_bin_resolution)] = 0
}
if(nrow(intra_bin_resolution) != length(reference_primers))
intra_bin_resolution = tibble(merge(reference_primers_df, intra_bin_resolution, all = T))
taxonomic_resolution_analysis = tibble(cbind(inter_bin_resolution, intra_bin_resolution[,-1]))
end = Sys.time()
duration = difftime(end, start)
cat("\n")
cat(paste("Time taken:", round(duration[[1]], 2), units(duration), "\n"))
cat("\n")
cat("-------------------------------------------------\n")
cat("\n")
##### AMPLIFICATION ANALYSIS #####
cat("\n")
cat("--------------AMPLIFICATION ANALYSIS-------------")
cat("\n")
start = Sys.time()
## Universality (i.e. number of species amplified compared to the maximum in the database)
all_species_number = length(unique(specific_pcr_fish_reference$SPECIES))
number_species = data.frame(specific_pcr_fish) %>% group_by(PRIMERS, .drop = FALSE) %>%
dplyr::summarise(NUMBER_SPECIES = n_distinct(SPECIES),
RATIO_SPECIES_AMPLIFIED = n_distinct(SPECIES) / all_species_number)
## Equitability (i.e. ratio of species amplified per order)
species_per_order = specific_pcr_fish %>% group_by(PRIMERS, ORDER, .drop = FALSE) %>% dplyr::mutate(SPECIES_PER_ORDER = n_distinct(SPECIES))
max_species_per_order = specific_pcr_fish_reference %>% group_by(ORDER, .drop = FALSE) %>% dplyr::mutate(MAX_SPECIES_PER_ORDER = n_distinct(SPECIES))
max_species_per_order = tibble(data.frame(ORDER = max_species_per_order$ORDER,
MAX_SPECIES_PER_ORDER = max_species_per_order$MAX_SPECIES_PER_ORDER))
max_species_per_order = max_species_per_order[!duplicated(max_species_per_order),]
species_per_order_ratio = tibble(merge(species_per_order, max_species_per_order)) %>% group_by(PRIMERS, .drop = FALSE) %>%
dplyr::summarise(RATIO_SPECIES_AMPLIFIED_PER_ORDER = mean(SPECIES_PER_ORDER / MAX_SPECIES_PER_ORDER))
## Hybridation efficiency
mean_hybridation_eff = specific_pcr_fish %>% group_by(PRIMERS, .drop = FALSE) %>%
dplyr::summarise(MEAN_HYBRIDATION = mean(HYBRIDATION_EFFICIENCY), SD_HYBRIDATION = sd(HYBRIDATION_EFFICIENCY))
## Elongation efficiency
mean_elongation_eff = specific_pcr_fish %>% group_by(PRIMERS, .drop = FALSE) %>%
dplyr::summarise(MEAN_ELONGATION = mean(ELONGATION_EFFICIENCY), SD_ELONGATION = sd(ELONGATION_EFFICIENCY))
## Overall amplification efficiency
mean_amplification_eff = specific_pcr_fish %>% group_by(PRIMERS, .drop = FALSE) %>%
dplyr::summarise(MEAN_AMPLIFICATION = mean(AMPLIFICATION_EFFICIENCY), SD_AMPLIFICATION = sd(AMPLIFICATION_EFFICIENCY))
# Removing potential NA lines
number_species_na = apply(number_species[, "PRIMERS"], 1, is.na)
number_species = number_species[!number_species_na,]
species_per_order_ratio_na = apply(species_per_order_ratio[, "PRIMERS"], 1, is.na)
species_per_order_ratio = species_per_order_ratio[!species_per_order_ratio_na,]
mean_hybridation_eff_na = apply(mean_hybridation_eff[, "PRIMERS"], 1, is.na)
mean_hybridation_eff = mean_hybridation_eff[!mean_hybridation_eff_na,]
mean_elongation_eff_na = apply(mean_elongation_eff[, "PRIMERS"], 1, is.na)
mean_elongation_eff = mean_elongation_eff[!mean_elongation_eff_na,]
mean_amplification_eff_na = apply(mean_amplification_eff[, "PRIMERS"], 1, is.na)
mean_amplification_eff = mean_amplification_eff[!mean_amplification_eff_na,]
## Combining all amplification analysis results together
if(nrow(number_species) != length(reference_primers)) number_species = tibble(merge(reference_primers_df, number_species, all = T))
if(nrow(species_per_order_ratio) != length(reference_primers)) species_per_order_ratio = tibble(merge(reference_primers_df, species_per_order_ratio, all = T))
if(nrow(mean_hybridation_eff) != length(reference_primers)) mean_hybridation_eff = tibble(merge(reference_primers_df, mean_hybridation_eff, all = T))
if(nrow(mean_elongation_eff) != length(reference_primers)) mean_elongation_eff = tibble(merge(reference_primers_df, mean_elongation_eff, all = T))
if(nrow(mean_amplification_eff) != length(reference_primers)) mean_amplification_eff = tibble(merge(reference_primers_df, mean_amplification_eff, all = T))
amplification_analysis = tibble(cbind(number_species, tibble(species_per_order_ratio[,-1]),
mean_hybridation_eff[,-1], mean_elongation_eff[,-1], mean_amplification_eff[,-1]))
amplification_analysis[is.na(amplification_analysis)] = 0
end = Sys.time()
duration = difftime(end, start)
cat("\n")
cat(paste("Time taken:", round(duration[[1]], 2), units(duration), "\n"))
cat("\n")
cat("-------------------------------------------------\n")
cat("\n")
##### SPECIFICITY ANALYSIS #####
cat("\n")
cat("----------------SPECIFITY ANALYSIS---------------")
cat("\n")
start = Sys.time()
## Calculating the ratio of sequences amplified belonging to Actinopterygii
ratio_fish_seq = unspecific_pcr_all %>% group_by(PRIMERS, .drop = FALSE) %>% dplyr::mutate(NUMBER_SEQUENCES = n()) %>%
filter(CLASS == "Actinopterygii") %>% dplyr::summarise(RATIO_SEQUENCES_FISH = n() / unique(NUMBER_SEQUENCES))
## Calculating the ratio of amplification of Actinopterygii sequences compared to the other taxa
ratio_amplification_eff_fish = unspecific_pcr_all %>% group_by(PRIMERS, .drop = FALSE) %>% dplyr::mutate(MEAN_AMPLIFICATION = mean(AMPLIFICATION_EFFICIENCY)) %>%
filter(CLASS == "Actinopterygii") %>% dplyr::summarise(RATIO_AMPLIFICATION_FISH = unique(MEAN_AMPLIFICATION) / mean(AMPLIFICATION_EFFICIENCY))
mean_amplification_other = unspecific_pcr_all %>% group_by(PRIMERS, .drop = FALSE) %>% filter(CLASS != "Actinopterygii") %>%
dplyr::summarise(MEAN_AMPLIFICATION_OTHER = mean(AMPLIFICATION_EFFICIENCY))
mean_amplification_fish = unspecific_pcr_all %>% group_by(PRIMERS, .drop = FALSE) %>% filter(CLASS == "Actinopterygii") %>%
dplyr::summarise(MEAN_AMPLIFICATION_FISH = mean(AMPLIFICATION_EFFICIENCY))
ratio_amplification_fish_other = tibble(merge(mean_amplification_fish, mean_amplification_other)) %>%
dplyr::mutate(RATIO_AMPLIFICATION_FISH_OTHER = MEAN_AMPLIFICATION_FISH / MEAN_AMPLIFICATION_OTHER)
## Combining all specificity analysis results together
if(nrow(ratio_fish_seq) != length(reference_primers)) ratio_fish_seq = tibble(merge(reference_primers_df, ratio_fish_seq, all = T))
if(nrow(ratio_amplification_fish_other) != length(reference_primers))
ratio_amplification_fish_other = tibble(merge(reference_primers_df, ratio_amplification_fish_other, all = T))
specificity_analysis = tibble(merge(ratio_fish_seq, ratio_amplification_fish_other[,c(1,4)]))
specificity_analysis[is.na(specificity_analysis)] = 0
end = Sys.time()
duration = difftime(end, start)
cat("\n")
cat(paste("Time taken:", round(duration[[1]], 2), units(duration), "\n"))
cat("\n")
cat("-------------------------------------------------\n")
cat("\n")
##### COMBINING ALL ANALYSIS RESULTS #####
cat("\n")
cat("----------------COMBINING RESULTS----------------")
cat("\n")
start = Sys.time()
## Ordering all tables
reference_data = reference_data %>% arrange(PRIMERS)
stability_analysis = stability_analysis %>% arrange(PRIMERS)
amplification_analysis = amplification_analysis %>% arrange(PRIMERS)
taxonomic_resolution_analysis = taxonomic_resolution_analysis %>% arrange(PRIMERS)
specificity_analysis = specificity_analysis %>% arrange(PRIMERS)
check_names = cbind(reference_data[,1], stability_analysis[,1], amplification_analysis[,1],
taxonomic_resolution_analysis[,1], specificity_analysis[,1])
check_names = which(apply(check_names, 2, function(x) length(unique(x))) == 1)
if(length(check_names) != 0) stop("Error during the binding of all results: analysis results tables are in different orders.")
## Binding all tables together
primers_evaluation = tibble(cbind(subset(reference_data, select = c("PRIMERS", "GENE")), stability_analysis[,-1],
amplification_analysis[,-1], taxonomic_resolution_analysis[,-1], specificity_analysis[,-1]))
end = Sys.time()
duration = difftime(end, start)
cat("\n")
cat(paste("Time taken:", round(duration[[1]], 2), units(duration), "\n"))
cat("\n")
cat("-------------------------------------------------\n")
cat("\n")
##### PRIMER EVALUATION #####
cat("\n")
cat("-----------------COMPUTING SCORES----------------")
cat("\n")
start = Sys.time()
## Normalisation of each score
primers_evaluation_normalized =
tibble(cbind(primers_evaluation[,1:2], MEAN_LENGTH = normalize_scores(as.numeric(primers_evaluation$MEAN_LENGTH), order = "dsc"),
RATIO_DIAGNOSTIC_NUCLEOTIDES = normalize_scores(as.numeric(primers_evaluation$RATIO_DIAGNOSTIC_NUCLEOTIDES), order = "asc"),
INTRA_BIN = normalize_scores(as.numeric(primers_evaluation$NUMBER_WRONG_DISCRIMINED_CASES), order = "dsc"),
INTER_BIN = normalize_scores(as.numeric(primers_evaluation$NUMBER_WRONG_UNDISCRIMINED_CASES), order = "dsc"),
RATIO_SPECIES_AMPLIFIED = normalize_scores(as.numeric(primers_evaluation$RATIO_SPECIES_AMPLIFIED), order = "asc"),
RATIO_SPECIES_AMPLIFIED_PER_ORDER = normalize_scores(as.numeric(primers_evaluation$RATIO_SPECIES_AMPLIFIED_PER_ORDER), order = "asc"),
MEAN_HYBRIDATION = normalize_scores(as.numeric(primers_evaluation$MEAN_HYBRIDATION), order = "asc"),
MEAN_ELONGATION = normalize_scores(as.numeric(primers_evaluation$MEAN_ELONGATION), order = "asc"),
RATIO_SEQUENCES_FISH = normalize_scores(as.numeric(primers_evaluation$RATIO_SEQUENCES_FISH), order = "asc"),
RATIO_AMPLIFICATION_FISH_OTHER = normalize_scores(as.numeric(primers_evaluation$RATIO_AMPLIFICATION_FISH_OTHER), order = "asc")))
## Computing number of sequences amplified by each primer
number_sequences = specific_pcr_fish %>% group_by(PRIMERS, .drop = FALSE) %>% dplyr::summarise(NUMBER_SEQUENCES = n())
## Commputing mean score on all criteria
primers_mean_score = tibble(cbind(primers_evaluation_normalized[,1:2],
MEAN_SCORE = apply(primers_evaluation_normalized[,-c(1:2)], 1, mean))) %>% arrange(-MEAN_SCORE)
## Pivoting table to wide format
primers_evaluation_wide = data.frame(t(primers_evaluation_normalized))
colnames(primers_evaluation_wide) = primers_evaluation_wide[2,]
primers_evaluation_wide = tibble(data.frame(SCORES = row.names(primers_evaluation_wide[-c(1:2),]),
apply(primers_evaluation_wide[-c(1:2),], 2, as.numeric)))
colnames(primers_evaluation_wide) = c("SCORES", primers_evaluation$PRIMERS)
primers_evaluation_wide = primers_evaluation_wide[c("SCORES", primers_mean_score$PRIMERS)]
## Pivoting table to long format
primers_evaluation_long = primers_evaluation_normalized[,-2] %>% pivot_longer(!PRIMERS, names_to = "CRITERION", values_to = "SCORES")
## Assembling results in a list
if(length(inter_bin_output) == 2) output_list = list(MEAN_SCORES = primers_mean_score,
SCORES_WIDE = primers_evaluation_wide,
SCORES_LONG = primers_evaluation_long,
STABILITY_ANALYSIS = stability_analysis,
INTRA_BIN_ANALYSIS = intra_bin_resolution,
INTER_BIN_ANALYSIS = inter_bin_resolution,
INTER_BIN_DETAILS = inter_bin_output[[2]],
AMPLIFICATION_ANALYSIS = amplification_analysis,
SPECIFICITY_ANALYSIS = specificity_analysis)
else output_list = list(MEAN_SCORES = primers_mean_score,
SCORES_WIDE = primers_evaluation_wide,
SCORES_LONG = primers_evaluation_long,
STABILITY_ANALYSIS = stability_analysis,
INTRA_BIN_ANALYSIS = intra_bin_resolution,
INTER_BIN_ANALYSIS = inter_bin_resolution,
AMPLIFICATION_ANALYSIS = amplification_analysis,
SPECIFICITY_ANALYSIS = specificity_analysis)
end = Sys.time()
duration = difftime(end, start)
cat("\n")
cat(paste("Time taken:", round(duration[[1]], 2), units(duration), "\n"))
cat("\n")
cat("-------------------------------------------------\n")
cat("\n")
##### PLOTTING RESULTS #####
if(plot){
cat("\n")
cat("-----------------PLOTTING RESULTS----------------")
cat("\n")
start = Sys.time()
if(is.null(plot_name)) plot_name = paste0(main_title, "_", similarity_threshold, "_evaluation.png")
number_sequences = specific_pcr_fish %>% group_by(PRIMERS, .drop = FALSE) %>% dplyr::summarise(NUMBER_SEQUENCES = n())
if(nrow(number_sequences) != length(reference_primers)) number_sequences = tibble(merge(reference_primers_df, number_sequences, all = T))
number_sequences[is.na(number_sequences)] = 0
evaluation_plot(evaluation_data = output_list, number_sequences = number_sequences,
max_number_sequences = length(unique(specific_pcr_fish_reference$ACCESSION)),
plot_name = plot_name, order_genes_graph = order_genes_graph, colours_genes_graph = colours_genes_graph,
gene_name_space_major = gene_name_space_major, gene_name_space_minor = gene_name_space_minor,
main_title = main_title, font_size_title = font_size_title, padding_rect_title = padding_rect_title)
end = Sys.time()
duration = difftime(end, start)
cat("\n")
cat("The plot was successfully saved in the working directory!")
cat("\n")
cat(paste("Time taken:", round(duration[[1]], 2), units(duration), "\n"))
cat("\n")
cat("-------------------------------------------------\n")
cat("\n")
}
##### FINAL RESULTS #####
return(output_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.