#' @name run_report
#' @title Run the evaluation of a prediction set.
#' @description Format the input prediction set, filters it, and generates an evaluation report.
#' @author Claire Rioualen
#' @param testfile The path to a prediction file.
#' @param neg_set_id Optional, the ID of the negative set to be used from c("all_negative", "test_neg", "non_reachable", "constitutive_1", "constitutive_2")
#' @param pos_set_id Optional, the ID of the positive set to be used from c("all_positive", "test_pos", "strong_positive", "weak_positive")
#'
#' @import rmarkdown
#' @export
run_report <- function(predictions_dir, results_dir = "results", category = "test", tfs="", neg_set_id = "all_negative", pos_set_id = "all_positive") {
## Run evaluation of all sets if not already done
pred_files <- list.files(predictions_dir, full.names = T)
set_ids <- c()
set_evals <- list()
for(f in pred_files){
set <- basename(tools::file_path_sans_ext(f))
# print(set)
set_ids <- c(set_ids, set)
# if(!dir.exists(paste0(results_dir, "/", set))) {
set_evals[[set]] <- NetworkEval::run_eval(f, neg_set_id = neg_set_id, pos_set_id = pos_set_id)
# }
}
## Summary
#### todo add control stats
summary_dir <- paste0(results_dir, "/summary_report")
if(!dir.exists(summary_dir)) {
dir.create(summary_dir)
}
summary_df <- summary_table(set_ids, results_dir)
## Histogram
summary_histogram(summary_df, summary_dir)
## Upset
summary_upset(set_evals, summary_dir)
## Report
sumfile <- system.file("rmarkdown", "Summary.Rmd", package = "NetworkEval")
file.copy(sumfile, ".", overwrite = T)
rmarkdown::render("Summary.Rmd",
params = list(predictions_dir = predictions_dir, results_dir = results_dir),
output_file = paste0(predictions_dir, ".html"))
}
#' @name summary_table
#' @title Makes a summary table of all test sets statistics (+ controls to add)
#' @description
#' @author Claire Rioualen
#' @param set_ids The names of the test sets (filename without extension, as parsed form the input prediction directory).
#' @param results_dir Name of the summary directory.
#'
#' @export
summary_table <- function(set_ids, results_dir) {
Set_ID <- c()
RIs_input <- c()
TFs_input <- c()
RIs_TFfiltered <- c()
TFs_filtered <- c()
RIs_RIfiltered <- c()
# AUC <- c()
Sensitivity <- c()
Specificity <- c()
Precision <- c()
TP <- c()
FP <- c()
# set_ids <- list.files("results")
for(set in set_ids) {
confusion <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_confusion_matrix.tsv"), header=T, stringsAsFactors=FALSE)
in_ris <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_in_stats.tsv"), header=T, stringsAsFactors=FALSE)
out_ris <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_out_stats.tsv"), header=T, stringsAsFactors=FALSE)
stats <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_statistics.tsv"), header=T, stringsAsFactors=FALSE)
Set_ID <- c(Set_ID, set)
RIs_input <- c(RIs_input, in_ris$RIs)
TFs_input <- c(TFs_input, in_ris$TFs)
RIs_TFfiltered <- c(RIs_TFfiltered, out_ris$RIs)
TFs_filtered <- c(TFs_filtered, out_ris$TFs)
RIs_RIfiltered <- c(RIs_RIfiltered, confusion$total_population[1])
# AUC <- c()
Sensitivity <- c(Sensitivity, stats$sensitivity)
Specificity <- c(Specificity, stats$specificity)
Precision <- c(Precision, stats$precision)
TP <- c(TP, confusion$control_positive[1])
FP <- c(FP, confusion$control_negative[1])
}
summary_df <- data.frame(Set_ID , RIs_input, TFs_input, RIs_TFfiltered, TFs_filtered, RIs_RIfiltered, Sensitivity, Specificity, Precision, TP, FP, stringsAsFactors=F)
write.table(summary_df, file = paste0(results_dir, "/summary_report/summary_table.tsv"), col.names = T, row.names = F, quote = F, sep = "\t")
summary_df
}
#' @name summary_histogram
#' @title Makes a histogram of all test sets ss, sp and pr.
#' @description
#' @author Claire Rioualen
#' @param summary_df The dataframe of statistics generated by `summary_table()`.
#' @param results_dir Optional, name of the results directory.
#'
#' @import ggplot2
#' @export
summary_histogram <- function(summary_df, summary_dir) {
long <- tidyr::gather(summary_df, "variable", "value", c("Sensitivity", "Specificity", "Precision"))
#long <- long %>% dplyr::mutate(group=ifelse(grepl("expression", Set_ID), "expression", ifelse(grepl("physical", Set_ID), "physical", "control")))
long$variable <- factor(long$variable, levels = c("Sensitivity", "Specificity", "Precision"))
ggplot(long, aes(x = Set_ID, y = value)) +
geom_bar(stat = "identity", fill = "royalblue") +
#geom_col(aes(fill = group)) +
scale_fill_manual(guide=guide_legend(reverse=T)) +
theme(axis.text.y = element_text(size=10), axis.title.x = element_blank(), legend.title = element_blank(), strip.text.x = element_text(size = 10), strip.text.y = element_text(size = 10), axis.title.y = element_blank(), legend.position="none", legend.text = element_text(size = 11), legend.direction="vertical", axis.text.x = element_text(angle = 90, hjust=1, size=10)) +
facet_grid(variable ~ .)
ggsave(paste0(summary_dir, "/histogram.png"), device="png", width = 6, height = 4)
}
#' @name summary_upset
#' @title Makes an upset plot of all test sets + controls.
#' @description
#' @author Claire Rioualen
#' @param set_evals A `list` of `evalset` objects generated by the `run_eval()` function.
#' @param summary_dir Optional, name of the results directory.
#'
#' @import dplyr
#' @import UpSetR
#' @export
summary_upset <- function(set_evals, summary_dir) {
data <- list()
for (evalset in set_evals){
pred = evalset@pred_set@ris
pred_id = evalset@pred_set@id
pred <- pred %>% dplyr::mutate(pair = paste0(tf_bnum, "_", gene_bnum)) %>%
dplyr::arrange(pair) %>%
dplyr::distinct(pair, .keep_all = TRUE)
data[[pred_id]] <- pred$pair
}
neg_id <- set_evals[[1]]@neg_set@id
neg <- set_evals[[1]]@neg_set@ris
neg <- neg %>% dplyr::mutate(pair = paste0(tf_bnum, "_", gene_bnum)) %>%
dplyr::arrange(pair) %>%
dplyr::distinct(pair, .keep_all = TRUE)
data[[neg_id]] <- neg$pair
pos_id <- set_evals[[1]]@pos_set@id
pos <- set_evals[[1]]@pos_set@ris
pos <- pos %>% dplyr::mutate(pair = paste0(tf_bnum, "_", gene_bnum)) %>%
dplyr::arrange(pair) %>%
dplyr::distinct(pair, .keep_all = TRUE)
data[[pos_id]] <- pos$pair
png(paste0(summary_dir, "/upset.png"), width= 8, height= 4, units= "in", res= 200)
plot <- upset(fromList(data), nsets = length(data), number.angles = 30, point.size = 3, line.size = 2,
mainbar.y.label = "Sets Intersections", sets.x.label = "Set IDs",
text.scale = c(1.5, 1.5, 1.5, 1, 1.5, 1.2), order.by='freq')
print(plot)
dev.off()
# ggsave(paste0(summary_dir, "/upset.png"), device="png", width = 20, height = 20, units = "cm")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.