R/run_report.R

Defines functions summary_histogram summary_table run_report

#' @name run_report
#' @title Run the evaluation of a prediction set.
#' @description Format the input prediction set, filters it, and generates an evaluation report.
#' @author Claire Rioualen
#' @param testfile The path to a prediction file.
#' @param neg_set_id Optional, the ID of the negative set to be used from c("all_negative", "test_neg", "non_reachable", "constitutive_1", "constitutive_2")
#' @param pos_set_id Optional, the ID of the positive set to be used from c("all_positive", "test_pos", "strong_positive", "weak_positive")
#'
#' @import rmarkdown
#' @export
run_report <- function(predictions_dir, results_dir = "results", category = "test", tfs="", neg_set_id = "all_negative", pos_set_id = "all_positive") {

	## Run evaluation of all sets if not already done
	pred_files <- list.files(predictions_dir, full.names = T)
	set_ids <- c()
	set_evals <- list()
	for(f in pred_files){
		set <- basename(tools::file_path_sans_ext(f))
		# print(set)
		set_ids <- c(set_ids, set)
		# if(!dir.exists(paste0(results_dir, "/", set))) {
		set_evals[[set]] <- NetworkEval::run_eval(f, neg_set_id = neg_set_id, pos_set_id = pos_set_id)
		# }
	}

	## Summary

	#### todo add control stats

	summary_dir <- paste0(results_dir, "/summary_report")
	if(!dir.exists(summary_dir)) {
		dir.create(summary_dir)
	}

	summary_df <- summary_table(set_ids, results_dir)

	## Histogram

	summary_histogram(summary_df, summary_dir)

	## Upset

	summary_upset(set_evals, summary_dir)

	## Report
	sumfile <- system.file("rmarkdown", "Summary.Rmd", package = "NetworkEval")
	file.copy(sumfile, ".", overwrite = T)

	rmarkdown::render("Summary.Rmd",
										params = list(predictions_dir = predictions_dir, results_dir = results_dir),
										output_file = paste0(predictions_dir, ".html"))
}

#' @name summary_table
#' @title Makes a summary table of all test sets statistics (+ controls to add)
#' @description
#' @author Claire Rioualen
#' @param set_ids The names of the test sets (filename without extension, as parsed form the input prediction directory).
#' @param results_dir Name of the summary directory.
#'
#' @export
summary_table <- function(set_ids, results_dir) {
	Set_ID <- c()
	RIs_input <- c()
	TFs_input <- c()
	RIs_TFfiltered <- c()
	TFs_filtered <- c()
	RIs_RIfiltered <- c()
	# AUC <- c()
	Sensitivity <- c()
	Specificity <- c()
	Precision <- c()
	TP <- c()
	FP <- c()

	# set_ids <- list.files("results")
	for(set in set_ids) {
		confusion <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_confusion_matrix.tsv"), header=T, stringsAsFactors=FALSE)
		in_ris <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_in_stats.tsv"), header=T, stringsAsFactors=FALSE)
		out_ris <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_out_stats.tsv"), header=T, stringsAsFactors=FALSE)
		stats <- read.delim(file = paste0(results_dir, "/", set, "/", set, "_statistics.tsv"), header=T, stringsAsFactors=FALSE)

		Set_ID <- c(Set_ID, set)
		RIs_input <- c(RIs_input, in_ris$RIs)
		TFs_input <- c(TFs_input, in_ris$TFs)
		RIs_TFfiltered <- c(RIs_TFfiltered, out_ris$RIs)
		TFs_filtered <- c(TFs_filtered, out_ris$TFs)
		RIs_RIfiltered <- c(RIs_RIfiltered, confusion$total_population[1])
		# AUC <- c()
		Sensitivity <- c(Sensitivity, stats$sensitivity)
		Specificity <- c(Specificity, stats$specificity)
		Precision <- c(Precision, stats$precision)
		TP <- c(TP, confusion$control_positive[1])
		FP <- c(FP, confusion$control_negative[1])
	}

	summary_df <- data.frame(Set_ID , RIs_input, TFs_input, RIs_TFfiltered, TFs_filtered, RIs_RIfiltered, Sensitivity, Specificity, Precision, TP, FP, stringsAsFactors=F)

	write.table(summary_df, file = paste0(results_dir, "/summary_report/summary_table.tsv"), col.names = T, row.names = F, quote = F, sep = "\t")
	summary_df
}


#' @name summary_histogram
#' @title Makes a histogram of all test sets ss, sp and pr.
#' @description
#' @author Claire Rioualen
#' @param summary_df The dataframe of statistics generated by `summary_table()`.
#' @param results_dir Optional, name of the results directory.
#'
#' @import ggplot2
#' @export
summary_histogram <- function(summary_df, summary_dir) {
long <- tidyr::gather(summary_df, "variable", "value", c("Sensitivity", "Specificity", "Precision"))
#long <- long %>% dplyr::mutate(group=ifelse(grepl("expression", Set_ID), "expression", ifelse(grepl("physical", Set_ID), "physical", "control")))
long$variable <- factor(long$variable, levels = c("Sensitivity", "Specificity", "Precision"))

ggplot(long, aes(x = Set_ID, y = value)) +
	geom_bar(stat = "identity", fill = "royalblue") +
	#geom_col(aes(fill = group)) +
	scale_fill_manual(guide=guide_legend(reverse=T)) +
	theme(axis.text.y = element_text(size=10), axis.title.x = element_blank(), legend.title = element_blank(), strip.text.x = element_text(size = 10), strip.text.y = element_text(size = 10), axis.title.y = element_blank(), legend.position="none", legend.text = element_text(size = 11), legend.direction="vertical", axis.text.x = element_text(angle = 90, hjust=1, size=10)) +
	facet_grid(variable ~ .)
ggsave(paste0(summary_dir, "/histogram.png"), device="png", width = 6, height = 4)
}

#' @name summary_upset
#' @title Makes an upset plot of all test sets + controls.
#' @description
#' @author Claire Rioualen
#' @param set_evals A `list` of `evalset` objects generated by the `run_eval()` function.
#' @param summary_dir Optional, name of the results directory.
#'
#' @import dplyr
#' @import UpSetR
#' @export
summary_upset <- function(set_evals, summary_dir) {

	data <- list()
	for (evalset in set_evals){
		pred = evalset@pred_set@ris
		pred_id = evalset@pred_set@id
		pred <- pred %>% dplyr::mutate(pair = paste0(tf_bnum, "_",  gene_bnum)) %>%
			dplyr::arrange(pair) %>%
			dplyr::distinct(pair, .keep_all = TRUE)
			data[[pred_id]] <- pred$pair
	}

	neg_id <- set_evals[[1]]@neg_set@id
	neg <- set_evals[[1]]@neg_set@ris
	neg <- neg %>% dplyr::mutate(pair = paste0(tf_bnum, "_",  gene_bnum)) %>%
		dplyr::arrange(pair) %>%
		dplyr::distinct(pair, .keep_all = TRUE)
	data[[neg_id]] <- neg$pair

	pos_id <- set_evals[[1]]@pos_set@id
	pos <- set_evals[[1]]@pos_set@ris
	pos <- pos %>% dplyr::mutate(pair = paste0(tf_bnum, "_",  gene_bnum)) %>%
		dplyr::arrange(pair) %>%
		dplyr::distinct(pair, .keep_all = TRUE)
	data[[pos_id]] <- pos$pair


	png(paste0(summary_dir, "/upset.png"), width= 8, height= 4, units= "in", res= 200)
	plot <- upset(fromList(data), nsets = length(data), number.angles = 30, point.size = 3, line.size = 2,
		mainbar.y.label = "Sets Intersections", sets.x.label = "Set IDs",
		text.scale = c(1.5, 1.5, 1.5, 1, 1.5, 1.2), order.by='freq')
	print(plot)
	dev.off()
	# ggsave(paste0(summary_dir, "/upset.png"), device="png", width = 20, height = 20, units = "cm")

}
rioualen/NetworkEval documentation built on April 10, 2021, 5:22 a.m.