#' @title Check the results files
#' @description This function can compare the results obtained by a deterministic model, in this case
#' the files are compared using the diff unix command, obtained by two different stochastic model
#' calculating the confidence intervals and do a simple form of functional data analysis using
#' the fdatest package. Produce a file log with the output of the used functions.
#' @param fname_st Path to the first file to compare
#' @param fname_nd Path to the second file to compare
#' @param fun A string that defines which control algorithm apply (det_check, sto_check, fda_test)
#' @param furl_st Url of the first file to compare
#' @param threshold A threshold used to compare the p-values generated in functional data analysis
#'
#' @export
#'
#' @author Daniele Baccega, Luca Rosso
library(fdatest)
log_it <- function(msg, fun){
write(msg,
file = file.path(paste0("./results_check/", fun, .Platform$file.sep, "results.log")),
append = TRUE)
}
compare_file <- function(furl_st, fname_st, fname_nd, fun){
if(is.null(furl_st) && is.null(fname_st)){
log_it("ERROR: Missing path of first file to compare! Abort", fun)
stop("Missing path of first file to compare! Abort")
}
if(is.null(fname_nd)){
log_it(paste("WARNING: missing path of second file to compare,
using results_reference/deterministic_model/model_analysis-1.trace
as default"), fun)
warning("WARNING: missing path of second file to compare,
using results_reference/deterministic_model/model_analysis-1.trace
as default")
path = "results_reference/deterministic_model/model_analysis-1.trace"
fname_nd = paste0(getwd(), .Platform$file.sep, path)
}
if(!is.null(furl_st)){
fst_ext = unlist(strsplit(basename(furl_st), "\\."))[2]
dest_st = paste0(getwd(), .Platform$file.sep, "file1.", fst_ext)
download.file(furl_st, dest_st)
}else{
if(!file.exists(fname_st)){
log_it(paste("ERROR:", fname_st, "file not exists! Abort"), fun)
stop(paste("ERROR:", fname_st, "file not exists! Abort"))
}
fst_ext = unlist(strsplit(basename(fname_st), "\\."))[2]
dest_st = paste0(getwd(), .Platform$file.sep, "file1.", fst_ext)
file.copy(from = fname_st, to = dest_st)
}
if(!file.exists(fname_nd)){
unlink(dest_st)
log_it(paste("ERROR:", fname_nd, "file not exists! Abort"), fun)
stop(paste(fname_nd, "file not exists! Abort"))
}
fnd_ext = unlist(strsplit(basename(fname_nd), "\\."))[2]
dest_nd = paste0(getwd(), .Platform$file.sep, "file2.", fnd_ext)
file.copy(from = fname_nd, to = dest_nd)
if(.Platform$OS.type == "unix")
{
if(system(paste("diff", basename(dest_st), basename(dest_nd)), ignore.stdout = FALSE) == 1){
log_it("The two files are not equal", fun)
suppressWarnings(out <- system(paste("diff", basename(dest_st), basename(dest_nd)), intern = TRUE))
log_it(out, fun)
}
else{
log_it("The two files are equal", fun)
}
}
unlink(dest_st)
unlink(dest_nd)
}
det_results_check <- function(fname_st, fname_nd, furl_st){
if(dir.exists("./results_check/det_check"))
unlink("./results_check/det_check", recursive = TRUE)
dir.create("./results_check/det_check")
log_it("STARTING RESULTS CHECK FOR DETERMINISTIC MODEL...", "det_check")
compare_file(furl_st, fname_st, fname_nd, "det_check");
log_it("END RESULTS CHECK FOR DETERMINISTIC MODEL...", "det_check")
}
#Here we compute the mean of the different columns for each time step on the first file and
#the confidence intervals on the second file, then we check if the calculated means respect the calculated intervals.
sto_results_check <- function(fname_st = NULL, fname_nd = NULL, furl_st = NULL, sep = " "){
fun = "sto_check"
if(dir.exists("./results_check/sto_check"))
unlink("./results_check/sto_check", recursive = TRUE)
dir.create("./results_check/sto_check")
log_it("STARTING RESULTS CHECK FOR STOCHASTIC MODEL...", fun)
if(is.null(furl_st) && is.null(fname_st)){
log_it("ERROR: Missing path of first file to compare! Abort", fun)
stop("Missing path of first file to compare! Abort")
}
if(is.null(fname_nd))
{
log_it(paste("WARNING: missing path of second file to compare,
using results_reference/stochastic_model/model_analysis-1.trace
as default"), fun)
warning("WARNING: missing path of second file to compare,
using results_reference/stochastic_model/model_analysis-1.trace
as default")
path = "results_reference/stochastic_model/model_analysis-1.trace"
fname_nd = file.path(getwd(), path, fsep = .Platform$file.sep)
}
if(!is.null(furl_st))
{
fst_ext = unlist(strsplit(basename(furl_st), "\\."))[2]
dest_st = file.path(getwd(), paste0("file1.", fst_ext), fsep = .Platform$file.sep)
#dest_st = paste0(getwd(),.Platform$file.sep,"file1.",fst_ext)
download.file(furl_st, dest_st)
trace1 = read.csv(file = dest_st, header = TRUE, sep = sep, quote = "\"", dec = ".", row.names = NULL)
}else{
if(!file.exists(fname_st))
{
log_it(paste("ERROR:", fname_st, "file not exists! Abort"), fun)
stop(paste(fname_st, "file not exists! Abort"))
}else{
trace1 = read.csv(file = fname_st, header = TRUE, sep = sep, quote = "\"", dec = ".", row.names = NULL)
}
}
if(!file.exists(fname_nd)){
if(!is.null(furl_st))
unlink(dest_st)
log_it(paste("ERROR:", fname_nd, "file not exists! Abort"), fun)
stop(paste(fname_nd, "file not exists! Abort"))
}else{
trace2 = read.csv(fname_nd, header = TRUE, sep = sep, quote = "\"", dec = ".", row.names = NULL)
}
if(nrow(trace1) != nrow(trace2)){
log_it("ERROR: The two files to compare must have the same number of elements!", fun)
stop("The two files to compare must have the same number of elements!")
}
alpha <- 0.01
n_ex <- length(trace1$Time) / length(unique(trace1$Time))
#Create frame1 with the mean of all variables in the referred file
trace1.nms <- names(trace1)
trace1.ci <- data.frame()
#For all unique values of Time column create an entry with intervals
for(j in unique(trace1$Time))
{
iteration1.ci <- data.frame()
# Get only the rows relative to the current Time
trace1.iter <- trace1[trace1$Time == j,]
#For all columns of observation's j-time compute the mean
for(n in trace1.nms)
{
if(n != "Time")
{
#All values of interested column
values <- trace1.iter[[n]]
#Correspond to one row
var1.ci <- data.frame(mean = mean(values))
#The columns' names of the row just calculated take the values of S_mean, I_mean and R_mean
names(var1.ci) <- paste0(n, "_", names(var1.ci))
#If the "building row" doesn't still have the time column, it's added
if(ncol(iteration1.ci) != 0)
{
iteration1.ci <- cbind(iteration1.ci, var1.ci)
}
else
{
iteration1.ci <- data.frame(Time = j)
iteration1.ci <- cbind(iteration1.ci, var1.ci)
}
}
}
#entry related to a specific time (for example all experiments with Time=0) with all the variables and intervals
trace1.ci <- rbind(trace1.ci, iteration1.ci)
}
#Create frame2 with the confidence interval of all variables in the referred file
trace2.nms <- names(trace2)
trace2.ci <- data.frame()
#For all unique values of Time column create an entry with intervals
for(j in unique(trace2$Time))
{
iteration2.ci <- data.frame()
# Get only the rows relative to the current Time
trace2.iter <- trace2[trace2$Time == j,]
#For all columns of observation's j-time construct its interval
for(n in trace2.nms)
{
if(n != "Time")
{
#All values of interested column
values <- trace2.iter[[n]]
#Correspond to one row
var2.ci <- data.frame(lower = mean(values) - qt(1 - alpha / 2, n_ex - 1) * sd(values) / sqrt(n_ex),
upper = mean(values) + qt(1 - alpha / 2, n_ex - 1) * sd(values) / sqrt(n_ex))
#The columns' names of the row just calculated take the values of cname_mean,cname_lower and cname_uppper
names(var2.ci) <- paste0(n, "_", names(var2.ci))
#If the "building row" doesn't still have the time column, it's added
if(ncol(iteration2.ci) != 0)
{
iteration2.ci <- cbind(iteration2.ci, var2.ci)
}
else
{
iteration2.ci <- data.frame(Time = j)
iteration2.ci <- cbind(iteration2.ci, var2.ci)
}
}
}
#Entry related to a specific time (for example all experiments with Time=0) with all the variables and intervals
trace2.ci <- rbind(trace2.ci, iteration2.ci)
}
printed_error = FALSE
#Control if all means calculated in the first dataframe respect their confidence interval
for(row in c(1:nrow(trace1.ci)))
{
referred_row = trace1.ci[trace1.ci$Time == row-1,]
#index of columns in the second frame
lb_index = 2
ub_index = 3
#for all variables' mean
for(column in c(2:ncol(trace1.ci)))
{
sing_val = referred_row[column]
lb_value = trace2.ci[trace2.ci$Time == referred_row[[1]], lb_index]
ub_value = trace2.ci[trace2.ci$Time == referred_row[[1]], ub_index]
#the mean must be in the interval calculated
if(!(lb_value <= sing_val & sing_val <= ub_value)){
log_it(paste0("Time:", referred_row[[1]], " ", names(trace1)[column], ":",
sing_val, " not in interval [", lb_value, ",", ub_value, "]"), fun)
printed_error = TRUE
}
lb_index = lb_index + 2
ub_index = ub_index + 2
}
}
if(!is.null(furl_st))
unlink(dest_st)
if(!printed_error)
log_it("The calculated means respect the confidence intervals!", fun)
log_it("END RESULTS CHECK FOR STOCHASTIC MODEL...", fun)
}
fda_check <- function(fname_st = NULL, fname_nd = NULL, furl_st = NULL, sep = " ", threshold){
fun = "fda_check"
if(dir.exists("./results_check/fda_check"))
unlink("./results_check/fda_check", recursive = TRUE)
dir.create("./results_check/fda_check")
log_it("STARTING FUNCTION DATA ANALYSIS...", fun)
if(is.null(furl_st) && is.null(fname_st))
{
log_it("ERROR: Missing path of first file to compare! Abort", fun)
stop("Missing path of first file to compare! Abort")
}
if(is.null(fname_nd))
{
log_it(paste("WARNING: missing path of second file to compare,
using results_reference/stochastic_model/model_analysis-1_SSA_copy.trace
as default"), fun)
warning("WARNING: missing path of second file to compare,
using results_reference/stochastic_model/model_analysis-1_SSA_copy.trace
as default")
path = "results_reference/stochastic_model/model_analysis-1_SSA_copy.trace"
fname_nd = file.path(getwd(), path, fsep = .Platform$file.sep)
}
if(!is.null(furl_st))
{
fst_ext = unlist(strsplit(basename(furl_st), "\\."))[2]
dest_st = file.path(getwd(), paste0("file1.", fst_ext), fsep = .Platform$file.sep)
download.file(furl_st, dest_st)
trace1 = read.csv(file = dest_st, header = TRUE, sep = sep, quote = "\"", dec = ".", row.names = NULL)
}else
{
if(!file.exists(fname_st))
{
log_it(paste("ERROR:", fname_st, "file not exists! Abort"), fun)
stop(paste(fname_st, "file not exists! Abort"))
}else{
trace1 = read.csv(file = fname_st, header = TRUE, sep = sep, quote = "\"", dec = ".", row.names = NULL)
}
}
if(!file.exists(fname_nd)){
if(!is.null(furl_st))
unlink(dest_st)
log_it(paste("ERROR:", fname_nd, "file not exists! Abort"), fun)
stop(paste(fname_nd, "file not exists! Abort"))
}else
{
trace2 = read.csv(fname_nd, header = TRUE, sep = sep, quote = "\"", dec = ".", row.names = NULL)
}
if(nrow(trace1) != nrow(trace2) || ncol(trace1) != ncol(trace2))
{
log_it("ERROR: The two files to compare must have the same number of elements!", fun)
stop("The two files to compare must have the same number of elements!")
}
if(!all(names(trace1) == names(trace2), na.rm = TRUE))
{
log_it("ERROR: The columns of the two files to compare must have the same name", fun)
stop("The columns of the two files to compare must have the same name")
}
#Values of the Time columns will be the names of columns
n_ex <- length(trace1$Time) / length(unique(trace1$Time))
#Iteration of all columns excluded the first (Time column)
#for(column_names in names(trace1)[-1])
#for(column_names in "S")
for(column_names in names(trace1)[-1])
{
trace1.ready <- data.frame()
#The interested elements of i-run
base = 1
#Construction of first data frame, which has the number of row equals to the number of run
for(i in c(1:n_ex-1))
{
ref_column_elem = trace1[base:((base + nrow(trace1) / n_ex) - 1), column_names]
trace1.ready <- rbind(trace1.ready , ref_column_elem)
base = base + nrow(trace1) / n_ex
}
cnames = "Time0"
for(i in c(2:(nrow(trace1) / n_ex)))
cnames = c(cnames, paste0("Time", i-1))
names(trace1.ready) = cnames
#Writing the new format on file
#write.table(trace1.ready,
# file = file.path(paste0("./fda_files", .Platform$file.sep, "fda_fstfile_", column_names, ".trace")),
# sep=" ", append=FALSE, row.names=FALSE)
n_ex <- length(trace2$Time) / length(unique(trace2$Time))
trace2.ready <- data.frame()
base = 1
#Construction of second data frame, which has the number of row equals to the number of run
for(i in c(1:n_ex-1))
{
ref_column_elem = trace2[base:((base + nrow(trace2) / n_ex) - 1), column_names]
trace2.ready <- rbind(trace2.ready, ref_column_elem)
base = base + nrow(trace2) / n_ex
}
cnames = "Time0"
for(i in c(2:(nrow(trace2) / n_ex)))
cnames = c(cnames,paste0("Time", i-1))
names(trace2.ready) = cnames
#Writing the new format on file
#write.table(trace2.ready,
# file = file.path(paste0("./fda_files", .Platform$file.sep, "fda_fndfile_", column_names, ".trace")),
# sep=" ", append=FALSE, row.names = FALSE)
ITP.result <- ITP2bspline(trace1.ready, trace2.ready, nknots=20, B=1000)
#Open graphic device to print plot and images on png
png(file = file.path(paste0("./results_check/fda_check", .Platform$file.sep, "place", column_names, "_%2d.png")),
width = 1300, height = 900)
for(i in c(1:nrow(trace1.ready)))
{
if(i == 1)
{
#Set margin
par(mar=c(5, 5, 4, 2) + 0.1)
#Set plot
plot(c(1:ncol(trace1.ready)), trace1.ready[i,], type="l", col="black", lty=2, ylim=c(min(trace1.ready)-100,
max(trace1.ready)+100), xlab="time", ylab="population", main=paste("Place's real traces ", column_names),
cex.lab=2, cex.main=2, yaxt="n", xaxt="n")
axis(1, cex.axis=2)
axis(2, cex.axis=2)
legend(x = "topright", # Position
inset = 0.08, # Distance from the margin as a fraction of the plot region
cex = 1.5, # Change legend size
title = "Solver type", # Title
legend = c("Trace 1 solver", "Trace 2 solver"), # Legend texts
lty = c(2, 1), # Line types
col = c("black", "red"), # Line colors
lwd = 1) # Line width
}else{
points(c(1:ncol(trace1.ready)), trace1.ready[i,], type="l", lty=2, col="black")
}
}
for(i in c(1:nrow(trace2.ready)))
{
points(c(1:ncol(trace2.ready)), trace2.ready[i,], type="l", col="red")
}
plot(ITP.result, xlab="time", ylab="population", main=paste("Place's curve fitting", column_names),
cex.lab=2, cex.main=2)
ITPimage(ITP.result)
#If p-val >= threshold the null hypotesis it's confirmed, refused otherwise
log_it(paste("\n\nP-val >=", threshold, "for place", column_names, ":"), fun)
for(i in ITP.result[["pval"]])
{
if(i >= threshold)
{
log_it(paste(i," PASS"), fun)
}else{
log_it(paste(i," FAIL"),fun)
}
}
#Writing p-val and p-val matrix on file
#lapply(ITP.result[["pval"]], write,
# file = file.path(paste0("./results_check/fda_check", .Platform$file.sep, "pval_col_", column_names, ".txt")),
# append = TRUE)
#write.matrix(ITP.result[["pval.matrix"]],
# file = file.path(paste0("./fda_files", .Platform$file.sep, "pval_matr_col_", column_names, ".txt")))
}
#Close graphic device
dev.off()
log_it("END DATA ANALYSIS...", fun)
}
results_check <- function(fname_st = NULL, fname_nd = NULL, furl_st = NULL, fun, threshold = NULL){
if(missing(fname_st) & (is.null(furl_st) | missing(furl_st)))
stop("Either fname_st or furl_st parameter is missing! Abort")
if(missing(fname_nd))
stop("fname_nd parameter is missing! Abort")
if(missing(fun) | is.null(fun))
stop("fun parameter must be specified! Abort")
if(!fun %in% c("det_check","sto_check","fda_test"))
stop("The specified function doesn't exist! You can choose between det_check, sto_check and fda_test")
if(fun == "fda_test" & (is.null(threshold) | missing(threshold)))
stop("threshold parameter must be specified! Abort")
if(!dir.exists("./results_check"))
dir.create("results_check")
switch(fun,
"det_check" = det_results_check(fname_st, fname_nd, furl_st),
"sto_check" = sto_results_check(fname_st, fname_nd, furl_st),
"fda_test" = fda_check(fname_st, fname_nd, furl_st, threshold = threshold))
}
#Example of running det_check:
#results_check("results_reference/deterministic_model/model_analysis-1.trace",
# "results_reference/deterministic_model/model_analysis-1 (modified).trace",
# "det_check")
#results_check(furl_st = "https://raw.githubusercontent.com/qBioTurin/SIR/master/Results/results_sensitivity_analysis/SIR.solver",
# fname_nd = "Results/results_model_analysis/SIR.solver",
# fun = "det_check")
#results_check("Results/results_model_analysis/SIR.solver", "Results/results_model_analysis/SIR.solver", "det_check")
#Example of running sto_check:
#results_check(fname_st = "results_reference/stochastic_model/model_analysis-1_TAUG.trace",
# fname_nd = "results_reference/stochastic_model/model_analysis-1_TAUG_copy.trace",
# fun = "sto_check")
#results_check(fname_st = "results_reference/stochastic_model/model_analysis-1_SSA.trace",
# fname_nd = "results_reference/stochastic_model/model_analysis-1_TAUG.trace",
# fun = "sto_check")
#Example of running fda_check:
#results_check(fname_st = "results_reference/stochastic_model/model_analysis-1_SSA.trace",
# fname_nd = "results_reference/stochastic_model/model_analysis-1_TAUG.trace",
# fun = "fda_test",
# threshold = 0.05)
#
#
#Useful commands to create a single .trace file with all traces into a given
#directory (e.g. results_sensitivity_analysis):
#tail -n +2 -q SIR-sensitivity-*.trace | awk '{$1=$1};1' > SIR.trace
#sed -i '1s/^/Time S I R\n/' SIR.trace
#
#WARNING!
#Maybe, in the case of sto_check and fda_test, it's necessary to check the input traces
#files or to create a .trace file that includes all .trace files into a given directory
#with the above commands.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.