#### TRONCO: a tool for TRanslational ONCOlogy
####
#### Copyright (c) 2015-2017, Marco Antoniotti, Giulio Caravagna, Luca De Sano,
#### Alex Graudenzi, Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti.
####
#### All rights reserved. This program and the accompanying materials
#### are made available under the terms of the GNU GPL v3.0
#### which accompanies this distribution.
# reconstruct the best tree-like topology
# @title caprese.fit
# @param dataset a dataset describing a progressive phenomenon
# @param lambda shrinkage parameter (value in [0,1])
# @param silent execute the algorithm in silent mode
# @param epos error rate of false positive errors
# @param eneg error rate of false negative errors
# @param hypotheses hypotheses to be considered in the reconstruction. This should be NA for this algorithms.
# @return topology: the reconstructed tree-like topology
#
caprese.fit <- function(dataset,
lambda = 0.5,
silent = FALSE,
epos = 0.0,
eneg = 0.0,
hypotheses = NA) {
## Start the clock to measure the execution time.
ptm <- proc.time();
## Structure with the set of valid edges.
## I start from the complete graph, i.e., I have no prior and all
## the connections are possibly causal.
adj.matrix = array(1, c(ncol(dataset), ncol(dataset)));
colnames(adj.matrix) = colnames(dataset);
rownames(adj.matrix) = colnames(dataset);
## The diagonal of the adjacency matrix should not be considered,
## i.e., no self cause is allowed.
diag(adj.matrix) = 0;
## Consider any hypothesis.
adj.matrix = hypothesis.adj.matrix(hypotheses, adj.matrix);
## Check if the dataset is valid.
valid.dataset = check.dataset(dataset,adj.matrix,FALSE, epos, eneg)
adj.matrix = valid.dataset$adj.matrix
## marginal.probs is an array of the observed marginal
## probabilities.
marginal.probs <- valid.dataset$marginal.probs;
## joint.probs is an array of the observed joint probabilities.
joint.probs <- valid.dataset$joint.probs;
## Reconstruct the causal topology.
best.parents =
get.tree.parents(adj.matrix,
marginal.probs,
joint.probs,
lambda)
## Create the structures where to save the results.
parents.pos <-
best.parents$parents;
conditional.probs <-
array(-1, dim = c(length(parents.pos), 1));
adj.matrix <-
array(0, dim = c(length(parents.pos), length(parents.pos)));
colnames(adj.matrix) = colnames(dataset);
rownames(adj.matrix) = colnames(dataset);
confidence <- array(list(), c(3, 1));
confidence[[1, 1]] =
array(0, dim = c(length(parents.pos), length(parents.pos)));
confidence[[2, 1]] =
best.parents$pr.score;
confidence[[3, 1]] =
array(0, dim = c(length(parents.pos), length(parents.pos)));
## Set the parents names and the structures.
hypergeometric.pvalues = vector();
for (i in 1:ncol(dataset)) {
## If the node has a parent.
if (parents.pos[i, 1] != -1) {
## Note: [i,j] = 1 means that i is causing j.
adj.matrix[parents.pos[i, 1], i] = 1;
## Compute the conditional probability of
## P(CHILD=1|PARENT=1)
conditional.probs[i, 1] =
best.parents$joint.probs[parents.pos[i, 1], i] /
best.parents$marginal.probs[parents.pos[i]];
} else {
## If the node has no parent, its conditional probability
## is set to 1.
conditional.probs[i, 1] = 1;
}
## Compute the hypergeometric test.
for (j in i:ncol(dataset)) {
if (i != j) {
## Confidence in temporal priority.
confidence[[1, 1]][i, j] =
min(best.parents$marginal.probs[i] /
best.parents$marginal.probs[j],
1);
confidence[[1, 1]][j, i] =
min(best.parents$marginal.probs[j] /
best.parents$marginal.probs[i],
1);
## Compute the confidence by hypergeometric test
confidence[[3, 1]][i, j] =
phyper(best.parents$joint.probs[i, j] * nrow(dataset),
best.parents$marginal.probs[i] * nrow(dataset),
nrow(dataset) -
best.parents$marginal.probs[i] *
nrow(dataset),
best.parents$marginal.probs[j] *
nrow(dataset),
lower.tail = FALSE);
confidence[[3, 1]][j, i] = confidence[[3, 1]][i, j];
## Save all the valid pvalues.
hypergeometric.pvalues =
append(hypergeometric.pvalues,
confidence[[2, 1]][i, j]);
}
}
}
estimated.error.rates = list(error.fp = NA, error.fn = NA)
estimated.probabilities = list(marginal.probs = NA,
joint.probs = NA,
conditional.probs = NA)
error.rates = estimated.error.rates
estimated.probabilities.fit = estimated.probabilities
## Structures where to save the results.
model = list();
adj.matrix.fit = list();
adj.matrix.fit$adj.matrix.fit = adj.matrix;
probabilities.observed =
list(marginal.probs = marginal.probs,
joint.probs = joint.probs,
conditional.probs = conditional.probs);
probabilities.fit =
list(estimated.marginal.probs = estimated.probabilities.fit$marginal.probs,
estimated.joint.probs = estimated.probabilities.fit$joint.probs,
estimated.conditional.probs = estimated.probabilities.fit$conditional.probs);
probabilities =
list(probabilities.observed = probabilities.observed,
probabilities.fit = probabilities.fit);
## Save the results for the model
model[["caprese"]] =
list(probabilities = probabilities,
parents.pos = parents.pos,
error.rates = error.rates,
adj.matrix = adj.matrix.fit);
## Set the execution parameters.
parameters =
list(algorithm = "CAPRESE",
lambda = lambda,
silent = silent,
error.rates = list(epos=epos,eneg=eneg))
## Return the results.
topology =
list(dataset = dataset,
hypotheses = hypotheses,
confidence = confidence,
model = model,
parameters = parameters,
execution.time = (proc.time() - ptm))
topology = rename.reconstruction.fields(topology, dataset)
return(topology)
}
# select at the most one parent for each node based on the probability raising criteria
# @title get.tree.parents
# @param adj.matrix adjacency matrix of the valid edges
# @param marginal.probs observed marginal probabilities
# @param joint.probs observed joint probabilities
# @param lambda shrinkage parameter (value between 0 and 1)
# @return best.parents list of the best parents
#
get.tree.parents <- function(adj.matrix,
marginal.probs,
joint.probs,
lambda) {
## Compute the scores for each edge.
scores =
get.tree.scores(adj.matrix,
marginal.probs,
joint.probs,
lambda);
pr.score = scores$pr.score;
## Set to -1 the scores where there is no causation according to
## Suppes' condition [i,j] means i is causing j.
for (i in 1:ncol(pr.score)) {
for (j in i:ncol(pr.score)) {
## The diagonal has not to be considered (no self-cause)
if (i == j) {
pr.score[i, j] = -1;
} else {
## Otherwise, apply Suppes's criteria for prima facie
## cause.
## If both the scores are not greater then 0, they are
## not valid in this case the events are causally
## irrelevant, i.e., independent.
if (pr.score[i, j] <= 0 && pr.score[j, i] <= 0) {
pr.score[i, j] = -1;
pr.score[j, i] = -1;
}
## If at least one score is greater then 0, I keep the
## greater one; in this way I give a (time) direction
## to the progression. Furthermore, this constrain
## the topology to be acyclic by construction.
else {
if (pr.score[i, j] > pr.score[j, i]) {
pr.score[j, i] = -1;
} else {
pr.score[i, j] = -1;
}
}
}
}
}
## Chose at the most one parent per node.
## Here I suppose that each node has a parent spurious causes are
## considered (and removed) later.
best.parents = array(-1, dim = c(ncol(pr.score), 1));
for (i in 1:ncol(pr.score)) {
## -1 means that the best parent is the Root
curr.best = -1;
## Find the best parent for the current node.
best = which.max(pr.score[,i]);
if (pr.score[best,i]>0) {
curr.best = best;
}
## Set the best parent for the current node.
best.parents[i,1] = curr.best;
}
# remove any loop in the reconstruction
caprese.adj.matrix = array(0,c(nrow(best.parents),nrow(best.parents)))
list.of.arcs = list()
list.of.scores = NULL
for(nodes in 1:nrow(best.parents)) {
if(best.parents[nodes,1]!=-1) {
caprese.adj.matrix[best.parents[nodes,1],nodes] = 1
list.of.arcs[[(length(list.of.arcs)+1)]] = list(parent=best.parents[nodes,1],child=nodes)
list.of.scores = c(list.of.scores,pr.score[best.parents[nodes,1],nodes])
}
}
best.parents = remove.caprese.loops(best.parents,caprese.adj.matrix,list.of.arcs,list.of.scores)
## Check for spurious causes by the independent progression filter
## and complete the parents list.
parents =
verify.parents(best.parents,
marginal.probs,
joint.probs);
## Save the results
best.parents =
list(parents = parents,
marginal.probs = marginal.probs,
joint.probs = joint.probs,
pr.score = scores$pr.score);
return(best.parents);
}
remove.caprese.loops <- function( best.parents, adj.matrix, edges, scores ) {
# if I have at least one edge
if(length(edges)>0) {
ordered.scores = sort(scores,decreasing=FALSE,index.return=TRUE)
ordered.edges = edges[ordered.scores$ix]
# go through the edges in decreasing order of confidence
for (i in 1:length(edges)) {
# consider any edge i --> j
curr.edge = ordered.edges[[i]]
curr.edge.i = as.numeric(curr.edge$parent)
curr.edge.j = as.numeric(curr.edge$child)
# search for loops between curr.edge.i and curr.edge.j
curr.graph = graph.adjacency(adj.matrix,mode="directed")
is.path = suppressWarnings(get.shortest.paths(curr.graph,
curr.edge.j,
curr.edge.i)$vpath)
is.path = length(unlist(is.path))
# if there is a path between the two nodes, remove edge i --> j
if (is.path > 0) {
adj.matrix[curr.edge.i,curr.edge.j] = 0
best.parents[curr.edge.j,1] = -1
}
}
}
return(best.parents)
}
# compute the probability raising based scores
# @title get.tree.scores
# @param adj.matrix adjacency matrix of the valid edges
# @param marginal.probs observed marginal probabilities
# @param joint.probs observed joint probabilities
# @param lambda shrinkage parameter (value between 0 and 1)
# @return scores: probability raising based scores
#
get.tree.scores <- function(adj.matrix,
marginal.probs,
joint.probs,
lambda) {
## Structure where to save the probability raising scores.
pr.score =
array(-1, dim = c(nrow(marginal.probs), nrow(marginal.probs)))
## Compute the probability raising based scores.
for (i in 1:ncol(pr.score)) {
for (j in 1:ncol(pr.score)) {
## If the edge is valid.
if (adj.matrix[i, j] == 1) {
## alpha is the probability raising model of causation
## (raw model estimate).
alpha =
((joint.probs[i, j] / marginal.probs[i]) -
((marginal.probs[j] - joint.probs[i, j]) /
(1 - marginal.probs[i]))) /
((joint.probs[i,j] /
marginal.probs[i]) +
((marginal.probs[j] - joint.probs[i,j]) /
(1 - marginal.probs[i])));
## beta is the correction factor (based on time
## distance in terms of statistical dependence).
beta =
(joint.probs[i, j] -
marginal.probs[i] * marginal.probs[j]) /
(joint.probs[i, j] + marginal.probs[i] * marginal.probs[j]);
## The overall estimator is a shrinkage-like
## combination of alpha and beta.
## The scores are saved in the convention used for an
## ajacency matrix, i.e. [i, j] means causal edge
## i-->j.
pr.score[i, j] = (1 - lambda) * alpha + lambda * beta;
}
}
}
scores =
list(marginal.probs = marginal.probs,
joint.probs = joint.probs,
pr.score = pr.score);
return(scores);
}
# verify the independent progression filter
# @title verify.parents
# @param best.parents best edges to be verified
# @param marginal.probs observed marginal probabilities
# @param joint.probs observed joint probabilities
# @return best.parents: list of the best valid parents
#
verify.parents <- function(best.parents, marginal.probs, joint.probs) {
## Verify the condition for the best parent of each node.
for (i in 1:length(best.parents)) {
## If there is a connection, i.e. the node is not already
## attached to the Root.
if (best.parents[i] != -1) {
## Score for the root as the parent of this node.
w.root.node = 1 / (1 + marginal.probs[i]);
## Compute the scores for the edges to all the other
## upstream nodes.
attach.to.root = 1;
for (j in 1:length(marginal.probs)) {
## If the connection is valid and the parent node has
## greater probability, i.e. it is before the child in
## temporal order.
if (i != j && marginal.probs[j] > marginal.probs[i]) {
w.parent.node =
(marginal.probs[j] / (marginal.probs[i] + marginal.probs[j])) *
(joint.probs[i, j] / (marginal.probs[i] * marginal.probs[j]));
## The parent is valid if this condition is valid
## at least one time (i.e. for at least one of the
## upstream nodes), meaning that if we find out
## that a connection is not spurious for any node,
## the best parent is not spurious as well.
if (w.root.node <= w.parent.node) {
attach.to.root = 0;
break;
}
}
}
## Connect the node to the Root if the flag is true.
if (attach.to.root==1) {
best.parents[i] = -1;
}
}
}
return(best.parents);
}
#### end of file -- caprese.algorithm.R
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.