#### TRONCO: a tool for TRanslational ONCOlogy
####
#### Copyright (c) 2015-2017, Marco Antoniotti, Giulio Caravagna, Luca De Sano,
#### Alex Graudenzi, Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti.
####
#### All rights reserved. This program and the accompanying materials
#### are made available under the terms of the GNU GPL v3.0
#### which accompanies this distribution.
#' Add a new hypothesis by creating a new event and adding it to the compliant genotypes
#' @title hypothesis add
#' @param data A TRONCO compliant dataset.
#' @param pattern.label Label of the new hypothesis.
#' @param lifted.pattern Vector to be added to the lifted genotype resolving the pattern related to the new hypothesis
#' @param pattern.effect Possibile effects for the pattern.
#' @param pattern.cause Possibile causes for the pattern.
#' @return A TRONCO compliant object with the added hypothesis
#' @export hypothesis.add
#' @importFrom stats fisher.test
#' @importFrom methods getPackageName is
#'
hypothesis.add <- function(data,
pattern.label,
lifted.pattern,
pattern.effect = "*",
pattern.cause = "*") {
pattern.label = gsub('[[:space:]]+', '_', pattern.label)
is.compliant(data)
# check if there is a reconstructed model
if(has.model(data)) {
stop('This dataset has a reconstructed model and no more hypothesis can be added.')
}
genotypes = data$genotypes;
annotations = data$annotations;
if (!is.null(data$hypotheses)) {
hypotheses = data$hypotheses;
} else {
hypotheses = NA;
}
## The Boolean functions look for a set of variables.
## If there are already variables named as the ones
## used here, make the backup of them.
do.roll.back.lifting.genotypes = FALSE;
do.roll.back.lifting.annotations = FALSE;
do.roll.back.lifting.edges = FALSE;
do.roll.back.fisher.pvalues = FALSE;
## I need a variable to save the genotypes of the
## lifted pattern.
## If there is already a variable named
## lifting.genotypes, make the backup of it.
hypotheses.env = get('hypotheses.env', asNamespace(getPackageName()))
if (exists("lifting.genotypes", hypotheses.env)) {
roll.back.lifting.genotypes = get('lifting.genotypes', envir = hypotheses.env)
do.roll.back.lifting.genotypes = TRUE;
}
assign("lifting.genotypes", genotypes, envir = hypotheses.env);
## I need a variable to save the annotations of the
## lifted pattern.
## if there is already a variable named
## lifting.annotations, make the backup of it.
if (exists("lifting.annotations", hypotheses.env)) {
roll.back.lifting.annotations = get('lifting.annotations', envir = hypotheses.env)
do.roll.back.lifting.annotations = TRUE;
}
assign("lifting.annotations", annotations, envir = hypotheses.env);
## I need a variable to save the edges of the lifted
## pattern.
## If there is already a variable named lifting.edges,
## make the backup of it.
if (exists("lifting.edges", hypotheses.env)) {
roll.back.lifting.edges = get('lifting.edges', envir = hypotheses.env)
do.roll.back.lifting.edges = TRUE
}
assign("lifting.edges", NULL, envir = hypotheses.env);
## I need a variable to save the pvalues of the lifted
## pattern.
## If there is already a variable named fisher.pvalues,
## make the backup of it.
if (exists("fisher.pvalues", hypotheses.env)) {
roll.back.fisher.pvalues = get('fisher.pvalues', envir = hypotheses.env)
do.roll.back.fisher.pvalues = TRUE;
}
assign("fisher.pvalues", NULL, envir = hypotheses.env);
## Save the lifted genotypes and its hypotheses for the
## current pattern.
curr_pattern = lifted.pattern$pattern;
curr_hypotheses = lifted.pattern$hypotheses;
curr_pvalues = get('fisher.pvalues', envir = hypotheses.env)
## Save the edges of the lifted pattern.
hstructure = get('lifting.edges', envir = hypotheses.env)
## Roll back to the previous value of the variable
## lifting.genotypes if any or remove it.
if (do.roll.back.lifting.genotypes) {
assign("lifting.genotypes",
roll.back.lifting.genotypes,
envir = hypotheses.env);
} else {
rm('lifting.genotypes', pos = hypotheses.env);
}
## Roll back to the previous value of the variable
## lifting.annotations if any or remove it.
if (do.roll.back.lifting.annotations) {
assign("lifting.annotations",
roll.back.lifting.annotations,
envir = hypotheses.env);
} else {
rm('lifting.annotations', pos = hypotheses.env)
}
## Roll back to the previous value of the variable
## lifting.edges if any or remove it.
if (do.roll.back.lifting.edges) {
assign("lifting.edges",
roll.back.lifting.edges,
envir = hypotheses.env)
} else {
rm('lifting.edges', pos = hypotheses.env)
}
## Roll back to the previous value of the variable
## fisher.pvalues if any or remove it.
if (do.roll.back.fisher.pvalues) {
assign("fisher.pvalues",
roll.back.fisher.pvalues,
envir = hypotheses.env)
} else {
rm('fisher.pvalues', pos = hypotheses.env)
}
## Set the hypotheses number.
if (!is.na(hypotheses[1])) {
num.hypotheses = hypotheses$num.hypotheses
} else {
num.hypotheses = 0
}
## * is a special pattern.effect which indicates to use all
## the events as effects for this pattern.
is.to.all.effects = FALSE;
if (pattern.effect[[1]][1] == "*") {
pattern.effect =
colnames(genotypes)[1:(length(colnames(genotypes)) - num.hypotheses)];
## Any event can not be both causes and effects for the
## pattern to be well-formed.
pattern.effect =
list(pattern.effect[-which((pattern.effect %in% unlist(curr_hypotheses$llist)))]);
is.to.all.effects = TRUE;
if (length(pattern.effect) == 0) {
stop(paste("[ERR] Missing list of effects to test or wildcard \'*\'.",
sep = ''));
}
}
## Check the pattern to be well-formed.
all.col.nums = vector();
if (length(pattern.effect) == 0) {
stop(paste("[ERR] Missing list of effects or wildcard \'*\'.",
sep = ''));
} else {
## Check the effects of the pattern to be well-formed.
for (i in 1:length(pattern.effect)) {
curr.pattern.effect = pattern.effect[[i]];
if (is.to.all.effects == FALSE) {
col.num = -1;
if (length(curr.pattern.effect) == 1) {
event.map =
emap(c(curr.pattern.effect, "*"),
genotypes,
annotations);
col.num = event.map$col.num;
events.name = event.map$events.name;
} else if (length(curr.pattern.effect) == 2) {
event.map =
emap(curr.pattern.effect,
genotypes,
annotations);
col.num = event.map$col.num;
events.name = event.map$events.name;
}
} else {
col.num = which(colnames(genotypes) %in% curr.pattern.effect);
if (length(col.num) == 0) {
col.num = -1;
}
events.name = curr.pattern.effect;
}
## Check the effect to be a valid event.
if (col.num[1] == -1) {
stop(paste("[ERR] Unknown gene among effects: \"",
curr.pattern.effect,
"\".",
sep = ''));
}
all.col.nums = append(all.col.nums,col.num);
## Check the pattern to be well-formed.
## If the effect is in the pattern, the pattern is not
## well-formed.
if (length(which(unlist(curr_hypotheses$llist) %in%
events.name)) > 0) {
stop(paste("[ERR] Bad formed pattern, event \"",
curr.pattern.effect,
"\" yields a loop.",
sep = ''));
}
}
}
## Look for duplicated effects in the pattern.
if (anyDuplicated(all.col.nums) > 0) {
stop(paste("[ERR] Bad formed pattern, duplicated events ",
paste(pattern.effect[duplicated(pattern.effect)],
collapse = ', ',
sep = ''),
"within effects.",
sep = ''));
}
## Check that the we are not duplicating any name by adding
## the new pattern.
if (length(which(colnames(genotypes) == pattern.label)) > 0) {
stop(paste("[ERR] This pattern already exists.", sep = ''));
}
## Add the pattern to the genotypes.
genotypes = cbind(genotypes,curr_pattern);
## Check that the pattern is valid according to Suppes'
## theory.
## Compute the observed and observed joint probabilities.
pair.count <-
array(0, dim = c(ncol(genotypes), ncol(genotypes)));
## Compute the probabilities on the genotypes.
for (i in 1:ncol(genotypes)) {
for (j in 1:ncol(genotypes)) {
val1 = genotypes[ , i];
val2 = genotypes[ , j];
pair.count[i, j] = (t(val1) %*% val2);
}
}
## Marginal.probs is an array of the observed marginal
## probabilities.
marginal.probs <-
array(as.matrix(diag(pair.count) / nrow(genotypes)),
dim = c(ncol(genotypes), 1));
## joint.probs is an array of the observed joint probabilities
joint.probs <- as.matrix(pair.count / nrow(genotypes));
## Check that the probability of the pattern is in (0,1)
if (marginal.probs[ncol(genotypes)] == 0
|| marginal.probs[ncol(genotypes)] == 1) {
stop(paste("[ERR] The pattern has marginal probability ",
marginal.probs[ncol(genotypes)],
", which should be in (0, 1).",
sep = ''));
}
## Check that the pattern does not duplicate any existing
## column.
i = ncol(genotypes);
for (j in 1:ncol(genotypes)) {
## If the edge is valid, i.e., not self cause.
if (i != j) {
## If the two considered events are not
## distinguishable.
if ((joint.probs[i, j] / marginal.probs[i]) == 1
&& (joint.probs[i, j] / marginal.probs[j]) == 1) {
stop(paste("[ERR] Pattern duplicates ",
paste(as.events(data)[j , ],
collapse = ' ',
sep = ''),
".",
sep = ''));
}
}
}
## * is a special pattern.cause which indicates to use all the
## events as causes for this pattern.
is.to.all.causes = FALSE;
if (pattern.cause[[1]][1] == "*") {
pattern.cause =
colnames(genotypes)[1:(length(colnames(genotypes)) - num.hypotheses-1)];
## Any event can not be both causes and effects for the
## pattern to be well-formed.
pattern.cause =
list(pattern.cause[- which((pattern.cause %in% unlist(curr_hypotheses$llist)))]);
is.to.all.causes = TRUE;
}
## Check the pattern to be well-formed.
all.col.nums = vector();
if (length(pattern.cause) > 0) {
## Check the causes of the pattern to be well-formed.
for (i in 1:length(pattern.cause)) {
curr.pattern.cause = pattern.cause[[i]];
if (is.to.all.causes == FALSE) {
col.num = -1;
if (length(curr.pattern.cause) == 1) {
event.map =
emap(c(curr.pattern.cause, "*"),
genotypes,
annotations);
col.num = event.map$col.num;
events.name = event.map$events.name;
} else if (length(curr.pattern.cause) == 2) {
event.map =
emap(curr.pattern.cause,
genotypes,
annotations);
col.num = event.map$col.num;
events.name = event.map$events.name;
}
} else {
col.num = which(colnames(genotypes) %in% curr.pattern.cause);
if (length(col.num) == 0) {
col.num = -1;
}
events.name = curr.pattern.cause;
}
## Check the cause to be a valid event.
if (col.num[1] == -1) {
stop(paste("[ERR] Unknown gene among causes: \"",
curr.pattern.cause,
"\".",
sep = ''));
}
all.col.nums = append(all.col.nums, col.num);
## Check the pattern to be well-formed.
## If the cause is in the pattern, the pattern is not
## well-formed.
if (length(which(unlist(curr_hypotheses$llist) %in% events.name)) > 0) {
stop(paste("[ERR] Bad formed pattern, event \"",
curr.pattern.cause,
"\" yields a loop.",
sep = ''));
}
}
}
## Look for duplicated causes in the pattern.
if (anyDuplicated(all.col.nums)>0) {
stop(paste("[ERR] Bad formed pattern, duplicated events ",
paste(pattern.cause[duplicated(pattern.cause)],
collapse = ', ',
sep = ''),
"within causes.",
sep = ''));
}
## Now I can finally add the hypothesis.
colnames(genotypes)[ncol(genotypes)] = pattern.label;
if (is.na(hypotheses[1])) {
hypotheses = list();
}
hypotheses$num.hypotheses = num.hypotheses + 1;
## Add the new hypothesis in the annotations.
annotations = rbind(data$annotations,c("Pattern", pattern.label));
rownames(annotations)[nrow(annotations)] = pattern.label;
## Add the color of the type "Hypothesis" is not already
## defined.
if (any(rownames(data$types) == "Pattern") == FALSE) {
types = rbind(data$types, 'slateblue');
rownames(types)[nrow(types)] = "Pattern";
data$types = types;
}
## Create the list of added hypotheses.
if (length(hypotheses$hlist)==0) {
hypotheses$hlist = vector();
}
## Add the new hypothesis to the list.
for (i in 1:length(pattern.effect)) {
curr.pattern.effect = pattern.effect[[i]];
if (is.to.all.effects == FALSE) {
if (length(curr.pattern.effect) == 1) {
event.map =
emap(c(curr.pattern.effect, "*"),
genotypes,
annotations);
col.num = event.map$col.num;
} else if (length(curr.pattern.effect) == 2) {
event.map =
emap(curr.pattern.effect,
genotypes,
annotations);
col.num = event.map$col.num;
}
} else {
col.num = which(colnames(genotypes) %in% curr.pattern.effect);
if (length(col.num) == 0) {
col.num = -1;
}
}
for (j in 1:length(col.num)) {
hypotheses$hlist =
rbind(hypotheses$hlist,
t(c(colnames(genotypes)[ncol(genotypes)],
colnames(genotypes)[col.num[j]])));
}
if (is.null(colnames(hypotheses$hlist))) {
colnames(hypotheses$hlist) = c("cause", "effect");
}
}
## Add the causes of the new hypothesis to the list.
if (length(pattern.cause) > 0) {
for (i in 1:length(pattern.cause)) {
curr.pattern.cause = pattern.cause[[i]];
if (is.to.all.causes==FALSE) {
if (length(curr.pattern.cause)==1) {
event.map =
emap(c(curr.pattern.cause, "*"),
genotypes,
annotations);
col.num = event.map$col.num;
}
else if (length(curr.pattern.cause)==2) {
event.map =
emap(curr.pattern.cause,
genotypes,
annotations);
col.num = event.map$col.num;
}
} else {
col.num = which(colnames(genotypes) %in% curr.pattern.cause);
if (length(col.num) == 0) {
col.num = -1;
}
}
for (j in 1:length(col.num)) {
hypotheses$hlist =
rbind(hypotheses$hlist,
t(c(colnames(genotypes)[col.num[j]],
colnames(genotypes)[ncol(genotypes)])));
}
if (is.null(colnames(hypotheses$hlist))) {
colnames(hypotheses$hlist) = c("cause", "effect");
}
}
}
## Create the list of hypotheses' structures.
if (length(hypotheses$hstructure) == 0) {
#hypotheses$hstructure = new.env(hash = TRUE, parent = emptyenv());
hypotheses$hstructure = list()
}
hypotheses$hstructure[pattern.label] = list(get.lifted.pattern(hstructure))
## Add the atoms of the hypothesis.
if (length(hypotheses$patterns) == 0) {
hypotheses$patterns = list();
}
hypotheses$patterns[pattern.label] = lifted.pattern$hypotheses$llist;
## Add the hypotheses of the atoms.
if (length(hypotheses$atoms) == 0) {
hypotheses$atoms =
vector(mode = "list",
length = (ncol(genotypes) - hypotheses$num.hypotheses));
names(hypotheses$atoms) =
colnames(genotypes)[1:(ncol(genotypes) - hypotheses$num.hypotheses)];
}
atoms.in.pattern =
which(names(hypotheses$atoms) %in% unlist(hypotheses$patterns[pattern.label]));
if (length(atoms.in.pattern) > 0) {
for (i in 1:length(atoms.in.pattern)) {
hypotheses$atoms[[atoms.in.pattern[i]]] =
append(hypotheses$atoms[[atoms.in.pattern[i]]], pattern.label);
}
}
## Add the fisher pvalues.
if (length(hypotheses$pvalues) == 0) {
hypotheses$pvalues = vector();
}
hypotheses$pvalues = append(hypotheses$pvalues, list(curr_pvalues))
names(hypotheses$pvalues)[length(hypotheses$pvalues)] = pattern.label;
## Return the new (compliant) data structure as result.
data$genotypes = genotypes;
data$annotations = annotations;
data$hypotheses = hypotheses;
return(data);
}
# resolve the ellipsis for the effects
hypothesis.lifted.effects <- function( ... ) {
return(list(...));
}
#' Add all the hypotheses related to a group of events
#' @title hypothesis add group
#' @param x A TRONCO compliant dataset.
#' @param FUN Type of pattern to be added, e.g., co-occurance, soft or hard exclusivity.
#' @param group Group of events to be considered.
#' @param pattern.cause Possibile causes for the pattern.
#' @param pattern.effect Possibile effects for the pattern.
#' @param dim.min Minimum cardinality of the subgroups to be considered.
#' @param dim.max Maximum cardinality of the subgroups to be considered.
#' @param min.prob Minimum probability associated to each valid group.
#' @param silent A parameter to disable/enable verbose messages.
#' @return A TRONCO compliant object with the added hypotheses
#' @export hypothesis.add.group
#' @importFrom utils flush.console combn
#'
hypothesis.add.group <- function(x,
FUN,
group,
pattern.cause = '*',
pattern.effect = '*',
dim.min = 2,
dim.max = length(group),
min.prob = 0,
silent = FALSE) {
is.compliant(x)
# check if there is a reconstructed model
if (has.model(x)) {
stop('This dataset has a reconstructed model and no more hypothesis can be added.')
}
op = deparse(substitute(FUN))
effect = paste0("c('", paste(pattern.effect, collapse = "', '"), "')")
cause = paste0("c('", paste(pattern.cause, collapse = "', '"), "')")
ngroup = length(group)
if (ngroup < 2) {
warning("No hypothesis will be created for groups with less than 2 elements.")
return(x)
}
if (!silent) {
cat("*** Adding Group Hypotheses\n")
cat(' Group:', paste(group, collapse = ", ", sep = ""), '\n')
cat(' Function:', op, '\n')
cat(' Cause:', paste(pattern.cause, collapse=", "), '; ')
cat(' Effect:', paste(pattern.effect, collapse=", "), '.\n')
}
if (min.prob > 0) {
if (!silent) {
cat('\nFiltering genes within the group with alteration frequency below',
min.prob,
'\n')
}
temp = events.selection(x, filter.in.names = group, silent = silent)
temp = as.alterations(temp, silent = silent)
temp = events.selection(temp, filter.freq = min.prob, silent = silent)
group = as.genes(temp)
if (!silent) {
cat('New group:', paste(group, collapse = ", ", sep = ""), '\n')
}
}
ngroup = length(group)
if (ngroup < 2) {
warning("No hypothesis will be created for groups with less than 2 elements.")
return(x)
}
hom.group =
lapply(group,
function(g, x) {
## Isn't (nevents(x, genes = g) > 1) enough?
if (nevents(x, genes = g) > 1) {
TRUE
} else {
FALSE
}
},
x)
hom.group = group[unlist(hom.group)]
gene.hom <- function(g, h) {
if (g %in% h) {
if (any(rowSums(as.gene(x, genes = g)) > 1) ) return(paste0("OR('", g, "')"))
else return(paste0("XOR('", g, "')"))
}
return(paste0("'", g, "'"))
}
max.groupsize = min(dim.max, ngroup)
min.groupsize = max(2, dim.min)
if (dim.min > dim.max) {
stop('ERROR - dim.min > dim.max')
}
if (min.groupsize > max.groupsize) {
stop('ERROR - min.groupsize > max.groupsize')
}
if (length(hom.group) > 0 && !silent) {
cat("Genes with multiple events: ",
paste(unlist(hom.group),
collapse=', ',
sep = ''),
"\n")
}
error.summary = data.frame()
## Get an analytical pattern... !
tot.patterns = 0
for (i in min.groupsize:max.groupsize) {
tot.patterns = tot.patterns + ncol(combn(unlist(group), i))
}
## Create a progress bar.
if (!silent) {
cat('Generating ',
tot.patterns,
'patterns [size: min =',
max.groupsize,
' - max =',
max.groupsize,
'].\n')
}
## pb <- txtProgressBar(0, tot.patterns, style = 3)
pbPos = 0
for (i in min.groupsize:max.groupsize) {
gr = combn(unlist(group), i)
for (j in 1:ncol(gr)) {
genes = as.list(gr[, j])
## Start the progress bar.
pbPos = pbPos + 1
hypo.name = paste(unlist(genes), sep = "_", collapse = "_")
hypo.genes =
paste(lapply(genes,
function(g, hom.group) {
gene.hom(g, hom.group)
},
hom.group),
collapse = ", ")
hypo.add = paste0("hypothesis.add(x, ",
"pattern.label = '", op, "_", hypo.name, "', ",
"lifted.pattern = ", op, "(", hypo.genes, "), ",
"pattern.effect = ", effect, ", ",
"pattern.cause = ", cause, ")")
err =
tryCatch({
x = eval(parse(text = hypo.add))
}, error = function(cond) {
m = paste("Error on", hypo.add, ".\n", cond)
code = strsplit(as.character(cond), " ")[[1]]
idx.errcode = which(code == "[ERR]", arr.ind = TRUE) + 1
return(
data.frame(
pattern = paste(unlist(genes), collapse = ", ", sep = ""),
error = paste(code[idx.errcode:length(code)], collapse = " ")
))
}, warning = function(cond) {
m = paste("Warning on", hypo.add, ".\n", cond)
return(genes)
})
## Dummy errors detection.
if (!("genotypes" %in% names(err)))
error.summary = rbind(error.summary, err)
}
}
if (nrow(error.summary) > 0) {
cat(paste(nrow(error.summary),
" genes pattern could not be added -- showing errors\n",
sep = ""))
print(error.summary)
} else if (!silent) {
cat("Hypothesis created for all possible patterns.\n")
}
return(x)
}
#' Add all the hypotheses related to homologou events
#' @title hypothesis.add.homologous
#' @param x A TRONCO compliant dataset.
#' @param pattern.cause Possibile causes for the pattern.
#' @param pattern.effect Possibile effects for the pattern.
#' @param genes List of genes to be considered as possible homologous. For these genes, all the types of mutations will be considered functionally equivalent.
#' @param silent A parameter to disable/enable verbose messages.
#' @return A TRONCO compliant object with the added hypotheses
#' @export hypothesis.add.homologous
#'
hypothesis.add.homologous <- function(x,
pattern.cause = '*',
pattern.effect = '*',
genes = as.genes(x),
silent = FALSE) {
is.compliant(x)
# check if there is a reconstructed model
if(has.model(x)) {
stop('This dataset has a reconstructed model and no more hypothesis can be added.')
}
hom.group =
lapply(genes,
function(g, x) {
if (nevents(x, genes = g) > 1) {
TRUE
} else {
FALSE
}
},
x)
hom.group = genes[unlist(hom.group)]
if (length(hom.group) == 0) {
warning("No genes with multiple events.")
return(x)
}
if (!silent) {
cat("*** Adding hypotheses for Homologous Patterns\n")
cat(' Genes:', paste(hom.group, collapse = ", ", sep = ""), '\n')
cat(' Cause:', paste(pattern.cause, collapse=", "), '\n')
cat(' Effect:', paste(pattern.effect, collapse=", "), '\n')
}
effect = paste0("c('", paste(pattern.effect, collapse = "', '"), "')")
cause = paste0("c('", paste(pattern.cause, collapse = "', '"), "')")
if (length(hom.group) == 0) {
warning("No genes with multiple events.")
return(x)
}
## Create a progress bar.
#pb <- txtProgressBar(0, length(hom.group), style = 3)
error.summary = data.frame()
for (i in 1:length(hom.group)) {
## Start the progress bar.
#setTxtProgressBar(pb, i)
if (!silent) {
cat('.')
}
## Check if the joint probability of homologous events is > 0,
## if yes the event will be added as 'OR', otherwise 'XOR'.
if ( any(rowSums(as.gene(x, genes = hom.group[[i]])) > 1)) {
FUN = 'OR'
} else {
FUN = 'XOR'
}
hypo.add =
paste0("hypothesis.add(x, ",
"pattern.label = '", FUN, "_", hom.group[[i]], "', ",
"lifted.pattern = ", FUN, "('", hom.group[[i]], "'), ",
"pattern.cause = ", cause, ", ",
"pattern.effect =", effect, ")")
err =
tryCatch({
x = eval(parse(text = hypo.add))
}, error = function(cond) {
m = paste("Error on", hypo.add, ".\n", cond)
code = strsplit(as.character(cond), " ")[[1]]
idx.errcode = which(code == "[ERR]", arr.ind = TRUE) + 1
return(data.frame(pattern =
paste(unlist(hom.group[[i]]),
collapse = ", ",
sep = ""),
error =
paste(code[idx.errcode:length(code)],
collapse = " ")))
}, warning = function(cond) {
m = paste("Warning on", hypo.add, ".\n", cond)
return(genes)
})
## Dummy errors detection.
if (!("genotypes" %in% names(err)))
error.summary = rbind(error.summary, err)
}
## Close progress bar.
#close(pb)
if (nrow(error.summary) > 0) {
cat(paste(nrow(error.summary),
" patterns could not be added -- showing errors\n",
sep = ""))
print(error.summary)
} else if (!silent) {
cat("Hypothesis created for all possible gene patterns.\n")
}
return(x)
}
# Internal function for hypotheses expansion
# @title hypotheses.expansion
# @param input_matrix A TRONCO adjacency matrix
# @param map hypothesis name - hypothesis adjacency matrix map
# @param expand Should I expand the hypotheses?
# @param skip.disconnected Hide disconnected node
#
hypotheses.expansion <- function(input_matrix,
map = list(),
expand = TRUE,
skip.disconnected = TRUE
) {
## Get node list.
node_list = colnames(input_matrix)
## Cut input matrix.
num_hypos = 0
if (length(map) > 0) {
num_hypos =
Reduce(sum,
lapply(ls(map),
function(x, y) {
if (x %in% y)
return(1)
},
y = node_list))
}
margin = length(node_list) - num_hypos
hypos_new_name = list()
## Check if there are hypotheses.
if (num_hypos == 0 || !expand) {
## If no hypos do nothing...
min_matrix = input_matrix
} else {
## ..else expand them.
min_matrix =
input_matrix[-(margin + 1):-length(node_list),
-(margin + 1):-length(node_list)]
## Create graph from matrix.
min_graph = graph.adjacency(min_matrix)
for (h in ls(map)) {
if (! h %in% node_list) {
next
}
hypo = map[[h]]
## Create graph from hypo.
hypo_graph = graph.adjacency(hypo)
## Name of this node.
h_mat <- rowSums(get.adjacency(hypo_graph, sparse=FALSE))
initial_node <- names(h_mat)[which(h_mat == 0)]
hypos_new_name[initial_node] = h
## Change names in confidence matrix according to
## hypotesis.
display.up = FALSE
if (length(which(input_matrix[, h] == 1)) != 0) {
display.up = TRUE
}
display.down = FALSE
if (length(which(input_matrix[h, ] == 1)) != 0) {
display.down = TRUE
}
## Display up hypo and reconnect.
if (display.up) {
hypo_pre = t(hypo)
node_names = rownames(hypo_pre)
node_names =
lapply(node_names,
function(x) {
if (is.logic.node(x)) {
paste0('UP', x)
} else {
return(x)
}
})
rownames(hypo_pre) = node_names
colnames(hypo_pre) = node_names
## Create graph from hypo.
hypo_graph_pre = graph.adjacency(hypo_pre)
## Name of this node.
h_mat_pre = colSums(get.adjacency(hypo_graph_pre, sparse = FALSE))
final_node = names(h_mat_pre)[which(h_mat == 0)]
hypos_new_name[final_node] = h
## Edge to reconstruct.
h_edge = input_matrix[, h]
initial_node_up = names(h_edge)[which(h_edge == 1)]
### Expand any hipothesis within final nodes.
#
# if (length(map) > 0) {
# initial_node_up_hyp = which(initial_node_up%in%names(map))
# if (length(initial_node_up_hyp) > 0) {
# final_node_hyp_expanded = colnames(map[[initial_node_up[initial_node_up_hyp]]])[1:(length(colnames(map[[initial_node_up[initial_node_up_hyp]]]))-1)]
# initial_node_up = initial_node_up[-initial_node_up_hyp]
# initial_node_up = c(initial_node_up, final_node_hyp_expanded)
# }
# }
## Add this graph to main graph.
min_graph = graph.union(min_graph, hypo_graph_pre)
## Recreate lost edge.
for (node in initial_node_up) {
min_graph = min_graph + edge(node, final_node)
}
}
## Display down hypo.
if (display.down) {
## Edge to reconstruct.
h_edge <- input_matrix[h,]
final_node <- names(h_edge)[which(h_edge == 1)]
### Expand any hipothesis within final nodes.
#
# if (length(map) > 0) {
# final_node_hyp = which(final_node%in%names(map))
# if (length(final_node_hyp) > 0) {
# final_node_hyp_expanded = colnames(map[[final_node[final_node_hyp]]])[1:(length(colnames(map[[final_node[final_node_hyp]]]))-1)]
# final_node = final_node[-final_node_hyp]
# final_node = c(final_node, final_node_hyp_expanded)
# }
# }
## Add this graph to main graph.
min_graph = graph.union(min_graph, hypo_graph)
## Recreate lost edge.
for (node in final_node) {
min_graph = min_graph + edge(initial_node, node)
}
}
}
min_matrix = get.adjacency(min_graph, sparse = FALSE)
}
## Sort col and row (igraph wants the same order).
min_matrix = min_matrix[,order(colnames(min_matrix))]
min_matrix = min_matrix[order(rownames(min_matrix)),]
return(list(min_matrix, hypos_new_name))
}
# Utility function to add the hypotheses
aux.log <- function( genotypes, annotations, function.name, ... ) {
hypotheses.env = get('hypotheses.env', asNamespace(getPackageName()))
if (!is.null(genotypes)
&& !is.null(annotations)
&& length(list(...)) > 0) {
clauses = list(...)
curr_genotypes = array(0, c(nrow(genotypes), length(clauses)))
hypotheses = list()
function.inputs = list()
fisher.tests = vector()
for (i in 1:length(clauses)) {
## If the clause is given by name, get the column from the
## genotypes.
if (typeof(clauses[[i]]) == "character") {
col.num = -1
## If I have the label, get the column in the
## genotypes for this event.
if (length(clauses[[i]]) == 1) {
event.map = emap(c(clauses[[i]],"*"), genotypes, annotations)
col.num = event.map$col.num
}
else if (length(clauses[[i]]) == 2) {
event.map = emap(clauses[[i]], genotypes, annotations)
col.num = event.map$col.num
}
if (col.num[1] == -1) {
stop(paste("[ERR] No events for gene ",
paste(clauses[[i]], collapse = ', ', sep = '')))
}
else {
curr_genotypes[, i] = genotypes[, col.num[1]]
if (length(col.num) > 1) {
curr_genotypes =
cbind(curr_genotypes,
genotypes[ , col.num[2:length(col.num)]])
}
if (length(hypotheses$llist) == 0) {
hypotheses$llist = list(event.map$events.name)
}
else {
hypotheses$llist = list(c(unlist(hypotheses$llist), event.map$events.name))
}
for (j in 1:length(event.map$events.name)) {
function.name =
paste(function.name, "_",
event.map$events.name[j],
sep = "")
function.inputs = c(function.inputs, event.map$events.name[j])
}
}
} else {
## Otherwise I already have the column as a vector.
curr_genotypes[,i] = clauses[[i]]$pattern
## if it is a list
if (length(hypotheses$llist) == 0) {
hypotheses$llist = clauses[[i]]$hypotheses$llist
} else {
hypotheses$llist = list(c(unlist(clauses[[i]]$hypotheses$llist),unlist(hypotheses$llist)))
}
function.name = paste(function.name, "_", clauses[[i]]$function.name, sep="")
function.inputs = c(function.inputs, clauses[[i]]$function.name)
}
}
result =
list(curr_genotypes = curr_genotypes,
hypotheses = hypotheses,
function.name = function.name,
function.inputs = function.inputs,
tests = pairwise.fisher.test(curr_genotypes))
## Save the new edges
for (k in 1:length(result$function.inputs)) {
lifting.edges.temp =
rbind(get('lifting.edges', envir = hypotheses.env),
c(result$function.inputs[[k]], result$function.name))
assign("lifting.edges", lifting.edges.temp, envir = hypotheses.env)
}
return(result)
} else {
stop("[ERR] Either the genotypes or the pattern not provided! No hypothesis will be created.")
}
return(NA)
}
#' AND hypothesis
#' @title AND
#' @param ... Atoms of the co-occurance pattern given either as labels or as partielly lifted vectors.
#' @return Vector to be added to the lifted genotype resolving the co-occurance pattern
#' @export AND
#
AND <- function( ... ) {
## Look for the variables named lifting.genotypes and
## lifting.annotations.
hypotheses.env = get('hypotheses.env', asNamespace(getPackageName()))
genotypes = get('lifting.genotypes', envir = hypotheses.env)
annotations = get('lifting.annotations', envir = hypotheses.env)
fisher.pvalues.temp = get('fisher.pvalues', envir = hypotheses.env)
if (!is.null(genotypes)
&& !is.null(annotations)
&& length(list(...)) > 0) {
## Get the vector of the clauses of the pattern from the
## genotypes.
result = aux.log(genotypes,annotations, "AND" ,...)
curr_genotypes = result$curr_genotypes
hypotheses = result$hypotheses
function.name = result$function.name
function.inputs = result$function.inputs
## Save the fisher exact tests.
if (length(result$tests) > 0) {
for (i in 1:length(result$tests)) {
curr.test = result$tests[[i]]
odds.ratio = curr.test[2]
if (curr.test[2] <= 0) {
curr.pvalue = 1
} else {
curr.pvalue = curr.test[1]
}
fisher.pvalues.temp = append(fisher.pvalues.temp, curr.pvalue)
}
assign("fisher.pvalues", fisher.pvalues.temp, envir = hypotheses.env)
}
## Evaluate the AND operator.
pattern = rep(0, nrow(genotypes))
for (i in 1:nrow(genotypes)) {
pattern[i] = sum(curr_genotypes[i, ])
if (pattern[i]<ncol(curr_genotypes)) {
pattern[i] = 0
} else {
pattern[i] = 1
}
}
pattern = as.integer(pattern)
result =
list(pattern = pattern,
hypotheses = hypotheses,
function.name = function.name,
function.inputs = function.inputs,
fisher.pvalues = fisher.pvalues.temp)
return(result)
} else {
stop("[ERR] Either the genotypes or the pattern not provided! No hypothesis will be created.");
}
return(NA)
}
#' OR hypothesis
#' @title OR
#' @param ... Atoms of the soft exclusive pattern given either as labels or as partielly lifted vectors.
#' @return Vector to be added to the lifted genotype resolving the soft exclusive pattern
#' @export OR
#
OR <- function( ... ) {
## Look for the variables named lifting.genotypes and
## lifting.annotations.
hypotheses.env = get('hypotheses.env', asNamespace(getPackageName()))
genotypes = get('lifting.genotypes', envir = hypotheses.env)
annotations = get('lifting.annotations', envir = hypotheses.env)
fisher.pvalues.temp = get('fisher.pvalues', envir = hypotheses.env)
if (!is.null(genotypes)
&& !is.null(annotations)
&& length(list(...)) > 0) {
## Get the vector of the clauses of the pattern from the
## genotypes.
result = aux.log(genotypes,annotations, "OR", ...)
curr_genotypes = result$curr_genotypes
hypotheses = result$hypotheses
function.name = result$function.name
function.inputs = result$function.inputs
## Save the fisher exact tests.
if (length(result$tests) > 0) {
for (i in 1:length(result$tests)) {
curr.test = result$tests[[i]]
curr.p.value = 1 - curr.test[1]
fisher.pvalues.temp = append(fisher.pvalues.temp, curr.p.value)
}
assign("fisher.pvalues", fisher.pvalues.temp, envir = hypotheses.env)
}
## Evaluate the OR operator.
pattern = rep(0, nrow(genotypes))
for (i in 1:nrow(genotypes)) {
pattern[i] = sum(curr_genotypes[i, ])
if (pattern[i] > 1) {
pattern[i] = 1
}
}
pattern = as.integer(pattern)
result =
list(pattern = pattern,
hypotheses = hypotheses,
function.name = function.name,
function.inputs = function.inputs,
fisher.pvalues = fisher.pvalues.temp)
return(result)
} else {
stop("[ERR] Either the genotypes or the pattern not provided! No hypothesis will be created.");
}
return(NA)
}
#' XOR hypothesis
#' @title XOR
#' @param ... Atoms of the hard exclusive pattern given either as labels or as partielly lifted vectors.
#' @return Vector to be added to the lifted genotype resolving the hard exclusive pattern
#' @export XOR
#'
XOR <- function( ... ) {
## Look for the variables named lifting.genotypes and
## lifting.annotations.
hypotheses.env = get('hypotheses.env', asNamespace(getPackageName()))
genotypes = get('lifting.genotypes', envir = hypotheses.env)
annotations = get('lifting.annotations', envir = hypotheses.env)
fisher.pvalues.temp = get('fisher.pvalues', envir = hypotheses.env)
if (!is.null(genotypes)
&& !is.null(annotations)
&& length(list(...)) > 0) {
## Get the vector of the clauses of the pattern from the
## genotypes.
result = aux.log(genotypes, annotations, "XOR", ...)
curr_genotypes = result$curr_genotypes
hypotheses = result$hypotheses
function.name = result$function.name
function.inputs = result$function.inputs
## Save the fisher exact tests.
if (length(result$tests)>0) {
for (i in 1:length(result$tests)) {
curr.test = result$tests[[i]]
odds.ratio = curr.test[2]
if (curr.test[2] >= 0) {
curr.pvalue = 1
}
else {
curr.pvalue = curr.test[1]
}
fisher.pvalues.temp = append(fisher.pvalues.temp, curr.pvalue)
}
assign("fisher.pvalues", fisher.pvalues.temp, envir = hypotheses.env)
}
## Evaluate the XOR operator.
pattern = rep(0,nrow(genotypes))
for (i in 1:nrow(genotypes)) {
pattern[i] = sum(curr_genotypes[i,])
if (pattern[i]>1) {
pattern[i] = 0
}
}
pattern = as.integer(pattern)
result =
list(pattern = pattern,
hypotheses = hypotheses,
function.name = function.name,
function.inputs = function.inputs,
fisher.pvalues = fisher.pvalues.temp)
return(result)
} else {
stop("[ERR] Either the genotypes or the pattern not provided! No hypothesis will be created.");
}
return(NA);
}
### Return the position in the genotypes of the event referring to the
### given label.event.
emap <- function(label.event, genotypes, annotations) {
col.num = -1;
events.name = "";
if (!is.null(genotypes) && !is.null(annotations)) {
if (label.event[2] != "*") {
curr.events = which(annotations[, "event"] == label.event[1] &
annotations[, "type"] == label.event[2]);
} else {
curr.events = which(annotations[, "event"] == label.event[1]);
}
if (length(curr.events) > 0) {
events.name = names(curr.events);
col.num = which(colnames(genotypes) %in% events.name);
}
} else {
stop("[ERR] A genotypes must be available in order to define any hypothesis!",call.=FALSE);
}
results = list(col.num = col.num, events.name = events.name);
return(results);
}
### Return the adjacency matrix of the pattern given the list of edges
### involving it.
get.lifted.pattern <- function(lifted.edges) {
## Structure to save the adjacency matrix.
lifted.adj.matrix =
array(0,
c(length(unique(c(lifted.edges[,1],lifted.edges[,2]))),
length(unique(c(lifted.edges[,1],lifted.edges[,2])))));
rownames(lifted.adj.matrix) = unique(c(lifted.edges[,1],lifted.edges[,2]));
colnames(lifted.adj.matrix) = rownames(lifted.adj.matrix);
## Build the matrix given the lifted.edges.
for (i in 1:nrow(lifted.edges)) {
lifted.adj.matrix[lifted.edges[i, 1], lifted.edges[i, 2]] = 1;
}
return(lifted.adj.matrix);
}
### given the hypotheses and the adj.matrix, return the updated
### adj.matrix
hypothesis.adj.matrix <- function(hypotheses, adj.matrix) {
if (!is.na(hypotheses[1])) {
## Set the invalid entries in the adj.matrix hypotheses can
## not be causing other hypotheses.
adj.matrix[(ncol(adj.matrix) - hypotheses$num.hypotheses + 1):ncol(adj.matrix),
(ncol(adj.matrix) - hypotheses$num.hypotheses + 1):ncol(adj.matrix)] = 0;
## Consider the given hypotheses only against the specified possible effects.
adj.matrix[(ncol(adj.matrix) - hypotheses$num.hypotheses + 1):ncol(adj.matrix),
1:(ncol(adj.matrix) - hypotheses$num.hypotheses)] = 0
adj.matrix[1:(ncol(adj.matrix) - hypotheses$num.hypotheses),
(ncol(adj.matrix) - hypotheses$num.hypotheses + 1):ncol(adj.matrix)] = 0
## Set the elements from the hlist.
for (i in 1:nrow(hypotheses$hlist)) {
cause = which(colnames(adj.matrix) %in% hypotheses$hlist[i, "cause"]);
effect = which(colnames(adj.matrix) %in% hypotheses$hlist[i, "effect"]);
if (length(cause)>0 && length(effect)>0) {
adj.matrix[cause,effect] = 1;
}
}
}
return(adj.matrix);
}
### performs pairwise exact fisher test.
pairwise.fisher.test <- function(data) {
## Structure to save the results.
results = vector();
if (ncol(data) > 1) {
for (i in 1:ncol(data)) {
for (j in i:ncol(data)) {
if (i != j) {
df = data[ , c(i, j)]
df_x = data[ , 1]
df_y = data[ , 2]
## Lifting xor.
df_xor = df_x + df_y
df_xor[df_xor > 1] = 0
## Lifting or.
df_or = df_x + df_y
df_or[df_or > 1] = 1
## Lifting and.
df_and = df_x + df_y
df_and[ df_and < 2] = 0
df_and[ df_and == 2] = 1
## 2x2 contingency table.
table.xor =
rbind(c(nrow(df) - sum(df_or), sum(df_or - df_y)),
c(sum(df_or - df_x), sum(df_and))
)
## Fisher 2-sided.
test = fisher.test(table.xor)
## Save the results.
curr_result = c(test$p.value, log(test$estimate['odds ratio']))
results = append(results, list(curr_result))
}
}
}
}
return(results)
}
#### end of file -- capri.hypotheses.R
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.