#' Computing the importance of each prime implicant
#'
#' A helper function for computing the importance of each prime implicant in a forest
#' @param input A dataset matrix containing all gene expression. Each value corresponds to a gene expression of an input gene at a certain time point t.
#' @param resp A vector containing the gene expression of the target gene.
#' @param forest A set of And/Or trees, each tree is a list
#'
#' @import BoolNet
#'
#' @return a vector includes the importances, the order of the importances corresponds to the order of prime implicants in the forest
#'
#' @seealso \code{\link{generateRandomNKNetwork}}
#' @examples
#' ##Generate a random Boolean network
#' net1<-generateRandomNKNetwork(n=10, k=5, topology="scale_free",linkage="uniform",simplify=TRUE,readableFunctions=TRUE)
#'
#' ##Build the time-series data
#' datalist<-buildTimeSeries(net1,2,5,0)
#'
#' ##Select the first node as target gene and find 5 putative predictive And/Or trees using very simple parameters
#' datalist[[3]]<-datalist[[3]][,1]
#' forest<-lapply(rep(1,5),function(x){saalg(datalist,8,NULL,-2,-1,200)})
#'
#' ##To see all prime implicants in the forest
#' PIs<-unique(unlist(forest,recursive = F))
#' Importances<-calImportance(datalist[[2]],datalist[[3]],forest)
#' print(PIs)
#' print(Importances)
#'
#' @references Müssel, Christoph, Martin Hopfensitz, and Hans A. Kestler. "BoolNet—an R package for generation, reconstruction and analysis of Boolean networks." Bioinformatics 26.10 (2010): 1378-1380.
#' @export
#'
calImportance<-function(input,resp,forest){
nodes<-ncol(input)
PrimeImp<-unique(unlist(forest,recursive = F))
nPrime<-length(PrimeImp)
Importances<-rep(0,nPrime)
ntree<-length(forest)
if(nPrime==1)
return(Importances)
for(i in 1:nPrime){
pi<-PrimeImp[[i]]
temp_vi<-0
for(ii in 1:length(forest)){
tree<-forest[[ii]]
temp_tree<-tree
ltree<-length(tree)
oriMSE<-evolTree(tree,input,nodes)
oriMSE<-length(which(resp!=oriMSE))/length(resp)
for(iii in 1:ltree){
if(identical(tree[[iii]],pi)){
flag<-TRUE
tree[[iii]]<-NULL
newMSE<-evolTree(tree,input,nodes)
newMSE<-length(which(resp!=newMSE))/length(resp)
eachvi<-newMSE-oriMSE
break
}
else
flag<-FALSE
}
if(flag==FALSE){
tree[[ltree+1]]<-pi
newMSE<-evolTree(tree,input,nodes)
newMSE<-length(which(resp!=newMSE))/length(resp)
eachvi<-oriMSE-newMSE
}
temp_vi<-temp_vi+eachvi
}
Importances[i]<-temp_vi/ntree
}
return(Importances)
}
#' Replace the names of new prime implicants with old prime implicants
#'
#' A helper function for replacing the names of new prime implicants (pis) and old prime implicants
#' @param newnames A string representing new names of pis
#' @param oldnames A string representing old names of pis
#' @param allnodes The number of nodes in the Boolean network
#'
#' @return the correct names of pis
#' @examples
#' ##old names
#' pis<-list(3,c(2,4),13)
#' oldnames<-sapply(pis,function(x){paste0(sort(x),collapse = "&")},simplify="array")
#' print(oldnames)
#'
#' ##new names "1&3" is expected as a combined string made up by "3&13", and assuming 10 nodes in total, then
#' newnames<-c("1&3")
#' print(newnames)#'
#' replaceName(newnames,oldnames,10)
#'
#' newnames<-c("2&4")
#' ##expect to be "2&4&13"
#' replaceName(newnames,oldnames,10)
#' @export
replaceName<-function(newnames,oldnames,allnodes){
nnodes<-length(oldnames)
for(i in 1:length(newnames)){
temp<-as.numeric(unique(unlist(strsplit(newnames[i],"&"))))
f_temp<-temp
for(ii in 1:length(temp)){
if(temp[ii]<=nnodes)
f_temp[ii]<-oldnames[temp[[ii]]]
else{
all_wait_replace<-as.numeric(unlist(strsplit(oldnames[temp[[ii]]-nnodes],"&")))
v_delete<-all_wait_replace[all_wait_replace>allnodes]-allnodes
v_add<-all_wait_replace[all_wait_replace<=allnodes]+allnodes
all_wait_replace<-c(v_delete,v_add)
all_wait_replace<-unique(all_wait_replace)
f_temp[ii]<-paste0(all_wait_replace,collapse = "&")
}
}
newnames[i]<-paste0(unique(unlist(strsplit(f_temp,"&"))),collapse = "&")
}
return(newnames)
}
#' Find the important prime implicants
#'
#' Given a time-series data, try to find the important prime implicants that are predictive for the gene expression level of a target gene
#' @param B The number of And/Or trees that used for generating a set of new prime implicants
#' @param datalist A data list generated by \link{buildTimeSeries} or \link{bootstrap}
#' @param datasamples The number of nodes in the Boolean network
#' @param parameters A vector includes 6 parameters; startT, endT and maxIter refer to the upper, lower temperature (on a log10 scale) and the maximum number of iterations used in simulated annealing algorithm
#' maxK represents the maximum number of input nodes of the target node (the maximum number of leaves in an And/Or tree), it is required when prior knowledge, e.g. the in-degree is 8, is available. If such information is not known, then it can be set as a very large value, e.g. '.Machine$integer.max'
#' rate represents how many non-important PIs are removed in each recursion.
#' nodes represents the number of node in the Boolean network
#' @param seed This seed is made for parLapply to reproduce the results
#'
#' @import BoolNet
#'
#' @seealso \code{\link{generateRandomNKNetwork}}
#'
#' @return A set of important prime implicants. Or if only a few prime implicants are found, then a final Boolean function will be generated afterwards.
#' @examples
#' ## ngenes is the number of nodes in a Boolean network
#' library(BoolNet)
#' ngenes<-10
#' ## k is the maximum number of genes
#' k<-5
#' ## call generateRandomNKNetwork to generate a Boolean network
#' net1<-generateRandomNKNetwork(ngenes, k, topology="scale_free",simplify=TRUE,readableFunctions=TRUE)
#'
#' ## build the time-series data
#' datalist<-buildTimeSeries(network=net1,numSeries=10,numPoints=10,noiseLevel=0)
#'
#' ## select a target gene
#' target<-3
#' ## Generate the bootstrap samples and oob samples according to the time-series data
#' datasamples<-bootstrap(datalist)
#' ## respinbag and respoutbag save the expression values of the target node
#' datasamples$respinbag<-matrix(datasamples$respinbag[,target])
#' datasamples$respoutbag<-matrix(datasamples$respoutbag[,target])
#'
#' ## Initilize the parameters
#' parameters<-c(startT=2,endT=-1,maxIter=5000,maxK=8,rate=0.2,nodes=ngenes)
#'
#' ## Try to find the prime implicants
#' PIs<-findPIs(B=10,datalist,datasamples,parameters,123)
#'
#'
#' @export
#'
findPIs<-function(B,datalist,datasamples,parameters,seed){
nnodes<-ncol(datalist[[1]])
no_cores <- parallel::detectCores() - 1
cl <- parallel::makeCluster(no_cores)
parallel::clusterSetRNGStream(cl = cl, seed)
parallel::clusterExport(cl,ls(environment()),envir=environment())
forest<-parallel::parLapply(cl, 1:B,function(x){
tree<-saalg(datasamples,parameters[4],NULL,parameters[1],parameters[2],parameters[3])
return(tree)})
parallel::stopCluster(cl)
rm(cl)
Importances<-calImportance(datasamples$outbag,datasamples$respoutbag,forest)
orders<-order(Importances,decreasing = T)
Importances<-Importances[orders]
PIs<-unique(unlist(forest,recursive = F))[orders]
Importances<-Importances[Importances>0]
PIs<-PIs[1:length(Importances)]
nameOfpis<-sapply(PIs,function(x){paste0(sort(x),collapse = "&")},simplify="array")
if(length(PIs)==1){
cat("only 1 prime implicant was found \n")
cat("the final Boolean function of node", target, "is returned\n")
rs<-changeName(nameOfpis,nnodes)
return(rs)
}
tl<-1:(ceiling(length(Importances)*(1-parameters[5])))
Importances<-Importances[tl]
PIs<-PIs[tl]
nameOfpis<-nameOfpis[tl]
if(Importances[1]>sum(Importances)*0.95){
new_nameOfpis<-paste0(PIs[[1]],collapse = "&")
#new_nameOfpis<-replaceName(new_nameOfpis,nameOfpis,nnodes)
rs<-sapply(new_nameOfpis,changeName,nnodes)
rs<-paste0(rs,collapse = " || ")
cat("the final Boolean function of node", target, "is returned \n")
return(rs)
}
if(length(PIs)<=4){
datalist[[2]]<-generateData(PIs,datalist)
datalist[[3]]<-matrix(datalist[[3]][,target])
tree<-saalg2(datalist,parameters[4],NULL,parameters[1],parameters[2],parameters[3],PIs,parameters[6])
PIs<-unique(unlist(tree,recursive = F))
new_nameOfpis<-sapply(PIs,function(x){paste0(x,collapse = "&")},simplify="array")
new_nameOfpis<-replaceName(new_nameOfpis,nameOfpis,nnodes)
PIs<-lapply(new_nameOfpis,function(x) as.integer(unlist(strsplit(x,split = "&"))))
PIs<-minimization(PIs,nnodes)
if(length(unlist(PIs))==1){
if(as.numeric(unlist(PIs))==1){
cat("This node is probably a self-controlled node")
name<-changeName(new_nameOfpis,nnodes)
return(name)
}
}
#if(length(new_nameOfpis)==2){
# if(abs(as.numeric(PIs[[1]])-as.numeric(PIs[[2]]))==nnodes)
# cat("the node ", target, "might be a self-controlled or an isolated node\n")
#}
new_nameOfpis<-sapply(PIs,function(x){paste0(x,collapse = "&")},simplify="array")
rs<-sapply(new_nameOfpis,changeName,nnodes)
rs<-paste0(rs,collapse = " || ")
cat("the final Boolean function of node", target, "is returned \n")
return(rs)
}
return(PIs)
}
#' RFRE and Boolean function reconstruction
#'
#' A recursive feature reconstruction and elimination (RFRE) procedure for selecting a set of important prime implicants as features. Then final solution (i.e. Boolean function) is generated.
#' @param B The number of And/Or trees that used for generating a set of prime
#' @param PIs A list of prime implicants
#' @param target The integer indicating which node is the target node
#' @param parameters A vector includes 6 parameters; startT, endT and maxIter refer to the upper, lower temperature (on a log10 scale) and the maximum number of iterations used in simulated annealing algorithm
#' maxK represents the maximum number of input nodes of the target node (the size of the tree), it is required when prior knowledge, e.g. the in-degree is 8, is available. If such information is not known, then it can be set as a very large value, e.g. '.Machine$integer.max'
#' rate represents how many non-important PIs are removed in each recursion.
#' nodes represents the number of node in the Boolean network
#' @param datalist A data list generated by \link{buildTimeSeries} or \link{bootstrap}
#' @param datasamples The number of nodes in the Boolean network
#' @param seed This seed is made for parLapply to reproduce the results
#'
#' @import BoolNet
#'
#' @seealso \code{\link{findPIs}}
#'
#' @return A final And/Or tree (i.e., a Boolean function)
#' @examples
#' ## After we get a set of prime implicants, we use this function to generate the final Boolean function
#' ## choose the same target gene
#' target<-3
#' datalist[[2]]<-generateData(PIs,datalist)
#' datalist[[3]]<-matrix(datalist[[3]][,target])
#' datasamples<-bootstrap(datalist)
#' datasamples$respinbag<-matrix(datasamples$respinbag)
#' datasamples$respoutbag<-matrix(datasamples$respoutbag)
#'
#' parameters<-c(startT=2,endT=-1,maxIter=2000,maxK=8,rate=0.2,nodes=10)
#'
#' ## Identify the final Boolean function with 5 trees in the forest
#' findBF(5,PIs,target,parameters,datalist,datasamples,123)
#'
#' @export
#'
findBF<-function(B,PIs,target,parameters,datalist,datasamples,seed){
nnodes<-ncol(datalist[[1]])
nameOfpis<-sapply(PIs,function(x){paste0(sort(x),collapse = "&")},simplify="array")
count<-1
repeat{
no_cores <- parallel::detectCores() - 1
cl <- parallel::makeCluster(no_cores)
parallel::clusterSetRNGStream(cl = cl, seed)
parallel::clusterExport(cl,ls(environment()),envir=environment())
forest<-parallel::parLapply(cl, 1:B,function(x){
tree<-saalg2(datasamples,parameters[4],NULL,parameters[1],parameters[2],parameters[3],PIs,parameters[6])
return(tree)
})
parallel::stopCluster(cl)
rm(cl)
Importances<-calImportance(datasamples$outbag,datasamples$respoutbag,forest)
orders<-order(Importances,decreasing = T)
Importances<-Importances[orders]
PIs<-unique(unlist(forest,recursive = F))
PIs<-PIs[orders]
Importances<-Importances[Importances>0]
PIs<-PIs[1:length(Importances)]
PIs<-PIs[1:(length(PIs)*(1-parameters[5]))]
PIs[sapply(PIs, is.null)] <- NULL#
if(length(PIs)==1){
new_nameOfpis<-sapply(PIs,function(x){paste0(x,collapse = "&")},simplify="array")
new_nameOfpis<-replaceName(new_nameOfpis,nameOfpis,nnodes)
nameOfpis<-new_nameOfpis
cat("the final Boolean function of node", target, "is returned\n")
rs<-changeName(nameOfpis,nnodes)
return(rs)
}
if(all(sapply(PIs,length)==1)){
new_nameOfpis<-sapply(PIs,function(x){paste0(x,collapse = "&")},simplify="array")
new_nameOfpis<-replaceName(new_nameOfpis,nameOfpis,nnodes)
nameOfpis<-new_nameOfpis
#datalist[[2]]<-datalist[[2]][,unlist(PIs)]
datalist[[2]]<-generateData(PIs,datalist)#
break
}
PIs<-minimization(PIs,length(nameOfpis))
new_nameOfpis<-sapply(PIs,function(x){paste0(x,collapse = "&")},simplify="array")
new_nameOfpis<-replaceName(new_nameOfpis,nameOfpis,nnodes)
datalist[[2]]<-generateData(PIs,datalist)
datasamples<-bootstrap(datalist)
datasamples$respinbag<-matrix(datasamples$respinbag)
datasamples$respoutbag<-matrix(datasamples$respoutbag)
if(length(setdiff(new_nameOfpis,nameOfpis))==0)#identical(sort(new_nameOfpis),sort(nameOfpis)
break
nameOfpis<-new_nameOfpis
if(count==10)
break
}
tree<-saalg2(datalist,parameters[4],NULL,parameters[1],parameters[2],parameters[3],PIs,parameters[6])
tree<-minimization(tree,ncol(datalist[[2]]))
PIs<-unique(unlist(tree,recursive = F))
PIs<-PIs[!is.na(PIs)]
new_nameOfpis<-sapply(PIs,function(x){paste0(x,collapse = "&")},simplify="array")
new_nameOfpis<-replaceName(new_nameOfpis,nameOfpis,nnodes)
PIs<-lapply(new_nameOfpis,function(x) as.integer(unlist(strsplit(x,split = "&"))))
PIs<-minimization(PIs,nnodes)
new_nameOfpis<-sapply(PIs,function(x){paste0(x,collapse = "&")},simplify="array")
rs<-sapply(new_nameOfpis,changeName,ngenes=nnodes)
rs<-paste0(rs,collapse = " || ")
cat("the final Boolean function of node", target, "is returned \n")
return(rs)
}
#' Tree growth
#'
#' A helper function for running tree-growth based on simulated annealling algorithm
#' @param data A data list generated by \link{buildTimeSeries} or \link{bootstrap}
#' @param maxK The size of the tree, or the maximum number of input genes; By defalt, maxK=8
#' @param initT the upper temperature (on a log10 scale)
#' @param endT the lower temperature (on a log10 scale)
#' @param iter the maximum number of iterations used in simulated annealing algorithm
#' @param pis A list of prime implicants
#' @param allnodes The number of genes in the Boolean network
#'
#' @return An And/Or tree
#' @export
#'
saalg2<-function(data,maxK,tree,initT,endT,iter,pis,allnodes){
if(length(data)==4){
input<-data$inbag
resp<-data$respinbag
}
else{
input<-data[[2]]
resp<-data[[3]]
}
currentnodes<-ncol(input)
if(missing(maxK)){
maxK<-8
}
if(missing(tree)||is.null(tree)||length(tree) == 0){
tree<-list(sample(1:(currentnodes*2),1))
}
T<-10^initT
count<-1
oldtree<-tree
repeat{
count<-count+1
newtree<-growtree2(oldtree,pis,maxK,currentnodes,allnodes)
prob<-min(1,exp(mse(input,resp,oldtree,newtree,currentnodes)/T))
temp<-runif(1)
cse<-abs(initT)+abs(endT)
if(is.element(count,(iter/cse)*(1:cse))){
T<-T/10
}
if(T<=10^endT){
break
}
if(temp<prob){
oldtree<-newtree
}
if(count==iter)
break
}
return(oldtree)
}
#' Tree growth based on PIs
#'
#' A helper function for running tree-growth while each leaf is a prime implicant
#' @param tree an And/Or tree
#' @param pis A list of prime implicants; Each prime implicant is a leaf
#' @param maxK The size of the tree, or the maximum number of input genes; By defalt, maxK=8
#' @param currentnodes The number of prime implicants that used for inferring a tree
#' @param allnodes The number of genes in the Boolean network
#' @param penalty TRUE or FALSE used to control the size of the tree
#' @param fast A argument used to indicate whether a sort is needed
#'
#' @return An And/Or tree
#' @export
#'
growtree2<-function(tree,pis,maxK,currentnodes,allnodes,penalty=FALSE,fast=TRUE){
if(missing(maxK))
maxK<-8
#currentnodes<-currentnodes*2 #Boolean variable and its negation
if(missing(tree)||is.null(tree)||length(tree)==0){
tree<-sample(currentnodes*2,1)
return(as.list(tree))
}
pos<-unlist(tree)[unlist(tree)<=currentnodes]
neg<-setdiff(unlist(tree),pos)-currentnodes
trueNodes<-unique(c(unlist(pis[pos]),unlist(pis[neg])))
if(length(trueNodes)>=maxK||length(tree)>=factorial(maxK)){
penalty<-TRUE
}
#if a pi fixed, making branch starting from 2
tree<-growor2(tree,pis,maxK,currentnodes,allnodes,penalty,fast)
return(tree)
}
#' Subtree (Or) growth based on PIs
#'
#' A helper function for running tree-growth while each leaf is a prime implicant
#' @param tree an And/Or tree
#' @param pis A list of prime implicants; Each prime implicant is a leaf
#' @param maxK The size of the tree, or the maximum number of input genes; By defalt, maxK=8
#' @param currentnodes The number of prime implicants that used for inferring a tree
#' @param allnodes The number of genes in the Boolean network
#' @param penalty TRUE or FALSE used to control the size of the tree
#' @param fast A argument used to indicate whether a sort is needed
#'
#' @return An And/Or tree
#' @export
#'
growor2<-function(tree,pis,maxK,currentnodes,allnodes,penalty=FALSE,fast=TRUE){
if(!is.list(tree))
tree<-list(tree)
pos<-unlist(tree)[unlist(tree)<=currentnodes]
neg<-setdiff(unlist(tree),pos)-currentnodes
trueNodes<-unique(c(unlist(pis[pos]),unlist(pis[neg])))
branch<-length(tree) #number of branch
if(length(trueNodes)>=maxK||branch>=factorial(maxK)){
penalty<-TRUE
}
PandN<-currentnodes*2
temp<-runif(1)
pselect<-selection()
if(penalty){
if(temp<0.5)
temp<-2
}
if((temp<pselect[1]||temp==2)){
if(branch>1){
tree[[sample(1:branch,1)]]<-NULL
return(tree)
}
else
temp<-pselect[4]*runif(1)+pselect[1]
}
if((temp<pselect[2]||is.null(tree))&&!penalty){
if(is.null(tree))
return(list(sample(PandN,1)))
else
tree<-c(tree,sample(PandN,1))
}
else{
nbranch<-length(tree)
nsub<-sample(1:branch,1)
if(temp<pselect[3])
dec<-1
else if(temp<pselect[4])
dec<-2
else{
dec<-3
}
if(branch==1)
dec<-sample(c(1,3),1)
subtree<-growand2(tree,tree[[nsub]],pis,(maxK - length(trueNodes)),currentnodes,allnodes,penalty,dec)#replace by a new branch, limit the size as well
if(is.list(subtree))
return(subtree)
if(is.null(subtree)){
if(length(tree)!=1)
tree[[nsub]]<-NULL
else
return(tree)
return(list(sample(PandN,1)))
#return(tree)
}
tree[[nsub]]<-unique(unlist(subtree))
}
if(length(tree)!=0&&fast==FALSE)
tree<-reorderTree(tree)
return(tree)
}
#' Subtree (And) growth based on PIs
#'
#' A helper function for running tree-growth while each leaf is a prime implicant
#'
#' A helper function for running tree-growth while each leaf is a prime implicant
#' @param tree An And/Or tree
#' @param subtree A subtree, i.e. a prime implicant
#' @param pis A list of prime implicants; Each prime implicant is a leaf
#' @param maxK The size of the tree, or the maximum number of input genes; By defalt, maxK=8
#' @param currentnodes The number of prime implicants that used for inferring a tree
#' @param allnodes The number of genes in the Boolean network
#' @param penalty TRUE or FALSE used to control the size of the tree
#' @param fast An argument used to indicate whether a sort is needed
#' @param dec An argument used to control the tree growth
#'
#' @return An And/Or tree
#' @export
#'
growand2<-function(tree,subtree,pis,maxK,currentnodes,allnodes,penalty=FALSE,dec){
leftindexofpis<-unlist(subtree)
if(length(leftindexofpis)==1){
if(dec==3){
subtree<-sample(setdiff(1:length(pis),leftindexofpis),1)
return(subtree)
}
}
pos<-leftindexofpis[leftindexofpis<=currentnodes]
neg<-leftindexofpis[leftindexofpis>currentnodes]
inputnodes<-setdiff(1:(currentnodes*2),c(leftindexofpis,pos+currentnodes,neg-currentnodes))
nleaf<-length(subtree)#number of literals in the subtree
if(length(inputnodes)==0||nleaf>=currentnodes/2){
dec<-2
penalty<-TRUE
}
if(dec==1){
temp1<-sample(1:nleaf,1)
if(nleaf==1){
subtree<-sample(1:(2*currentnodes),1)
return(subtree)
}
flag<-0
repeat{
temp2<-sample(inputnodes,1)
if(temp2>currentnodes){
temp2<-temp2-currentnodes
flag<-1
}
leftindexofpis<-unlist(subtree[-temp1])
pos<-leftindexofpis[leftindexofpis<=currentnodes]
neg<-leftindexofpis[leftindexofpis>currentnodes]
truepis<-unique(unlist(pis[c(pos,neg)]))
newpis<-pis[[temp2]]
if(!conflict(newpis,truepis,allnodes))
break
inputnodes<-setdiff(inputnodes,temp2)
if(length(inputnodes)==0)#check
subtree<-growor2(tree,pis,maxK,currentnodes,allnodes)
if(is.list(subtree))
return(subtree)
}
subtree<-subtree[-temp1]
if(flag==0)
subtree<-c(subtree,temp2)
if(flag==1)
subtree<-c(subtree,temp2+currentnodes)
subtree<-sort(unique(subtree))
}
else if(dec==2||penalty){
if(nleaf==1)
return(NULL)
subtree<-subtree[-sample(1:nleaf,1)]
subtree<-unique(sort(subtree))
}
else{
repeat{
flag<-0
temp1<-sample(inputnodes,1)
if(temp1>currentnodes){
temp1<-temp1-currentnodes
flag<-1
}
if(!conflict(pis[[temp1]],subtree,allnodes)){
break
}
if(flag==1)
inputnodes<-setdiff(inputnodes,temp1+currentnodes)
else
inputnodes<-setdiff(inputnodes,temp1)
if(length(inputnodes)==0)
subtree<-(growor2(tree,pis,maxK,currentnodes,allnodes))
if(is.list(subtree)||is.null(subtree))
return(subtree)
}
if(flag==0)
subtree<-c(subtree,temp1)
else
subtree<-c(subtree,temp1+currentnodes)
subtree<-unique(sort(subtree))
}
return(subtree)
}
#' Tautology elimination
#'
#' A helper function for avoiding tautology
#'
#' @param pi1 A prime implicant
#' @param pi2 Another prime implicant
#' @param allnodes The number of genes in the Boolean network
#'
#' @return TRUE/False
#' @export
#'
conflict<-function(pi1,pi2,allnodes){
if(length(intersect((pi1-allnodes),pi2))==0||length(intersect((pi2-allnodes),pi1))==0)
return(TRUE)
return(FALSE)
}
#' Generate data matrices
#'
#' A helper function for generating new data matrix in RFRE framework
#'
#' @param PIs A list of prime implicants
#' @param datalist A data list generated by \link{buildTimeSeries} or \link{bootstrap}.
#'
#' @return A list of data matrices containg the expression values of PIs
#' @export
#'
generateData<-function(PIs,datalist){
nodes<-ncol(datalist[[2]])
newData<-sapply(PIs,function(x,datalist,nodes){
x<-list(x)
return(evolTree(x,datalist[[2]],nodes))
},datalist=datalist,nodes=nodes)
return(newData)
}
#' Generate a list of data matrices
#'
#' Build the binary time series dataset matrix that used for network inference
#'
#' @param network A network structure generated by generateRandomNKNetwork() in BoolNet
#' @param numSeries The number of time series
#' @param numPoints The number of time points that is covered by each time series
#' @param noiseLevel The level of the nosie. e.g. 0.05 indiciates the value of a gene can be flipped with probability $5\%$
#' @param wildtype A vector containing the wildtype expression values. This vector is the initial state.
#'
#' @return A list of data matrices containg the expression values of each gene.
#' The row/column corresponds to time point/gene respectively.
#' The second element refers to the input data.
#' The third element refers to the respond data of the target gene.
#' @examples
#' ngenes<-10
#' ## k is the maximum number of genes
#' k<-5
#' ## call generateRandomNKNetwork to generate a Boolean network
#' net1<-generateRandomNKNetwork(ngenes, k, topology="scale_free",simplify=TRUE,readableFunctions=TRUE)
#' datalist<-buildTimeSeries(network=net1,numSeries=10,numPoints=10,noiseLevel=0)
#'
#' @export
#'
buildTimeSeries <- function(network,numSeries,numPoints,noiseLevel,wildtype){
if(missing(noiseLevel)){noiseLevel<-0}
nodes<-length(network$genes)
inputdata <- NULL
for (i in 1:numSeries){
if(missing(wildtype)){
startState <- round(runif(nodes))}
else{
startState <- wildtype}
res <- startState
for (j in 2:numPoints){
startState <- stateTransition(network,startState)
res <- rbind(res,startState)
}
inputdata <- rbind(inputdata,res)
}
if(noiseLevel != 0){
inputdata<-matrix(sapply(inputdata,function(c){
rand<-runif(1)
if(rand<=noiseLevel)
c<-abs(c-1)
else
c<-c
},
simplify=T),
nrow=numSeries*numPoints,byrow=F)
}
rownames(inputdata)<-rep(1:numPoints,numSeries)
colnames(inputdata)<-c(network$genes)
extract<-c(1:(numPoints-1))
newextract<-NULL
for(i in 1:(numSeries-1))
newextract<-c(newextract,extract+numPoints*i)
extract<-c(extract,newextract)
finalextract<-extract+1
Y<-inputdata[finalextract,]
X<-inputdata[extract,]
DataExp<-list(inputdata,X,Y)
return(DataExp)
}
#' Bagging
#'
#' Generate bootstrap samples for determining the importance of prime implicant.
#'
#' @param data the data list generated by \code{\link{buildTimeSeries}}
#'
#' @return The data list after bagging.
#' The first two elements are the in-bag data (input and respond).
#' The last two elements are the out-of-bag data (input and respond).
#'
#' @export
#'
bootstrap<-function(data){
if(is.list(data)){
input<-data[[2]]
resp<-data[[3]]
timepoints<-nrow(input)
selected<-sample(timepoints,timepoints,replace=TRUE)
inbag<-input[selected,]
respinbag<-resp[selected,]
oob<-setdiff(1:timepoints,selected)
outbag<-input[oob,]
respoutbag<-resp[oob,]
}
else{
warnings("Please invoke buildTimeSeries() function to generate a valid time-series data")
return(data)
}
rs<-list(inbag=inbag,respinbag=respinbag,outbag=outbag,respoutbag=respoutbag)
return(rs)
}
#' Reorder the tree
#'
#' A helper function for sorting the And/Or tree
#'
#' @param tree And/Or tree
#'
#' @return A sorted And/Or tree
#' @export
#'
reorderTree<-function(tree){
nbranch<-length(tree) #the number of branch
if(is.null(tree)) return(NULL)
if(length(tree)==1){
return(tree[order(tree,decreasing = F)])
}
tree<-lapply(tree,sort,decreasing = F)
lengthBranch<-unlist(lapply(tree,length))#length of each AND
orderBranch<-order(lengthBranch,decreasing=F)
lengthBranch<-sort(lengthBranch,decreasing=F)
if(length(unique(lengthBranch))==1){
tree<-bubblesort(tree)
return(tree)
}
if(!any(duplicated(lengthBranch))){
tree<-lapply(1:nbranch,function(x,newtree,oldtree,orderBr){
tree[[orderBr[x]]]
},newtree=tmpTree,oldtree=tree,orderBr=orderBranch)
return(tree)
}
else{
tree<-lapply(1:nbranch,function(x,newtree,oldtree,orderBr){
tree[[orderBr[x]]]
},newtree=tmpTree,oldtree=tree,orderBr=orderBranch)
replength<-lengthBranch[duplicated(lengthBranch) | duplicated(lengthBranch, fromLast=TRUE)]
difbranch<-unique(replength)
for(i in 1:length(difbranch)){
if(difbranch[i]==1){
samelengthbranch<-which(lengthBranch==difbranch[i])
tmpTree<-tree[samelengthbranch]
tmpTree<-sort(unlist(tmpTree),decreasing = F)
tmpTree<-lapply(1:length(samelengthbranch),function(x) return(tmpTree[x]))
tree[samelengthbranch]<-tmpTree
}
else{
samelengthbranch<-which(lengthBranch==difbranch[i])
tmpTree<-tree[samelengthbranch]
tmpTree<-bubblesort(tmpTree)
tree[samelengthbranch]<-tmpTree
}
}
}
return(tree)
}
#' Reorder the tree based on bubble sorting
#'
#' A helper function in terms of the length of the tree
#' @param tree And/Or tree
#'
#' @return A sorted And/Or tree
#'
#' @export
#'
bubblesort<-function(tree){
n<-length(tree)
for(i in 1:n){
for(j in 2:(n-i)){
tmp<-(unlist(tree[j]))/(unlist(tree[j-1]))
if(length(which(tmp<1))!=0){
temp = tree[j-1];
tree[j-1] = tree[j];
tree[j] = temp;
}
}
}
return(tree)
}
#' Subtree growth based on individual nodes/genes
#'
#' A helper function for running tree-growth while each leaf is a node/gene
#'
#' @param tree An And/Or tree
#' @param maxK The size of the tree, or the maximum number of input genes; By defalt, maxK=8
#' @param nodes The number of nodes in the Boolean network
#' @param penalty TRUE or FALSE used to control the size of the tree
#' @param fast An argument used to indicate whether a sort is needed
#'
#' @return an And/Or tree
#' @export
#'
growtree<-function(tree,maxK,nodes,penalty=FALSE,fast=TRUE){
if(missing(maxK))
maxK<-8
allnodes<-nodes*2 #Boolean variable and its negation
if(missing(tree)||is.null(tree)){
tree<-sample(allnodes,1)
return(as.list(tree))
}
if(length(unique(unlist(tree)))>=maxK||length(tree)>=factorial(maxK)){
penalty<-TRUE
}
tree<-growor(tree,nodes,penalty,fast)
return(tree)
}
#' Subtree (And) growth based on individual nodes/genes
#'
#' A helper function for running tree-growth while each leaf is a node/gene
#'
#' @param tree An And/Or tree
#' @param subtree A subtre, i.e., a prime implicant
#' @param nodes The number of nodes in the Boolean network
#' @param penalty TRUE or FALSE used to control the size of the tree
#' @param fast An argument used to indicate whether a sort is needed
#' @param dec An argument refers to a specific movement
#'
#' @return an And/Or tree
#' @export
#'
growand<-function(tree,subtree,nodes,penalty=FALSE,dec){
inputnodes<-setdiff(1:(nodes*2),subtree)
nleaf<-length(subtree)#number of literals
if(dec==1){
subtree<-replace(subtree,sample(1:nleaf,1),sample(inputnodes,1))
subtree<-sort(subtree)
}
else if(dec==2||penalty){
if(nleaf==1)
return(NULL)
subtree<-subtree[-sample(1:nleaf,1)]
subtree<-sort(subtree)
}
else{
diff<-subtree[subtree+nodes>2*nodes]-nodes
diff<-c(diff,subtree[subtree+nodes<=2*nodes]+nodes)
inputnodes<-setdiff(inputnodes,diff)
subtree<-c(subtree,sample(inputnodes,1))
subtree<-sort(subtree)
}
return(subtree)
}
#' Subtree (Or) growth based on individual nodes/genes
#'
#' A helper function for running tree-growth while each leaf is a node/gene
#'
#' @param tree An And/Or tree
#' @param nodes The number of nodes in the Boolean network
#' @param penalty TRUE or FALSE used to control the size of the tree
#' @param fast An argument used to indicate whether a sort is needed
#'
#' @return an And/Or tree
#' @export
#'
growor<-function(tree,nodes,penalty=FALSE,fast=TRUE){
nbranch<-length(tree)
l2<-length(unique(unlist(tree)))
temp<-runif(1)
pselect<-selection()
if(penalty){
if(temp<0.5)
temp<-2
else
temp<-pselect[3]+0.01
}
if((temp<pselect[1]||temp==2)&&nbranch>1){
if(nbranch>1){
tree[[sample(1:nbranch,1)]]<-NULL
return(tree)
}
else
temp<-pselect[4]*runif(1)+pselect[1]
}
if((temp<pselect[2]||is.null(tree))&&!penalty){
if(is.null(tree))
return(list(sample(nodes*2,1)))
else{
tree<-c(tree,sample(nodes*2,1))
return(tree)
}
}
else{
nbranch<-length(tree)
nsub<-sample(1:nbranch,1)
subtree<-tree[[nsub]]
if(temp<pselect[3])
dec<-1
else if(temp<pselect[4])
dec<-2
else{
if(length(setdiff(1:nodes,c(subtree[subtree>nodes]-nodes,subtree[subtree<=nodes])))==0)
return(growor(tree,nodes,penalty=TRUE,fast))
else
dec<-3
}
if(nbranch==1&&l2==1)
dec<-sample(c(1,3),1)
subtree<-growand(tree,subtree,nodes,penalty,dec)
tree[[nsub]]<-unlist(subtree)
}
if(length(tree)!=0&&fast==FALSE)
tree<-reorderTree(tree)
return(tree)
}
#' Tree growth parameters
#'
#' A helper function for tuning tree growth
#'
#' @return a vector encoding the parameters used for tree growth
#' @export
#'
selection<-function(){
#a<-c(1,3,10,3,5)
a<-c(1,3,10,3,3)
denom<-0
for(i in 1:5){
denom<-denom+a[i]
}
b<-rep(0,5)
for(i in 1:5){
for(ii in 1:i){
b[i]<-b[i]+a[ii]
}
}
for(i in 1:5){
a[i]<-a[i]/denom
b[i]<-b[i]/denom
}
return(b)
}
#' Compute the output of the tree
#'
#' Compute the output of the tree according to the Boolean functions (And/Or tree) and the data matrix
#'
#' @param tree An And/Or tree
#' @param input A data matrix, each row is a time point, each column is a node/gene.
#' @param nodes The number of nodes in the Boolean network
#'
#' @return a vector of the output of the tree
#' @export
#'
outputTree<-function(tree,input,nodes){
ntree<-sort(unique(unlist(tree,recursive=TRUE)))
positive<-ntree[(ntree-nodes)<=0]
reserve<-ntree[which(ntree>nodes)]
negative<-reserve-nodes
ntree<-unique(sort(c(positive,negative)))
if(length(colnames(input))!=0){
temp_name<- gsub("Gene","",colnames(input))
ntree<-as.numeric(temp_name)
}
if(!is.matrix(input)&&length(tree)!=1){
input<-matrix(input,nrow=1)
allrs<-matrix(0,nrow(input),length(tree))
}
else if(!is.matrix(input)&&length(tree)==1){
input<-matrix(input,ncol=1)
allrs<-matrix(0,nrow(input),1)
}
else
allrs<-matrix(0,nrow(input),length(tree))
if(ncol(input)!=nodes){
for(i in 1:length(tree)){
intNodes<-sort(unlist(tree[[i]]))
normNodes<-intNodes[which(intNodes<=nodes)]
negNodes<-setdiff(intNodes,normNodes)
normposit<-match(normNodes,ntree)
negposit<-match(negNodes-nodes,ntree)
rs<-apply(input,1,function(x,normNodes,negNodes){
if(any(x[normposit]==0)||any(x[negposit]==1))
return(0)
else
return(1)
},normNodes=normNodes,negNodes=negNodes)
allrs[,i]<-rs
}
}
else{
rs<-evolTree(tree,input,nodes)
return(rs)
}
rs<-apply(allrs,1,function(x){
if(any(x==1)){
return(1)
}
return(0)
})
return(rs)
}
#' Compute the output of the tree if the data matrix contain less columns
#'
#' A helper function for computing the output of the tree according to the Boolean functions (And/Or tree) and the data matrix
#'
#' @param tree An And/Or tree
#' @param input a data matrix, each row is a time point, each column corresponds to a node/gene of the tree
#' @param nodes the number of nodes in the Boolean network
#'
#' @return a vector of the output of the tree
#' @export
#'
evolTree<-function(tree,input,nodes){
if(is.null(nrow(input))){
temp<-lapply(tree,function(x,timepoint){
intNodes<-unlist(x)
normNodes<-intNodes[which(intNodes<=nodes)]
negNodes<-setdiff(intNodes,normNodes)
if(any(timepoint[normNodes]==0)||any(timepoint[negNodes-nodes]==1))
return(0)
return(1)
},timepoint=input)
if(any(temp==1)) temp<-1
else temp<-0
}
else{
temp<-apply(input,1,function(x,tree){
temp<-lapply(tree,function(x,timepoint){
intNodes<-unlist(x)
normNodes<-intNodes[which(intNodes<=nodes)]
negNodes<-setdiff(intNodes,normNodes)-nodes
if(any(timepoint[normNodes]==0)||any(timepoint[negNodes]==1))
return(0)
return(1)
},timepoint=x)
if(any(temp==1)) temp<-1
else temp<-0
return(temp)
},tree=tree)
}
return(temp)
}
#' Tree minimization
#'
#' A helper function for minimizing the And/Or tree
#'
#' @param tree An And/Or tree
#' @param nodes The number of nodes in the Boolean network
#'
#' @return A minimized tree
#' @export
#'
minimization<-function(tree,nodes){
if(all(sapply(tree,length)==1)){
temp_tree<-unique(unlist(tree))
for(i in 1:length(temp_tree)){
if(temp_tree[i]>nodes)
temp_tree[i]<-temp_tree[i]-nodes
}
if(any(duplicated(temp_tree)))
return(1)
return(unique(temp_tree))
}
nbranch<-length(tree)
ntree<-sort(unique(unlist(tree,recursive=TRUE)))
if(nbranch==1||length(ntree)==1)
return(tree[1])
for(i in 1:nbranch){
temp_tree<-unique(unlist(tree[[i]]))
for(i in 1:length(temp_tree)){
if(temp_tree[i]>nodes)
temp_tree[i]<-temp_tree[i]-nodes
}
if(any(duplicated(temp_tree)))
return(1)
}
positive<-ntree[(ntree-nodes)<=0]
reserve<-ntree[which(ntree>nodes)]
negative<-reserve-nodes
ntree2<-unique(sort(c(positive,negative)))
n<-length(ntree2)
mat<-matrix(0,2^n,n)
for(i in 1:n)
mat[,i]<-rep(0:1,times=2^(i-1),each=2^(n-i))
colnames(mat)<-paste("",ntree2,sep="")
input<-mat
for(i in 1:nbranch){
temp<-outputTree(tree[i],input,nodes)
mat<-cbind(mat,temp)
names_pi<-unique(unlist(tree[[i]],recursive=TRUE))
names_pi<-paste0(names_pi,collapse = "&")
colnames(mat)[which(colnames(mat)=="temp")]<-names_pi
}
output<-outputTree(tree,input,nodes)
mat<-cbind(mat,output=output)
id.truth<-mat[,"output"]==1
mat<-mat[id.truth,-ncol(mat),drop=FALSE]
mat<-mat[,-(1:n)]
tree<-minimizePI(mat)
return(tree)
}
#' Tree growth based on individual nodes/genes
#'
#' A helper function for running tree-growth based on simulated annealling algorithm
#'
#' @param data The data list generated by \link{buildTimeSeries} or \link{bootstrap}
#' @param maxk The maximum number of leaves/genes in the And/Or tree
#' @param tree A initial tree
#' @param initT the upper temperature (on a log10 scale)
#' @param endT the lower temperature (on a log10 scale)
#' @param iter the maximum number of iterations used in simulated annealing algorithm
#'
#' @return An And/Or tree
#'
#' @export
#'
saalg<-function(data,maxK,tree,initT,endT,iter){
if(length(data)==4){
input<-data$inbag
resp<-data$respinbag
}
else{
input<-data[[2]]
resp<-data[[3]]
}
currentnodes<-ncol(input)
nodes<-ncol(input)
if(missing(maxK)){
maxK<-8
}
if(missing(tree)||is.null(tree)){
tree<-growtree(NULL,maxK=maxK,nodes=nodes)
}
T<-10^initT
count<-1
oldtree<-tree
const<-oldtree
advexit<-1
repeat{
count<-count+1
newtree<-growtree(oldtree,maxK=maxK,nodes=nodes)
prob<-min(1,exp(mse(input,resp,oldtree,newtree,nodes)/T))
temp<-runif(1)
cse<-abs(initT)+abs(endT)
if(is.element(count,(iter/cse)*(1:cse))){
T<-T/10
}
#T<-T*0.98
if(T<=10^endT)
break
if(temp<prob)#{
oldtree<-newtree
if(count==iter)
break
}
return(oldtree)
}
#' Find the better tree
#'
#' Given a data matrix with respond, try to find which tree is more predictive. The output of each tree is compared with the standard respond.
#'
#' @param input The input matrix in a data matrix
#' @param resp The respond in a data matrix
#' @param tree1 An And/Or tree
#' @param tree2 Another And/Or tree
#' @param nodes the number of nodes in the Boolean network
#'
#' @return the output difference between the output of tree1 and tree2. If a negative value is returned, then tree1 is better. If a positive value is returned, the tree2 is better. If zero is return, then tree1 and tree2 are equally good.
#'
#' @export
#'
mse<-function(input,resp,tree1,tree2,nodes){
o1<-outputTree(tree1,input,nodes)
o2<-outputTree(tree2,input,nodes)
rs<-sum(o2==resp,na.rm=TRUE)-sum(o1==resp,na.rm=TRUE)
rs<-rs/nrow(input)
return(rs)
}
#' Tree minimization helper function
#'
#' A helper function for minimizing the And/Or tree
#' @param mat a matrix used for selecting prime implicants
#' @export
#'
minimizePI<-function(mat){
vec.primes<-NULL
repeat{
ids.row<-which(rowSums(mat)==1)
if(length(ids.row)>0){
tmp<-matrix(mat[ids.row,],nrow=length(ids.row))
ids.prime<-which(diag(t(tmp)%*%tmp)!=0)
tmp<-mat[,ids.prime]%*%t(mat[,ids.prime])
ids.stay<-rowSums(tmp)==0
mat<-mat[ids.stay,,drop=FALSE]
vec.primes<-c(vec.primes,colnames(mat)[ids.prime])
if(nrow(mat)==0)
break
mat<-mat[,-ids.prime,drop=FALSE]
if(ncol(mat)==0)
break
}
dim.mat<-dim(mat)
mat<-rm.dom(mat)
mat<-rm.dom(mat,col=TRUE,dom=FALSE)
if(all(dim(mat)==dim.mat)){
vec.primes<-cyclic.covering(mat,vec.primes)
break
}
}
tree<-list(length(vec.primes))
for(i in 1:length(vec.primes)){
tree[i]<-lapply((strsplit(gsub("\\D+"," ",vec.primes[i])," ")),as.numeric)
}
return(tree)
}
#' Tree minimization helper function
#'
#' A helper function for minimizing the And/Or tree
#' @param mat a matrix used for selecting prime implicants
#' @export
#'
rm.dom<-function(mat,col=FALSE,dom=TRUE){
if(col)
mat<-t(mat)
mat<-mat[!duplicated(mat),,drop=FALSE]
row.dom<-mat%*%t(mat)==rowSums(mat)
ids.row<-if(dom) colSums(row.dom)==1 else rowSums(row.dom)==1
mat<-mat[ids.row,,drop=FALSE]
if(col)
mat<-t(mat)
mat
}
#' Tree minimization helper function
#'
#' A helper function for minimizing the And/Or tree
#' @param mat a matrix used for selecting prime implicants
#' @export
#'
cyclic.covering<-function(mat,vec.primes){
ia<-as.data.frame(ia.samp(ncol(mat)))
not.covered<-rowSums(as.matrix(ia)%*%t(mat)==0)
ia<-ia[not.covered==0,]
rowS<-rowSums(ia)
min.rowS<-which(rowS==min(rowS))
ia<-ia[min.rowS,]
list.primes<-vector("list",nrow(ia))
for(i in 1:nrow(ia))
list.primes[[i]]<-c(vec.primes,colnames(mat)[ia[i,]==1])
list.primes
}
#' Tree minimization helper function
#'
#' A helper function for minimizing the And/Or tree
#' @param mat a matrix used for selecting prime implicants
#' @export
#'
ia.samp<-function(n.pair,conj=0){
mat<-matrix(0,2^n.pair,n.pair)
for(i in 1:n.pair)
mat[, i]<-rep(rep(c(1,conj),e=2^(n.pair-i)),2^(i-1))
mat
}
#' Output the name of final solution
#'
#' Change the name of prime implicants to corresponding "genes"
#' @param singlename A string represents a prime implicant
#' @param ngenes The number of nodes in the Boolean network; the genes in singlename should not be greater than ngenes
#'
#' @return The name of final solution
#'
#' @export
#'
changeName<-function(singlename,ngenes){
pis<-unlist(strsplit(x = singlename,split = "&"))
if(length(pis)==1){
if(as.numeric(pis)>ngenes)
return(paste0("!Gene",as.numeric(pis)-ngenes))
else
return(paste0("Gene",pis))
}
for(i in 1:length(pis)){
temp<-as.numeric(pis[i])
if(temp>2*ngenes){
warnings("The genes exist in the name is greater than the number of genes in the Boolean Network")
return("NULL")
}
if(temp>ngenes)
pis[i]<-paste0("!Gene",temp-ngenes)
else
pis[i]<-paste0("Gene",pis[i])
}
pis<-paste0(pis,collapse = "&")
return(pis)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.