R/randomnet.R
In ddgraph: Distinguish direct and indirect interactions with Graphical Modelling

Documented in biased.bn.fit biased.graph estimateNetworkDistribution formulaFalseNeg graph.to.bn independent.contributions.formula independent.contributions.formula.mul prob.distr.norm prob.distr.unif random.bn.fit

#
# Utility functions for generating random networks and difference testing scenarios
#


#' Estimate the in.degree distribution and conditional probability distribution from data
#'
#' The algorithm uses hill-climbing with BIC to construct the network and estimate the
#' parameters. Then, provided that for each in-degree there is at least two nodes, it
#' estimates the beta distribution parameters. 
#'
#' @title Estimate network distribution parameters
#' @param obj and object of class DDDataSet
#' @param use.class if to include the class variable into the estimate
#'
#' @return a list of two elements: in.degree.distr - distribution of in-degrees, and 
#'         beta.est - estimate beta distribution values
#' @export
#' @examples
#' data(mesoBin)
#' estimateNetworkDistribution(mesoBin$Meso)
estimateNetworkDistribution = function(obj, use.class=FALSE){

	tt = rawData(obj)
	if(!use.class)
		tt = tt[-which(names(tt)=="class")]
	
	orig.tt = tt
	
	if(dataType(obj) == "binary")
		tt = convertToFactor(tt)

	# generate the graph and the fitted graph with probabilities
	bn.graph = hc(tt)
	net = bn.fit(bn.graph, tt)

	# generate degree distribution
	all.degrees = rep(0, length(net))
	for(i in 1:length(net)){
		all.degrees[i] = length(dim(net[[i]]$prob))
	
	}

	# extract distributions of conditional probabilities for all degrees of freedom
	udegree = sort(unique(all.degrees))
	distr = list()

	for(i in 1:length(udegree)){
		inx = which(all.degrees == udegree[i])
		len = 2^udegree[i]
		m = matrix(0, ncol=len/2, nrow=length(inx))
		for(j in 1:length(inx)){
			m[j,] = as.vector(net[[ inx[j] ]]$prob)[seq(1,len,2)]
		}
	
		distr[[ udegree[i] ]] = m
	}

	# estimate parameters of the beta distribution
	beta.est = list()

	for(i in 1:length(distr)){
		if(i == 1){
			# use marginals for nodes when there is no parent
			val = colMeans(orig.tt)
			p = fitdistr(val, "beta", list(shape1=1, shape2=1))
			params = unlist(unlist(p)[c("estimate.shape1", "estimate.shape2")])
			params = data.frame("shape1" = params[1], "shape2" = params[2])
			beta.est[[i]] = params
		} else{
			# use the actual observed values, requires there is at least 2 values observed
			m = distr[[i]]
			params = matrix(0, ncol=2, nrow=ncol(m))
			for(j in 1:ncol(m)){
				val = m[,j]
				# values need to be in range (0,1) not exactly 1 or 0
				if(any(val == 1))
					val[val==1] = 0.99999
				if(any(val == 0))
					val[val==0] = 0.00001
			
				p = fitdistr(val, "beta", list(shape1=1, shape2=1))
				params[j,] = unlist(p)[c("estimate.shape1", "estimate.shape2")]
			}
			params = as.data.frame(params)
			names(params) = c("shape1", "shape2")
			beta.est[[i]] = params
		}
	}
	
	list(in.degree.distr = all.degrees-1, beta.est = beta.est)
}


#' Uniform distribution function for \code{random.bn.fit}
#'
#' Generate 2^n uniformly distributed numbers in range 0 to 1
#'
#' @title Uniform distribution for \code{random.bn.fit}
#' @param n number of variables
#' @return vector of 2^n uniform random numbers
#' @export
#' @examples
#' # return 8 random uniform numbers
#' prob.distr.unif(3)
prob.distr.unif = function(n){
	runif(2^n)
}

#' Generate 2^n numbers from distribution with most of the pdf mass in extreme probabilities (mirrored normal). 
#' We use standard deviation of 1/3 and modulo-1 of normal distribution. 
#'
#' @title Normal distribution function for \code{random.bn.fit}
#'
#' @param n number of variables
#' @param sd the standard deviation of distribution
#' @return vector of 2^n random numbers
#' @export
#' @examples
#' # return 8 random numbers since n=3
#' prob.distr.norm(3)
prob.distr.norm = function(n, sd=1/3){
	rnorm(2^n, sd=sd) %% 1
}

#' Convert graphNEL and friends representation to bn
#' 
#' @param graph graphNEL or graphAM object
graph.to.bn = function(graph){
	# start with an empty graph and add edges
	r = empty.graph(nodes(graph))
	
	p = as(graph, "graphAM")
	adj = p@adjMat
	rownames(adj) = colnames(adj)
	
	for(i in 1:nrow(adj)){
		for(j in 1:ncol(adj)){
			if(adj[i,j] == 1){
				r = set.arc(r, rownames(adj)[i], rownames(adj)[j])
			}
		}
	}
	
	r
	
}

#' Generate a random Bayesian network using package \code{bnlearn}. The nodes specify the partial ordering
#' of the graph, and the conditional probabilities are sampled from given distribution. The network is
#' generated to have on average given number of neighbours (i.e. both in-going and out-going edges)
#'
#' @title Generate a random \code{bn.fit} network
#'
#' @param nodes a vector of desired node names (basis for partial ordering)
#' @param num.neigh expected number of neighbours per node in the random graph
#' @param prob.distr the probability distribution function to use
#' @param bn.graph the \code{bn} object with an already laid out graph, if not supplied will be generated
#'
#' @return a list of two elements: \code{bn} - a \code{bn} object which contains the structure and 
#'         \code{bn.fit} - a \code{bn.fit} object with filled in conditional probabilities
#' @export
#' @examples
#' # a random network with 3 nodes "A", "B", "C" with average of 1 neighbour
#' random.bn.fit(c("A", "B", "C"), num.neigh=1)
random.bn.fit = function(nodes, num.neigh=2, prob.distr=prob.distr.norm, bn.graph=NULL){
	# generate a random graph structure using nodes for ordering
	if( is.null(bn.graph) ){
		#g = graph.to.bn(randomEGraph(nodes, num.neigh/(length(nodes)-1)))
		bn.graph = random.graph(nodes, prob=(num.neigh/(length(nodes)-1)))
	}

	# extract node information
	nodes = bn.graph$nodes	
	# start creating a bnfit object
	bnfit = lapply(nodes, function(x) list("parents"=x$parents, "children"=x$children))
	
	nodenames = names(nodes)
	for(i in 1:length(nodenames)){
		# set the name
		bnfit[[i]]$node = nodenames[i]
		
		n = length(bnfit[[i]]$parents)+1
		# generate some conditional probabilities
		prob = array(prob.distr(n), dim=rep(2,n))
		sel1 = c(1, rep("",n-1))
		sel2 = c(2, rep("",n-1))
		
		eval(parse(text=paste("prob[", paste(sel2, collapse=","), "] <- 1 - prob[", paste(sel1, collapse=","), "]", sep="")))
		
		# put in the names of dimensions
		dm = list()
		dm[[ nodenames[i] ]] = c("0", "1")
		if( n > 1 ){
			for(j in 1:(n-1)){
				dm[[ bnfit[[i]]$parents[j] ]] = c("0","1")
			}
		}
		
		dimnames(prob)  = dm
		class(prob) = "table"
		
		bnfit[[i]]$prob = prob
		
		class(bnfit[[i]]) = "bn.fit.dnode"
	}
	
	class(bnfit) = "bn.fit"
	
	list("bn"=bn.graph, "bn.fit"=bnfit)
		
}

#' Generate a random directed graph with the given node ordering and degree distribution
#'
#' @title Generate random network with degree distribution
#' @param nodes character vector of node names which species the node ordering
#' @param in.degree.distr the node in-degree distribution
#'
#' @return an object of class bn with the random graph
#' @export
#' @examples
#' # a random network of 5 nodes with choosen in-degree distribution
#' biased.graph(letters[1:5], c(0, 1, 1, 2, 2))
biased.graph = function(nodes, in.degree.distr){
	r = empty.graph(nodes)
	
	if( !any(in.degree.distr == 0) )
		stop("At least one node needs to have in-degree zero")
	
	stopifnot(length(nodes) == length(in.degree.distr))
	
	pool = in.degree.distr	
	for(i in 1:length(nodes)){
		num.parents = sample(pool[pool<i],1)
		# remove the picked value from the pool
		pool = pool[- (which(pool == num.parents)[1]) ] 
		if(num.parents == 0)
			next

		all.parents = sample(nodes[1:(i-1)], num.parents)		
		for(j in 1:num.parents){
			r = set.arc(r, all.parents[j], nodes[i])
		}
		
	}
	
	r
	
	
}

#' A version of random.bn.fit which generates a graph based on degree distribution and beta distribution for probabilities
#'
#' @title Random network with a biased degree distribution
#' @param nodes character vector of node names
#' @param beta.est the beta distribution parameters for different degrees of a node. Should be a list where [[2]] corresponds 
#'        to 2-dimenstional contingency table (i.e. one parent, one output). It contains a data.frame with columns shape1, shape2
#'        for the beta distribution, and rows are degrees of freedom (in this case 2, when P(Out=0|Parent=0) and P(Out=0|Parent=1))
#' @param in.degree.distr a vector with degree distribution for all the nodes in the network (names are ignored, and degree is 
#'        randomly sampled from this vector)
#' @param bn.graph if the graph structure is already available, then the graph structure in object of class "bn"  
#' @return a list of two elements: \code{bn} - a \code{bn} object which contains the structure and 
#'         \code{bn.fit} - a \code{bn.fit} object with filled in conditional probabilities
#' @export
#' @examples
#' # nodes, conditional probability distribution, an indegree distribution
#' nodes = letters[1:5]
#' beta.est = list(data.frame(shape1=2,shape2=3), data.frame(shape1=c(2,4), shape2=c(5,2)), data.frame(shape1=c(1,2,3,4), shape2=c(3,2,1,2)))
#' in.degree.distr = c(0, 1, 1, 2, 2)
#' # make a random graph using these parameters
#' biased.bn.fit(nodes, beta.est, in.degree.distr)
biased.bn.fit = function(nodes, beta.est, in.degree.distr, bn.graph=NULL){
	# generate a random graph structure using nodes for ordering
	if( is.null(bn.graph) ){
		bn.graph = biased.graph(nodes, in.degree.distr)
	}

	# extract node information
	nodes = bn.graph$nodes	
	# start creating a bnfit object
	bnfit = lapply(nodes, function(x) list("parents"=x$parents, "children"=x$children))
	
	nodenames = names(nodes)
	for(i in 1:length(nodenames)){
		# set the name
		bnfit[[i]]$node = nodenames[i]
		
		n = length(bnfit[[i]]$parents)+1
		# generate some conditional probabilities
		prob = rep(0, 2^(n-1))
		beta = beta.est[[n]]
		
		# there is 2^(n-1) degrees of freedom
		stopifnot(2^(n-1) == nrow(beta))		
		for(j in 1:nrow(beta)){
			prob[j] = rbeta(1, shape1=beta$shape1[j], shape2=beta$shape2[j])
		}
		# there is 2x more values that degrees of freedom
		prob = rep(prob, each=2)
		prob = array(prob, dim=rep(2,n))
		
		sel1 = c(1, rep("",n-1))
		sel2 = c(2, rep("",n-1))
		
		# make sure the probabilities are adding up to 1
		eval(parse(text=paste("prob[", paste(sel2, collapse=","), "] <- 1 - prob[", paste(sel1, collapse=","), "]", sep="")))
		
		# put in the names of dimensions
		dm = list()
		dm[[ nodenames[i] ]] = c("0", "1")
		if( n > 1 ){
			for(j in 1:(n-1)){
				dm[[ bnfit[[i]]$parents[j] ]] = c("0","1")
			}
		}
		
		dimnames(prob)  = dm
		class(prob) = "table"
		
		bnfit[[i]]$prob = prob
		
		class(bnfit[[i]]) = "bn.fit.dnode"
	}
	
	class(bnfit) = "bn.fit"
	
	list("bn"=bn.graph, "bn.fit"=bnfit)
		
}

#' Generate class labels by using the readout mechanism. Logical formula is applied to two variables
#' which are read out from the real data using the var1 and var2 probabilities. This only works
#' with binary variables. 
#'
#' @title Generate class labels by independent contributions of two variables
#'
#' @param data a matrix or data.frame containing binary observations (columns are variables)
#' @param var1 index or name of the first variable
#' @param var2 index or name of the second variable
#' @param var1.prob1 the conditional probability P(class labels = 1|var1=1)
#' @param var1.prob0 the conditional probability P(class labels = 1|var1=0)
#' @param var2.prob1 the conditional probability P(class labels = 1|var2=1)
#' @param var2.prob0 the conditional probability P(class labels = 1|var2=0)
#' @param logical.formula logical formula to apply
#' @param false.neg a false negative probability
#' @param false.pos a false positive probability
#' @return a binary vector containing the class labels
#' @export
#' @examples
#' # noisy OR function with 0.1 probability of error for reading "a" and "b" (error in both 1 and 0)
#' data <- cbind("a"=c(0,0,1,1), "b"=c(0,1,0,1))
#' independent.contributions.formula(data, "a", "b", 0.9, 0.1, 0.9, 0.1, "a | b")
independent.contributions.formula = function(data, var1, var2, var1.prob1, var1.prob0, var2.prob1, var2.prob0, logical.formula,
	false.neg=0, false.pos=0){

	apply.formula = function(x){
		expr = paste("with(as.list(x),", logical.formula, ")", sep="")
		out = eval(parse(text=expr))
		as.numeric(out)
	}
	
	# extract the two variables
	a = data[,var1]
	b = data[,var2]
	
	labels = rep(0, nrow(data))
	
	for(i in 1:nrow(data)){
		out = c(0,0)
		names(out) = colnames(data[,c(var1,var2)])
		
		if( a[i] == 1){
			if(runif(1) <= var1.prob1)
				out[1] = 1
		}
		if( a[i] == 0){
			if(runif(1) <= var1.prob0)
				out[1] = 1
		}

		if( b[i] == 1){
			if(runif(1) <= var2.prob1)
				out[2] = 1
		}
		if( b[i] == 0){
			if(runif(1) <= var2.prob0)
				out[2] = 1
		}

		labels[i] = apply.formula(out)
		
		# apply false negative rate
		if(labels[i] == 1){
			if(runif(1) < false.neg)
				labels[i] = 0
		}
		# false positive rate
		if(labels[i] == 0){
			if(runif(1) < false.pos)
				labels[i] = 1
		}
	
	}

	labels	
}

#' Generate class labels by using the readout mechanism. Logical formula is applied to two variables
#' which are read out from the real data using the var1 and var2 probabilities. This only works
#' with binary variables. 
#'
#' @title Generate class labels by a noisy formula with high false negative rate
#'
#' @param data a matrix or data.frame containing binary observations (columns are variables)
#' @param var1 index or name of the first variable
#' @param var2 index or name of the second variable
#' @param false.neg a false negative probability
#' @param logical.formula logical formula to apply
#' @return a binary vector containing the class labels
#' @export
#' @examples
#' # noisy OR function with 0.1 probability of error for reading "a" and "b" (error in both 1 and 0)
#' data <- cbind("a"=c(0,0,1,1), "b"=c(0,1,0,1))
#' formulaFalseNeg(data, "a", "b", 0.8, "a | b")
formulaFalseNeg = function(data, var1, var2, false.neg, logical.formula){

	apply.formula = function(x){
		expr = paste("with(as.list(x),", logical.formula, ")", sep="")
		out = eval(parse(text=expr))
		as.numeric(out)
	}
	
	# extract the two variables
	a = data[,var1]
	b = data[,var2]
	
	labels = rep(0, nrow(data))
	
	for(i in 1:nrow(data)){
		out = c(a[i], b[i])
		names(out) = colnames(data[,c(var1,var2)])
		
		labels[i] = apply.formula(out)
		
		# apply false negative rate
		if(labels[i] == 1){
			if(runif(1) < false.neg)
				labels[i] = 0
		}
	
	}

	labels	
}


#' Version of \code{independent.contributions.formula} that works with any number of variables. See the help page
#' for \code{independent.contributions.formula} for description of functionality. 
#'
#' @title Generate class labels by independent contributions of two variables
#'
#' @param data a matrix or data.frame containing binary observations (columns are variables)
#' @param target.vars indexes of target variables
#' @param prob1 vector of P(class labels = 1|varX=1) for different X
#' @param prob0 vector of P(class labels = 1|varX=0) for different X
#' @param logical.formula a character string for the formula
#'
#' @return a vector of binary class labels
#' @export
#' @examples
#' # noisy OR function with three variables and with noise level of 0.1 for a, b, and 0.2 for c
#' data <- cbind("a"=c(0,0,0,0,1,1,1,1), "b"=c(0,0,1,1,0,0,1,1), "c"=c(0,1,0,1,0,1,0,1))
#' independent.contributions.formula.mul(data, c("a", "b", "c"), c(0.9, 0.9, 0.8), c(0.1, 0.1, 0.2), "a | b | c")
independent.contributions.formula.mul = function(data, target.vars, prob1, prob0, logical.formula){
	apply.formula = function(x){
		expr = paste("with(as.list(x),", logical.formula, ")", sep="")
		out = eval(parse(text=expr))
		as.numeric(out)
	}
	
	# extract the two variables
	vars = data[,target.vars]
	
	labels = rep(0, nrow(data))
	
	for(i in 1:nrow(data)){
		out = rep(0, ncol(vars))
		names(out) = colnames(data[,target.vars])
		
		for(j in 1:ncol(vars)){
			# probability of outputing 0 if input is one
			if(vars[i,j] == 1){
				if(runif(1) <= prob1[j])
					out[j] = 1
			}
			# probability of outputing 1 if input is zero
			if(vars[i,j] == 0){
				if(runif(1) <= prob0[j])
					out[j] = 1
			}
		}

		labels[i] = apply.formula(out)
	
	}

	labels	
}

Any scripts or data that you put into this service are public.

ddgraph documentation built on Nov. 17, 2017, 10:50 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ddgraph
Distinguish direct and indirect interactions with Graphical Modelling

R/randomnet.R
In ddgraph: Distinguish direct and indirect interactions with Graphical Modelling

Defines functions estimateNetworkDistribution prob.distr.unif prob.distr.norm graph.to.bn random.bn.fit biased.graph biased.bn.fit independent.contributions.formula formulaFalseNeg independent.contributions.formula.mul

Documented in biased.bn.fit biased.graph estimateNetworkDistribution formulaFalseNeg graph.to.bn independent.contributions.formula independent.contributions.formula.mul prob.distr.norm prob.distr.unif random.bn.fit

Try the ddgraph package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ddgraph Distinguish direct and indirect interactions with Graphical Modelling

R/randomnet.R In ddgraph: Distinguish direct and indirect interactions with Graphical Modelling

Defines functions estimateNetworkDistribution prob.distr.unif prob.distr.norm graph.to.bn random.bn.fit biased.graph biased.bn.fit independent.contributions.formula formulaFalseNeg independent.contributions.formula.mul

Documented in biased.bn.fit biased.graph estimateNetworkDistribution formulaFalseNeg graph.to.bn independent.contributions.formula independent.contributions.formula.mul prob.distr.norm prob.distr.unif random.bn.fit

Try the ddgraph package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ddgraph
Distinguish direct and indirect interactions with Graphical Modelling

R/randomnet.R
In ddgraph: Distinguish direct and indirect interactions with Graphical Modelling