
## Generate the one-level network from the knowledge base.
##
## Arguments:
##   ents.file  Path to a tab-separated entity file with a header row and
##              four columns, interpreted in order as: uid, name, id, type.
##   rels.file  Path to a tab-separated relation file with a header row and
##              six columns, interpreted in order as:
##              uid, srcuid, trguid, type, pmids, nls.
##   verbose    If TRUE, print the dimensions of the processed network.
##
## Value:
##   A list with components
##     ents    data frame of retained entities (uid, name, id, type); uid is
##             the new integer uid shared by entities with identical
##             (name, id, type),
##     rels    data frame of retained relations; srcuid/trguid are expressed
##             in the new uids and type is one of "increase", "decrease",
##             "conflict", with one row per (srcuid, trguid) pair,
##     id.map  data frame mapping every original entity uid (uid.orig) to
##             its new uid (uid.new).
##
## Note: all columns are turned into character vectors by the cleanup step
## (gsub returns character), except the relabelled integer uid columns.
processKB <- function(ents.file, rels.file, verbose=FALSE)
{
	valid.types <- c("increase", "decrease", "conflict")

	## Read entity file
	ents <- read.table(ents.file, header = TRUE, sep = "\t", comment.char = "",
		stringsAsFactors = FALSE, na.strings = "")
	if (ncol(ents) != 4L)
		stop("entity file must have exactly 4 columns (uid, name, id, type)",
			call. = FALSE)
	colnames(ents) <- c("uid", "name", "id", "type")

	## Read relation file (quoting disabled: quote characters are stripped below)
	rels <- read.table(rels.file, header = TRUE, stringsAsFactors = FALSE,
		strip.white = TRUE, sep = "\t", quote = "", comment.char = "")
	if (ncol(rels) != 6L)
		stop("relation file must have exactly 6 columns ",
			"(uid, srcuid, trguid, type, pmids, nls)", call. = FALSE)
	colnames(rels) <- c("uid", "srcuid", "trguid", "type", "pmids", "nls")

	## Strip double quotes, turn single quotes into 'p', '#' into '_',
	## and drop spaces. NA entries stay NA.
	cleanup <- function(x) {
		x <- gsub("\"", "", x, fixed = TRUE)
		x <- gsub("'", "p", x, fixed = TRUE)
		x <- gsub("#", "_", x, fixed = TRUE)
		gsub(" ", "", x, fixed = TRUE)
	}
	ents <- as.data.frame(lapply(ents, cleanup), stringsAsFactors = FALSE)
	rownames(ents) <- NULL
	rels <- as.data.frame(lapply(rels, cleanup), stringsAsFactors = FALSE)
	rownames(rels) <- NULL

	## Group entities by name, id, and type: every row receives the new uid
	## of the first row carrying the same (name, id, type)
	dups <- duplicated(ents[, c("name", "id", "type")])
	refs <- which(!dups)
	uid.new <- integer(nrow(ents))
	uid.new[refs] <- seq_along(refs)
	if (any(dups)) {
		## Tab-separated keys: unlike '.', a tab is not expected inside a
		## field of a tab-delimited file, so distinct triples cannot collide
		keys <- paste(ents$name, ents$id, ents$type, sep = "\t")
		uid.new[dups] <- uid.new[refs[match(keys[dups], keys[refs])]]
	}

	## Relabel entities uid
	id.map <- data.frame(uid.orig = ents$uid, uid.new = uid.new,
		stringsAsFactors = FALSE)
	ents$uid <- uid.new
	## Consolidate entities (if multiple rows have the
	## same name, id, and type, only keep the first one)
	ents <- ents[refs, , drop = FALSE]

	## Extract Protein, Compound or mRNA entities
	ents <- ents[ents$type %in% c("Protein", "Compound", "mRNA"), , drop = FALSE]

	## Remove entities with missing values: mRNA needs an id,
	## Protein/Compound need an id or a name
	is.mRNA <- (ents$type == "mRNA")
	is.pc <- (ents$type %in% c("Protein", "Compound"))
	na.id <- is.na(ents$id)
	na.name <- is.na(ents$name)
	ents <- ents[(is.mRNA & !na.id) | (is.pc & (!na.id | !na.name)), , drop = FALSE]
	## Remove duplicate entities
	if (anyDuplicated(ents)) ents <- unique(ents)

	## Collapse relation types onto their canonical label
	## (any type containing "increase" becomes "increase", and so on)
	for (val in valid.types) {
		rels$type[grepl(val, rels$type)] <- val
	}
	## Remove duplicate relations
	if (anyDuplicated(rels)) rels <- unique(rels)
	## Discard relations with invalid type
	rels <- rels[rels$type %in% valid.types, , drop = FALSE]
	## Relabel relations source and target uid to match entities uid
	## (uids absent from the entity file become NA and are dropped below)
	rels$srcuid <- id.map$uid.new[match(rels$srcuid, id.map$uid.orig)]
	rels$trguid <- id.map$uid.new[match(rels$trguid, id.map$uid.orig)]

	## Source must be protein or compound and target must be mRNA.
	## The masks are computed here, on the final entity table, so they
	## cannot be misaligned with ents.
	is.mRNA <- (ents$type == "mRNA")
	is.pc <- (ents$type %in% c("Protein", "Compound"))
	rels <- rels[rels$srcuid %in% ents$uid[is.pc] &
		rels$trguid %in% ents$uid[is.mRNA], , drop = FALSE]
	## Extract entities that are matched in relation file
	ents <- ents[ents$uid %in% c(rels$srcuid, rels$trguid), , drop = FALSE]

	## Resolve relation types: keep the first relation of every
	## (srcuid, trguid) pair and flag it as "conflict" when the pair was
	## reported with more than one type
	pair.key <- paste(rels$srcuid, rels$trguid, sep = ".")
	dups <- duplicated(pair.key)
	if (any(dups)) {
		n.types <- vapply(split(rels$type, pair.key),
			function(x) length(unique(x)), integer(1))
		conflicting <- pair.key %in% names(n.types)[n.types > 1L]
		rels$type[conflicting] <- "conflict"
		rels <- rels[!dups, , drop = FALSE]
	}

	if (verbose) {
		cat("\n Processed network dimensions:")
		cat("\n ents:", nrow(ents))
		cat("\n rels:", nrow(rels))
	}

	list(ents = ents, rels = rels, id.map = id.map)
}


