Nothing
#' Network querying method based on conditional random fields
#'
#' Find the best matching subnetworks from a large target network for small
#' query networks based on the conditional random fields (CRF) model.
#'
#' This is an approach for network querying problem based on conditional random
#' field (CRF) model which can handle both undirected and directed networks,
#' acyclic and cyclic networks, and any number of insertions/deletions.
#'
#' When querying several networks in the same target network,
#' \code{\link{net_query_batch}} will save much time.
#'
#' \itemize{
#' \item query.net: The query network file is written as follows:\cr
#' v1 v2 v3 v4 v5\cr v3 v4 \cr ... \cr where v1, v2, v3, v4, v5 ... are the
#' nodes' names and each line indicates there are edges between the first node
#' and other nodes in the line. For example, the first line denotes 4 edges:
#' (v1, v2), (v1, v3), (v1, v4), and (v1, v5).
#'
#' \item target.net: The format of this file is the same as the query network
#' file.
#'
#' \item node.sim: This similarity file's format is as follows:\cr v1 V1 s1 \cr
#' v1 V2 s2 \cr ... \cr v1 is the node from the query network, V1 is the node
#' from the target network, s1 is the similarity score between the node v1 and
#' V1, and so on.
#'
#' \item query.type: If query.type = 1, the loopy belief propagation (LBP)
#' algorithm will be applied, which is an approximate algorithm for a general
#' graph with loops. If the query is a chain or tree, there are exact
#' algorithms. Set query.type = 2 when the query is a chain, and query.type = 3
#' when the query is a tree. The heuristic algorithm will be used when
#' query.type = 4, which will try the exact algorithm (junction tree algorithm)
#' first and resort to LBP algorithm when the exact algorithm failed. The
#' default value is 4.
#'
#' \item delta.d: The smaller delta.d is, the heavier penalty for deletions.
#'
#' \item delta.c: The smaller delta.c is, the heavier penalty for consecutive
#' deletions.
#'
#' \item delta.e: The smaller delta.e is, the heavier penalty for single
#' deletion.
#'
#' \item delta.s: The larger delta.s indicates heavier penalty for insertions.
#'
#' }
#'
#' @param query.net The input file name of the query network.
#' @param target.net The input file name of the target network.
#' @param node.sim The input file name of the node similarity scores between
#' the query network and the target network.
#' @param query.type The querying network type: 1 - general, 2 - chain, 3 -
#' tree, 4 - heuristic.
#' @param delta.d The parameter delta.d is a parameter for deletions.
#' @param delta.c The parameter delta.c is a parameter for consecutive
#' deletions.
#' @param delta.e The parameter delta.e is a parameter for single deletion.
#' @param delta.s The parameter delta.s is a parameter for insertions.
#' @param output The suffix of output file name.
#'
#' @references Qiang Huang, Ling-Yun Wu, and Xiang-Sun Zhang. An Efficient
#' Network Querying Method Based on Conditional Random Fields. Bioinformatics,
#' 27(22):3173-3178, 2011.
#'
#' @examples
#'
#' \dontrun{
#' library(Corbi)
#'
#' ## An example: "querynet.txt", "targetnet.txt", "nodesim.txt" are
#' ## three input files in the working directory
#' net_query("querynet.txt", "targetnet.txt", "nodesim.txt", query.type=3)
#' }
#'
#' @export
net_query <- function(query.net, target.net, node.sim, query.type=4, delta.d=1e-10, delta.c=0.5, delta.e=1, delta.s=1, output="result.txt")
{
# options
# query.net: input file name of query network
# target.net: input file name of target network
# node.sim: input file name of node similarity
# query.type: the type of query network, 1 - general, 2 - chain, 3 - tree, 4 - heuristic, 5 - ilp
# delta: the parameters \Delta_d, \Delta_c, \Delta_e, \Delta_s
# output: the output filename
# read the input files
# target$node: the node names of target network
# target$matrix: the adjacency matrix of target network
# target$sim: the node similarity matrix
# query$node: the node names of query network
# query$matrix: the adjacency matrix of query network
query.type <- as.numeric(query.type)
delta <- lapply(list(d=delta.d, c=delta.c, e=delta.e, s=delta.s), as.numeric)
target <- read_net(target.net)
target$sim <- read_sim(node.sim)
query <- read_net(query.net)
# compute the shortest distance matrix for the target network
# and simplify the target network
label <- .net_query.simplify_target(query, target, delta)
# build and solve CRF model
model <- .net_query.build_model(query, label, delta)
result <- .net_query.solve_crf(model, query.type)
# write result to output file
.net_query.write_result(query, label, model, result, paste(query.net, output, sep="_"))
}
.net_query.simplify_target <- function(query, target, delta)
{
net.sim <- matrix(0, query$size, target$size, dimnames=list(query$node, target$node))
n1 <- query$node[query$node %in% rownames(target$sim)]
n2 <- target$node[target$node %in% colnames(target$sim)]
net.sim[n1, n2] <- target$sim[n1, n2]
select <- colSums(net.sim > delta$d) > 0
net.size <- sum(select)
if (net.size <= 0)
{
stop("The simplified target network is empty, please check the node similarity file,
or decrease the value of parameter delta.d!")
}
net.node <- target$node[select]
net.matrix <- target$matrix[select, select]
net.sim <- net.sim[, select]
net.sim[net.sim <= delta$d] <- 0
if (is.null(target$dist))
{
net.dist <- get_shortest_distances(target$matrix, select)[select, select]
}
else
{
net.dist <- target$dist[select, select]
}
net.dist[net.dist == -1] <- 10000
net.dist[cbind(1:net.size, 1:net.size)] <- 1
list(size=net.size, node=net.node, matrix=net.matrix, sim=net.sim, dist=net.dist)
}
.net_query.build_model <- function(query, label, delta)
{
query.size <- query$size
query.net <- query$matrix
label.size <- label$size
label.gap <- label.size + 1
n.labels <- label.gap
label.weight <- 1 / label$dist ^ delta$s
S <- cbind(label$sim, delta$d)
W <- matrix(nrow=n.labels, ncol=n.labels)
W[1:label.size, 1:label.size] <- label.weight
W[label.gap,] <- delta$e
W[,label.gap] <- delta$e
W[label.gap, label.gap] <- delta$c
crf <- CRF::make.crf(query.net, rowSums(S > 0))
crf$state.map <- matrix(label.gap, nrow=crf$n.nodes, ncol=crf$max.state)
for (i in 1:crf$n.nodes)
{
crf$state.map[i, 1:crf$n.states[i]] <- which(S[i,] > 0)
crf$node.pot[i,] <- S[i, crf$state.map[i,]]
}
for (e in 1:crf$n.edges)
{
n1 <- crf$edges[e, 1]
n2 <- crf$edges[e, 2]
m1 <- 1:crf$n.states[n1]
m2 <- 1:crf$n.states[n2]
S1 <- matrix(crf$node.pot[n1, m1], crf$n.states[n1], crf$n.states[n2])
S2 <- matrix(crf$node.pot[n2, m2], crf$n.states[n1], crf$n.states[n2], byrow=TRUE)
W1 <- W[crf$state.map[n1, m1], crf$state.map[n2, m2]]
W2 <- W[crf$state.map[n2, m2], crf$state.map[n1, m1]]
crf$edge.pot[[e]] <- (S1 + S2) * pmax(W1 * query.net[n1, n2], t(W2) * query.net[n2, n1]) / 2
}
crf
}
decode_heuristic <- function(crf)
{
result <- try(CRF::decode.junction(crf), TRUE)
if (class(result) == "try-error")
{
result <- CRF::decode.lbp(crf)
}
result
}
.net_query.solve_crf <- function(model, query.type)
{
decode.method <- list(CRF::decode.lbp, CRF::decode.chain, CRF::decode.tree, decode_heuristic, CRF::decode.ilp)
if (!is.numeric(query.type) || query.type > length(decode.method) || query.type < 1) query.type = 4
result <- decode.method[[query.type]](model)
result <- model$state.map[cbind(1:model$n.nodes, result)]
}
.net_query.write_result <- function(query, label, model, result, filename="result.txt")
{
query.name <- query$node
label.name <- c(label$node, "gap")
label.dist <- matrix("gap", nrow=label$size+1, ncol=label$size+1)
label.dist[1:label$size, 1:label$size] <- label$dist
con <- file(as.character(filename), "w")
writeLines("node match:", con, sep="\n")
for (i in 1:query$size)
{
writeLines(paste(query.name[i], " ", label.name[result[i]]), con, sep="\n");
}
writeLines("", con, sep="\n")
direction <- c("--->", "<---", "----")
writeLines("edge match:", con, sep="\n")
for (i in 1:model$n.edges)
{
x1 <- model$edges[i,1]
x2 <- model$edges[i,2]
y1 <- result[x1]
y2 <- result[x2]
distance <- c(label.dist[y1,y2], label.dist[y2,y1], min(label.dist[y1,y2], label.dist[y2,y1]))
d <- (query$matrix[x1,x2] > 0) + 2 * (query$matrix[x2,x1] > 0)
writeLines(paste(query.name[x1], direction[d], query.name[x2], "\t", label.name[y1], direction[d], label.name[y2], "\t", distance[d]), con, sep="\n")
}
close(con)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.