Nothing
###############################################################################
#
# pathRank.R: This file contains functions to rank paths provided with a w
# weigthed igraph object.
#
# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
#
# This is released under GPL-2.
#
# Documentation was created using roxygen
#
###############################################################################
#' Creates a subnetwork from a ranked path list
#'
#' Creates a subnetwork from a ranked path list generated by \code{\link{pathRanker}}.
#'
#' @param paths The paths extracted by \code{\link{pathRanker}}.
#' @param graph A annotated igraph object.
#'
#' @return A subnetwork from all paths provided. If paths are computed for several
#' labels (sample categories), a subnetwork is returned for each label.
#'
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' ## Prepare a weighted reaction network.
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' ## Assign edge weights based on Affymetrix attributes and microarray dataset.
#' # Calculate Pearson's correlation.
#' data(ex_microarray) # Part of ALL dataset.
#' rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' weight.method = "cor", use.attr="miriam.uniprot",
#' y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' ## Get ranked paths using probabilistic shortest paths.
#' ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' K=20, minPathSize=6)
#'
#' ## Get the subnetwork of paths in reaction graph.
#' reaction.sub <- getPathsAsEIDs(ranked.p, rgraph)
#'
#' ## Get the subnetwork of paths in the original metabolic graph.
#' metabolic.sub <- getPathsAsEIDs(ranked.p, ex_sbml)
#'
extractPathNetwork <- function(paths, graph){
p.eids <- getPathsAsEIDs(paths, graph)
if(length(paths$y.labels)>1){
graph.ls <- lapply(p.eids, function(x)
subgraph.edges(graph, as.integer(unlist(x))) )
names(graph.ls) <- paths$y.labels
return(graph.ls)
}else
return(subgraph.edges(graph,as.integer(unlist(p.eids))))
}
#' Convert a ranked path list to edge ids of a graph
#'
#' Convert a ranked path list to Edge ids of a graph, where paths can come from
#' a different representation (for example matching path from a reaction network to
#' edges on a metabolic network).
#'
#' @param paths The paths extracted by \code{\link{pathRanker}}.
#' @param graph A annotated igraph object.
#'
#' @return A list of edge ids on the provided graph.
#'
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' ## Prepare a weighted reaction network.
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' ## Assign edge weights based on Affymetrix attributes and microarray dataset.
#' # Calculate Pearson's correlation.
#' data(ex_microarray) # Part of ALL dataset.
#' rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' weight.method = "cor", use.attr="miriam.uniprot",
#' y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' ## Get ranked paths using probabilistic shortest paths.
#' ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' K=20, minPathSize=6)
#'
#' ## Get the edge ids along paths in the reaction graph.
#' path.eids <- getPathsAsEIDs(ranked.p, rgraph)
#'
#' ## Get the edge ids along paths in the original metabolic graph.
#' path.eids <- getPathsAsEIDs(ranked.p, ex_sbml)
#'
getPathsAsEIDs <- function(paths, graph){
if(length(paths$y.labels)>1){
eids <- lapply(paths$paths, getPaths, graph, paths$source.net)
names(eids) = paths$y.labels
}else{
eids <- getPaths(paths$paths, graph, paths$source.net)
}
return(eids)
}
getPaths <- function(paths, graph, source.net){
if(length(paths)==0) return(list())
# graph source unknown
if(is.null(graph$type) || is.null(source.net))
return(lapply(paths, function(x) E(graph,path=x$genes)))
# (G.graph,G.graph) or (R.graph, R.graph)
if(graph$type == source.net)
return(lapply(paths, function(x) E(graph,path=x$genes)))
# (G.graph, R.graph)
if(graph$type=="G.graph" && source.net=="R.graph"){
gene.nodes <- lapply(paths, function(x) V(graph)[sub("^(.*)##", "",V(graph)$name) %in% x$genes])
eid <- lapply(gene.nodes,function(x) E(graph)[x %--% x])
return(eid)
}
# (R.graph, G.graph) or (MR.graph, G.graph) else (MR.graph, R.graph)
if(source.net=="G.graph"){
gene.nodes = lapply(paths, function(x) sub("^(.*)##", "",x$genes))
}else{
gene.nodes = lapply(paths, "[[", "genes")
}
# (R.graph, G.graph)
if(graph$type=="R.graph")
return(lapply(gene.nodes, function(x) E(graph, path=x)))
# It must be MR.graph
eid <- list()
for(i in 1:length(paths)){
deleted.edges <- unlist(lapply(lapply(grep("->",paths[[i]]$compounds),
function(x) unlist(strsplit(paths[[i]]$compounds[[x]], "->"))),
function(y) mapply(
function(from,to)get.shortest.paths(graph, from, to, mode="out", output="epath"),
head(y,-1), y[-1])
))
paths[[i]]$compounds <- unlist(strsplit(paths[[i]]$compounds, "(->)|( \\+ )"))
paths[[i]]$genes <- gene.nodes[[i]]
eid[[i]] = c(as.numeric(deleted.edges),
E(graph)[.to(paths[[i]]$genes[1]) |
paths[[i]]$genes %--% paths[[i]]$compounds|
.from(tail(paths[[i]]$genes,1)) ]
)
}
return(eid)
}
# TODO: pvalue description
#' Extracting and ranking paths from a network
#'
#' Given a weighted igraph object, path ranking finds a set of node/edge sequences (paths) to
#' maximize the sum of edge weights.
#' \code{pathRanker(method="prob.shortest.path")} extracts the K most probable paths within
#' a weighted network.
#' \code{pathRanker(method="pvalue")} extracts a list of paths whose sum of edge weights are
#' significantly higher than random paths of the same length.
#'
#'
#' The input here is \code{graph}. A weight must be assigned to each edge. Bootstrapped Pearson correlation edge weights
#' can be assigned to each edge by \code{\link{assignEdgeWeights}}. However the specification of the edge weight is flexible
#' with the condition that increasing values indicate stronger relationships between vertices.
#'
#' \subsection{Probabilistic Shortest Paths}{
#' \code{pathRanker(method="prob.shortest.path")} finds the K most probable loopless paths given a weighted network.
#' Before the paths are ranked the edge weights are converted into probabilistic edge weights using the Empirical
#' Cumulative Distribution (ECDF) over all edge weights. This is called ECDF edge weight. The ECDF edge weight
#' serves as a probabilistic rank of the most important gene-gene interactions. The probabilistic nature of the ECDF
#' edge weights allow for a significance test to determine if a path contains any functional structure or is
#' simply a random walk. The probability of a path is simily the product of all ECDF weights along the path.
#' This is computed as a sum of the logs of the ECDF edge weights.
#'
#' The follwing arguments can be passed to \code{pathRanker(method="prob.shortest.path")}:
#' \describe{
#' \item{\code{K}}{Maximum number of paths to extract. Defaults to 10.}
#' \item{\code{minPathSize}}{The minimum number of edges for each extracted path. Defualts to 1.}
#' \item{\code{normalize}}{Specify if you want to normalize the probabilistic edge weights (across different labels)
#' before extracting the paths. Defaults to TRUE.}
#' }
#' }
#'
#' \subsection{P-value method}{
#' \code{pathRanker(method="pvalue")} searches all paths between the specified start and end vertices, and if a
#' significant path is found it returns it. However, It doesn't search for the best path between the start and
#' terminal vertices, as there could be many paths which lead to the same terminal vertex, and searching through
#' all of them is time comsuming. We just stop when the first significant path is found.
#'
#' All provided edge weights are recaled from 0-1. Path significance is calculated based on the empirical distribution
#' of random paths of the same length. This can be estimated using \code{\link{samplePaths}} and passed as an argument.
#'
#' The follwing arguments can be passed to \code{pathRanker(method="pvalue")}:
#' \describe{
#' \item{\code{sampledpaths}}{The emripical results from \code{\link{samplePaths}}.}
#' \item{\code{alpha}}{The P value cut-off. Defualts to 0.01}
#' }
#' }
#' @param graph A weighted igraph object. Weights must be in \code{edge.weights} or \code{weight}
#' edge attributes.
#' @param method Which path ranking method to use.
#' @param start A list of start vertices, given by their vertex id.
#' @param end A list of terminal vertices, given by their vertex id.
#' @param verbose Whether to display the progress of the function.
#' @param ... Method-specific parameters. See Details section.
#'
#' @return
#' A list of paths where each path has the following items:
#' \item{gene}{The ordered sequence of genes visited along the path.}
#' \item{compounds}{The ordered sequence of compounds visited along the path.}
#' \item{weights}{The ordered sequence of the log(ECDF edge weights) along the path.}
#' \item{distance}{The sum of the log(ECDF edge weights) along each path. (a sum of logs is a product)}
#'
#'
#' @author Timothy Hancock, Ichigaku Takigawa, Nicolas Wicker and Ahmed Mohamed
#' @family Path ranking methods
#' @seealso getPathsAsEIDs, extractPathNetwork
#' @export
#' @examples
#' ## Prepare a weighted reaction network.
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' ## Assign edge weights based on Affymetrix attributes and microarray dataset.
#' # Calculate Pearson's correlation.
#' data(ex_microarray) # Part of ALL dataset.
#' rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' weight.method = "cor", use.attr="miriam.uniprot",
#' y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' ## Get ranked paths using probabilistic shortest paths.
#' ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' K=20, minPathSize=6)
#'
#' ## Get significantly correlated paths using "p-valvue" method.
#' ## First, establish path score distribution by calling "samplePaths"
#' pathsample <- samplePaths(rgraph, max.path.length=10,
#' num.samples=100, num.warmup=10)
#'
#' ## Get all significant paths with p<0.1
#' significant.p <- pathRanker(rgraph, method = "pvalue",
#' sampledpaths = pathsample ,alpha=0.1)
#'
pathRanker <- function(graph, method=c("prob.shortest.path", "pvalue") ,start, end, verbose=TRUE, ...){
# Checking the graph
if(is.null(E(graph)$edge.weights)){
if(!is.null(E(graph)$weight))
E(graph)$edge.weights <- E(graph)$weight
else{
stop("No edge weights provided.")
}
}
if(length(method)>1 || !method %in% c("prob.shortest.path", "pvalue"))
stop("Please specify the ranking method 'prob.shortest.path' or 'pvalue'.")
if(method == "prob.shortest.path")
return(rankShortestPaths(graph, start=start, end=end, verbose=verbose, ...))
else return(rankPvalue(graph, start=start, end=end, verbose=verbose, ...))
}
rankShortestPaths <- function(graph, K=10, minPathSize=1, start, end, normalize = TRUE, verbose) {
pg = processNetwork(graph, start, end, scale="ecdf" ,normalize)
zret <- list()
for (i in 1:ncol(pg$weights)) {
if(verbose){
if (ncol(pg$weights) > 1) message("Extracting the ",K," most probable paths for ",graph$y.labels[i])
else message("Extracting the ",K," most probable paths.")
}
# min path size is increased by 2 to include start and end compounds.
ps <- .Call("pathranker",
pg$nodes, #nodes
pg$edges, #Edges
pg$weights[,i], #Edge weights
K=K,minpathsize = minPathSize+2) #add 2 nodes to min path length ("s" & "t")
idx <- which(sapply(ps,is.null))
if (length(idx)>0) ps <- ps[-idx]
if(length(ps)==0){
if(ncol(pg$weights) > 1)
message(" Warning:Counldn't find paths matching the criteria for ",graph$y.labels[i])
else message(" Warning:Counldn't find paths matching the criteria.")
}
ps <- ps[order(sapply(ps,"[[", "distance"))] #order paths by distance
if (ncol(pg$weights) > 1){
zret[[i]] <- ps
}else zret <- ps
}
if(length(zret)==0)return(NULL)
if (ncol(pg$weights) > 1) {
names(zret) <- graph$y.labels
colnames(pg$weights) <- paste("prob",graph$y.labels,sep = ":")
} else colnames(pg$weights) <- "prob"
return(list(paths = zret,edge.weights = pg$weights, y.labels=graph$y.labels, source.net=graph$type))
}
rankPvalue <- function(graph, sampledpaths, start, end, alpha = 0.01, verbose) {
# Checking the graph
if(is.null(E(graph)$edge.weights)){
if(!is.null(E(graph)$weight))
E(graph)$edge.weights <- E(graph)$weight
else{
stop("No edge weights provided.")
}
}
range <- ifelse(missing(sampledpaths), ecount(graph),
if(length(graph$y.labels)==1)nrow(sampledpaths)-1 else nrow(sampledpaths[[1]])-1)
# Defining start reactions end their scopes.
if(missing(start)){
if(verbose) message("Start nodes weren't provided. Using entry nodes as start points.")
start <- V(graph)[degree(graph,mode="in")==0]$name
}
if(missing(end)){
end <- V(graph)[ unique(unlist(neighborhood(graph, order=range, nodes=start, mode="out"))) ]
}
pg = processNetwork(graph, start, end, scale="rescale", normalize=FALSE)
zret=NULL
for (i in 1:ncol(pg$weights)) {
if(verbose){
if (ncol(pg$weights) > 1) message("Extracting non-random path p<",alpha," for ",graph$y.labels[i])
else message("Extracting non-random path p<",alpha)
}
p.sample <- if(missing(sampledpaths)) NULL
else if(ncol(pg$weights) > 1) t(sampledpaths[[i]])
else t(sampledpaths)
ps <- .Call("scope",
pg$nodes,
pg$edges,
pg$weights[,i], #add column subscript
SAMPLEDPATHS = p.sample,
ALPHA = alpha,
ECHO = as.integer(verbose))
idx <- which(sapply(ps$paths,is.null))
ps$paths <- ps$paths[-idx]
if(length(ps)==0) stop("couldn't find any significant paths")
ps$paths <- ps$paths[order(sapply(ps$paths,"[[", "pvalue"))] #order paths by pvalue
if (ncol(pg$weights) > 1) zret[[i]] <- ps
else zret <- ps
}
if (ncol(pg$weights) > 1)
names(zret) <- graph$y.labels
zret$y.labels=graph$y.labels
zret$source.net = graph$type
return(zret)
}
#' Creates a set of sample path p-values for each length given a weighted network
#'
#' Randomly traverses paths of increasing lengths within a set network to create an
#' empirical pathway distribution for more accurate determination of path significance.
#'
#' Can take a bit of time.
#'
#' @param graph A weighted igraph object. Weights must be in \code{edge.weights} or \code{weight}
#' edge attributes.
#' @param max.path.length The maxmimum path length.
#' @param num.samples The numner of paths to sample
#' @param num.warmup The number of warm up paths to sample.
#' @param verbose Whether to display the progress of the function.
#'
#' @return A matrix where each row is a path length and each column is the number of paths sampled.
#'
#' @author Timothy Hancock
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' ## Prepare a weighted reaction network.
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' ## Assign edge weights based on Affymetrix attributes and microarray dataset.
#' # Calculate Pearson's correlation.
#' data(ex_microarray) # Part of ALL dataset.
#' rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' weight.method = "cor", use.attr="miriam.uniprot",
#' y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' ## Get significantly correlated paths using "p-valvue" method.
#' ## First, establish path score distribution by calling "samplePaths"
#' pathsample <- samplePaths(rgraph, max.path.length=10,
#' num.samples=100, num.warmup=10)
#'
#' ## Get all significant paths with p<0.1
#' significant.p <- pathRanker(rgraph, method = "pvalue",
#' sampledpaths = pathsample ,alpha=0.1)
#'
samplePaths <- function(graph, max.path.length, num.samples=1000, num.warmup=10, verbose=TRUE){
# Checking the graph
if(is.null(E(graph)$edge.weights)){
if(!is.null(E(graph)$weight))
E(graph)$edge.weights <- E(graph)$weight
else{
stop("No edge weights provided.")
}
}
if (max.path.length > vcount(graph)-1) {
if(verbose) message("Maximum Path Length is larger than Number of Network Vertices ... setting them to be equal.")
max.path.length <- vcount(graph)-1
}
# Get edge.weights and apply ecdf on each column
edge.weights = do.call("rbind", E(graph)$edge.weights)
edge.probs <- apply(edge.weights, 2, rescale, c(1,0))
# Prepare edgelist as required by PathRanker method.
label = lapply(E(graph)$attr, paste, collapse=' + ') # For edges annotated with >1 compound, concatenate.
edgelist = get.edgelist(graph, names=FALSE)
edgelist = data.frame(from=edgelist[,1], to=edgelist[,2], label=unlist(label), stringsAsFactors=FALSE)
pg = list(nodes=V(graph)$name, edges=edgelist, weights=edge.probs)
if(ncol(pg$weights) == 1){
ps <- .Call("samplepaths",
pg$nodes,
pg$edges,
pg$weights,
MAXPATHLENGTH = as.integer(max.path.length),
SAMPLEPATHS = as.integer(num.samples),
WARMUPSTEPS = as.integer(num.warmup)
)
return( matrix(ps,max.path.length+1,num.samples,byrow = TRUE) )
}else{
ps <- list()
for (i in 1:ncol(pg$weights)) {
if(verbose) message("Sampling",num.samples," for",graph$y.labels[i],"\n")
p.samplei <- .Call("samplepaths",
pg$nodes,
pg$edges,
pg$weights,
MAXPATHLENGTH = as.integer(max.path.length),
SAMPLEPATHS = as.integer(num.samples),
WARMUPSTEPS = as.integer(num.warmup)
)
ps[[i]] <- matrix(p.samplei, max.path.length+1, num.samples, byrow = TRUE)
}
names(ps) <- graph$y.labels
return(ps)
}
}
processNetwork <- function(graph, start, end, scale=c("ecdf", "rescale"), normalize){
# Add S, T vetrices for the shortest path algorithm.
snodes <- if(missing(start)) V(graph)[degree(graph,mode="in")==0]$name else V(graph)[start]$name
enodes <- if(missing(end)) V(graph)[degree(graph,mode="out")==0]$name else V(graph)[end]$name
enodes <- enodes[!enodes %in% snodes]
graph <- graph + vertices("s", "t")
graph <- add.edges(graph,
c(rbind("s",snodes),
rbind(enodes,"t")),
attr=list(edge.weights=list(rep(1, length(unlist(graph$y.labels)) )),
compound=""))
# Get edge.weights and apply ecdf on each column
edge.weights <- do.call("rbind", as.list(E(graph)$edge.weights))
if(sum(!is.finite(edge.weights))>0){
warning("Edge weights contain non-finite numbers. Setting them to the minimum edge weight")
edge.weights <- apply(edge.weights, 2,
function(x){
x[ !is.finite(x) ] <- min( x[ is.finite(x) ] )
return(x)
})
}
if(scale=="ecdf")
edge.probs <- apply(edge.weights, 2, function(x) ecdf(x[1:ecount(graph)])(x))
else edge.probs <- apply(edge.weights, 2, rescale, c(1,0))
# Normalize across Y labels
if (ncol(edge.probs) > 1 & normalize == TRUE) {
edge.probs <- edge.probs / rowSums(edge.probs)
}
if(scale=="ecdf")
edge.probs <- -log(edge.probs)
# Prepare edgelist as required by PathRanker method.
label = lapply(E(graph)$compound, paste, collapse=' + ') # For edges annotated with >1 compound, concatenate.
edgelist = get.edgelist(graph, names=FALSE)
edgelist = data.frame(from=edgelist[,1], to=edgelist[,2], label=unlist(label), stringsAsFactors=FALSE)
return(list(nodes=V(graph)$name, edges=edgelist, weights=edge.probs))
}
#Adapted from scales package
rescale <- function (x, to = c(0, 1), from = range(x, na.rm = TRUE)){
(x - from[1])/diff(from) * diff(to) + to[1]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.