R/pathRank.R
In NetPathMiner: NetPathMiner for Biological Network Construction, Path Mining and Visualization

Documented in extractPathNetwork getPathsAsEIDs pathRanker samplePaths

###############################################################################
#
# pathRank.R:     This file contains functions to rank paths provided with a w
# weigthed igraph object.
#
# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
#
# This is released under GPL-2.
#
# Documentation was created using roxygen
#
###############################################################################

#' Creates a subnetwork from a ranked path list
#'
#' Creates a subnetwork from a ranked path list generated by \code{\link{pathRanker}}.
#'
#' @param paths The paths extracted by \code{\link{pathRanker}}.
#' @param graph A annotated igraph object.
#'
#' @return A subnetwork from all paths provided. If paths are computed for several
#' labels (sample categories), a subnetwork is returned for each label.
#'
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' 	## Prepare a weighted reaction network.
#' 	## Conver a metabolic network to a reaction network.
#'  data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#'  rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' 	## Assign edge weights based on Affymetrix attributes and microarray dataset.
#'  # Calculate Pearson's correlation.
#' 	data(ex_microarray)	# Part of ALL dataset.
#' 	rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' 		weight.method = "cor", use.attr="miriam.uniprot",
#' 		y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' 	## Get ranked paths using probabilistic shortest paths.
#'  ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' 					K=20, minPathSize=6)
#'
#' 	## Get the subnetwork of paths in reaction graph.
#' 	reaction.sub <- getPathsAsEIDs(ranked.p, rgraph)
#'
#' 	## Get the subnetwork of paths in the original metabolic graph.
#' 	metabolic.sub <- getPathsAsEIDs(ranked.p, ex_sbml)
#'
extractPathNetwork <- function(paths, graph){
    p.eids <- getPathsAsEIDs(paths, graph)
    if(length(paths$y.labels)>1){
        graph.ls <- lapply(p.eids, function(x)
                    subgraph.edges(graph, as.integer(unlist(x))) )
        names(graph.ls) <- paths$y.labels
        return(graph.ls)
    }else
        return(subgraph.edges(graph,as.integer(unlist(p.eids))))
}

#' Convert a ranked path list to edge ids of a graph
#'
#' Convert a ranked path list to Edge ids of a graph, where paths can come from
#' a different representation (for example matching path from a reaction network to
#' edges on a metabolic network).
#'
#' @param paths The paths extracted by \code{\link{pathRanker}}.
#' @param graph A annotated igraph object.
#'
#' @return A list of edge ids on the provided graph.
#'
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' 	## Prepare a weighted reaction network.
#' 	## Conver a metabolic network to a reaction network.
#'  data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#'  rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' 	## Assign edge weights based on Affymetrix attributes and microarray dataset.
#'  # Calculate Pearson's correlation.
#' 	data(ex_microarray)	# Part of ALL dataset.
#' 	rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' 		weight.method = "cor", use.attr="miriam.uniprot",
#' 		y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' 	## Get ranked paths using probabilistic shortest paths.
#'  ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' 					K=20, minPathSize=6)
#'
#' 	## Get the edge ids along paths in the reaction graph.
#' 	path.eids <- getPathsAsEIDs(ranked.p, rgraph)
#'
#' 	## Get the edge ids along paths in the original metabolic graph.
#' 	path.eids <- getPathsAsEIDs(ranked.p, ex_sbml)
#'
getPathsAsEIDs <- function(paths, graph){
    if(length(paths$y.labels)>1){
        eids <- lapply(paths$paths, getPaths, graph, paths$source.net)
        names(eids) = paths$y.labels

    }else{
        eids <- getPaths(paths$paths, graph, paths$source.net)
    }
    return(eids)
}


getPaths <- function(paths, graph, source.net){
    if(length(paths)==0) return(list())

    # graph source unknown
    if(is.null(graph$type) || is.null(source.net))
        return(lapply(paths, function(x) E(graph,path=x$genes)))

    # (G.graph,G.graph) or (R.graph, R.graph)
    if(graph$type == source.net)
        return(lapply(paths, function(x) E(graph,path=x$genes)))

    # (G.graph, R.graph)
    if(graph$type=="G.graph" && source.net=="R.graph"){
        gene.nodes <- lapply(paths, function(x) V(graph)[sub("^(.*)##", "",V(graph)$name) %in% x$genes])
        eid <- lapply(gene.nodes,function(x) E(graph)[x %--% x])

        return(eid)
    }

    # (R.graph, G.graph) or (MR.graph, G.graph) else (MR.graph, R.graph)
    if(source.net=="G.graph"){
        gene.nodes = lapply(paths, function(x) sub("^(.*)##", "",x$genes))
    }else{
        gene.nodes = lapply(paths, "[[", "genes")
    }

    # (R.graph, G.graph)
    if(graph$type=="R.graph")
        return(lapply(gene.nodes, function(x) E(graph, path=x)))

    # It must be MR.graph
    eid <- list()
    for(i in 1:length(paths)){
        deleted.edges <- unlist(lapply(lapply(grep("->",paths[[i]]$compounds),
                                function(x) unlist(strsplit(paths[[i]]$compounds[[x]], "->"))),
                        function(y) mapply(
                                    function(from,to)get.shortest.paths(graph, from, to, mode="out", output="epath"),
                                    head(y,-1), y[-1])
                ))

        paths[[i]]$compounds <- unlist(strsplit(paths[[i]]$compounds, "(->)|( \\+ )"))
        paths[[i]]$genes <- gene.nodes[[i]]
        eid[[i]] = c(as.numeric(deleted.edges),
                    E(graph)[.to(paths[[i]]$genes[1]) |
                        paths[[i]]$genes %--% paths[[i]]$compounds|
                        .from(tail(paths[[i]]$genes,1)) ]
                    )
    }
    return(eid)


}

# TODO: pvalue description
#' Extracting and ranking paths from a network
#'
#' Given a weighted igraph object, path ranking finds a set of node/edge sequences (paths) to
#' maximize the sum of edge weights.
#' \code{pathRanker(method="prob.shortest.path")} extracts the K most probable paths within
#' a weighted network.
#' \code{pathRanker(method="pvalue")} extracts a list of paths whose sum of edge weights are
#' significantly higher than random paths of the same length.
#'
#'
#' The input here is \code{graph}. A weight must be assigned to each edge. Bootstrapped Pearson correlation edge weights
#' can be assigned to each edge by \code{\link{assignEdgeWeights}}. However the specification of the edge weight is flexible
#' with the condition that increasing values indicate stronger relationships between vertices.
#'
#' \subsection{Probabilistic Shortest Paths}{
#' \code{pathRanker(method="prob.shortest.path")} finds the K most probable loopless paths given a weighted network.
#' Before the paths are ranked the edge weights are converted into probabilistic edge weights using the Empirical
#' Cumulative Distribution (ECDF) over all edge weights.  This is called ECDF edge weight. The ECDF edge weight
#' serves as a probabilistic rank of the most important gene-gene interactions.  The probabilistic nature of the ECDF
#' edge weights allow for a significance test to determine if a path contains any functional structure or is
#' simply a random walk.  The probability of a path is simily the product of all ECDF weights along the path.
#' This is computed as a sum of the logs of the ECDF edge weights.
#'
#' The follwing arguments can be passed to \code{pathRanker(method="prob.shortest.path")}:
#' \describe{
#' \item{\code{K}}{Maximum number of paths to extract. Defaults to 10.}
#' \item{\code{minPathSize}}{The minimum number of edges for each extracted path. Defualts to 1.}
#' \item{\code{normalize}}{Specify if you want to normalize the probabilistic edge weights (across different labels)
#' before extracting the paths. Defaults to TRUE.}
#' }
#' }
#'
#' \subsection{P-value method}{
#' \code{pathRanker(method="pvalue")} searches all paths between the specified start and end vertices, and if a
#' significant path is found it returns it. However, It doesn't search for the best path between the start and
#' terminal vertices,  as there could be many paths which lead to the same terminal vertex, and searching through
#' all of them is time comsuming.  We just stop when the first significant path is found.
#'
#' All provided edge weights are recaled from 0-1. Path significance is calculated based on the empirical distribution
#' of random paths of the same length. This can be estimated using \code{\link{samplePaths}} and passed as an argument.
#'
#' The follwing arguments can be passed to \code{pathRanker(method="pvalue")}:
#' \describe{
#' \item{\code{sampledpaths}}{The emripical results from \code{\link{samplePaths}}.}
#' \item{\code{alpha}}{The P value cut-off. Defualts to 0.01}
#' }
#' }
#' @param graph A weighted igraph object. Weights must be in \code{edge.weights} or \code{weight}
#' edge attributes.
#' @param method Which path ranking method to use.
#' @param start A list of start vertices, given by their vertex id.
#' @param end  A list of terminal vertices, given by their vertex id.
#' @param verbose Whether to display the progress of the function.
#' @param ... Method-specific parameters. See Details section.
#'
#' @return
#' A list of paths where each path has the following items:
#' \item{gene}{The ordered sequence of genes visited along the path.}
#' \item{compounds}{The ordered sequence of compounds visited along the path.}
#' \item{weights}{The ordered sequence of the log(ECDF edge weights) along the path.}
#' \item{distance}{The sum of the log(ECDF edge weights) along each path.  (a sum of logs is a product)}
#'
#'
#' @author Timothy Hancock, Ichigaku Takigawa, Nicolas Wicker and Ahmed Mohamed
#' @family Path ranking methods
#' @seealso getPathsAsEIDs, extractPathNetwork
#' @export
#' @examples
#' 	## Prepare a weighted reaction network.
#' 	## Conver a metabolic network to a reaction network.
#'  data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#'  rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' 	## Assign edge weights based on Affymetrix attributes and microarray dataset.
#'  # Calculate Pearson's correlation.
#' 	data(ex_microarray)	# Part of ALL dataset.
#' 	rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' 		weight.method = "cor", use.attr="miriam.uniprot",
#' 		y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' 	## Get ranked paths using probabilistic shortest paths.
#'  ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' 					K=20, minPathSize=6)
#'
#' 	## Get significantly correlated paths using "p-valvue" method.
#' 	##   First, establish path score distribution by calling "samplePaths"
#'  pathsample <- samplePaths(rgraph, max.path.length=10,
#'                         num.samples=100, num.warmup=10)
#'
#' 	##   Get all significant paths with p<0.1
#' 	significant.p <- pathRanker(rgraph, method = "pvalue",
#'                 sampledpaths = pathsample ,alpha=0.1)
#'
pathRanker <- function(graph, method=c("prob.shortest.path", "pvalue") ,start, end, verbose=TRUE, ...){
    # Checking the graph
    if(is.null(E(graph)$edge.weights)){
        if(!is.null(E(graph)$weight))
            E(graph)$edge.weights <- E(graph)$weight
        else{
            stop("No edge weights provided.")
        }
    }

    if(length(method)>1 || !method %in% c("prob.shortest.path", "pvalue"))
        stop("Please specify the ranking method 'prob.shortest.path' or 'pvalue'.")

    if(method == "prob.shortest.path")
        return(rankShortestPaths(graph, start=start, end=end, verbose=verbose, ...))
    else return(rankPvalue(graph, start=start, end=end, verbose=verbose, ...))

}

rankShortestPaths <- function(graph, K=10, minPathSize=1, start, end, normalize = TRUE, verbose) {
    pg = processNetwork(graph, start, end, scale="ecdf" ,normalize)

    zret <- list()
    for (i in 1:ncol(pg$weights)) {
        if(verbose){
            if (ncol(pg$weights) > 1) message("Extracting the ",K," most probable paths for ",graph$y.labels[i])
            else message("Extracting the ",K," most probable paths.")
        }

        # min path size is increased by 2 to include start and end compounds.
        ps <- .Call("pathranker",
                pg$nodes,     #nodes
                pg$edges,                #Edges
                pg$weights[,i],            #Edge weights
                K=K,minpathsize = minPathSize+2)    #add 2 nodes to min path length ("s" & "t")



        idx <- which(sapply(ps,is.null))
        if (length(idx)>0) ps <- ps[-idx]
        if(length(ps)==0){
            if(ncol(pg$weights) > 1)
                message("  Warning:Counldn't find paths matching the criteria for ",graph$y.labels[i])
            else message("  Warning:Counldn't find paths matching the criteria.")
        }

        ps <- ps[order(sapply(ps,"[[", "distance"))] #order paths by distance

        if (ncol(pg$weights) > 1){
            zret[[i]] <- ps
        }else zret <- ps
    }

    if(length(zret)==0)return(NULL)

    if (ncol(pg$weights) > 1) {
        names(zret) <- graph$y.labels
        colnames(pg$weights) <- paste("prob",graph$y.labels,sep = ":")
    } else colnames(pg$weights) <- "prob"

    return(list(paths = zret,edge.weights = pg$weights, y.labels=graph$y.labels, source.net=graph$type))
}

rankPvalue <- function(graph, sampledpaths, start, end, alpha = 0.01, verbose) {
    # Checking the graph
    if(is.null(E(graph)$edge.weights)){
        if(!is.null(E(graph)$weight))
            E(graph)$edge.weights <- E(graph)$weight
        else{
            stop("No edge weights provided.")
        }
    }

    range <- ifelse(missing(sampledpaths), ecount(graph),
                    if(length(graph$y.labels)==1)nrow(sampledpaths)-1 else nrow(sampledpaths[[1]])-1)

    # Defining start reactions end their scopes.
    if(missing(start)){
        if(verbose) message("Start nodes weren't provided. Using entry nodes as start points.")
        start <- V(graph)[degree(graph,mode="in")==0]$name
    }
    if(missing(end)){
        end <- V(graph)[ unique(unlist(neighborhood(graph, order=range, nodes=start, mode="out"))) ]
    }

    pg = processNetwork(graph, start, end, scale="rescale", normalize=FALSE)

    zret=NULL
    for (i in 1:ncol(pg$weights)) {
        if(verbose){
            if (ncol(pg$weights) > 1) message("Extracting non-random path p<",alpha," for ",graph$y.labels[i])
            else message("Extracting non-random path p<",alpha)
        }

        p.sample <- if(missing(sampledpaths)) NULL
                    else if(ncol(pg$weights) > 1) t(sampledpaths[[i]])
                    else t(sampledpaths)

        ps <- .Call("scope",
                pg$nodes,
                pg$edges,
                pg$weights[,i],    #add column subscript
                SAMPLEDPATHS = p.sample,
                ALPHA = alpha,
                ECHO = as.integer(verbose))

        idx <- which(sapply(ps$paths,is.null))
        ps$paths <- ps$paths[-idx]

        if(length(ps)==0) stop("couldn't find any significant paths")

        ps$paths <- ps$paths[order(sapply(ps$paths,"[[", "pvalue"))] #order paths by pvalue

        if (ncol(pg$weights) > 1) zret[[i]] <- ps
        else zret <- ps
    }
    if (ncol(pg$weights) > 1)
        names(zret) <- graph$y.labels

    zret$y.labels=graph$y.labels
    zret$source.net = graph$type

    return(zret)
}

#' Creates a set of sample path p-values for each length given a weighted network
#'
#' Randomly traverses paths of increasing lengths within a set network to create an
#' empirical pathway distribution for more accurate determination of path significance.
#'
#' Can take a bit of time.
#'
#' @param graph A weighted igraph object. Weights must be in \code{edge.weights} or \code{weight}
#' edge attributes.
#' @param max.path.length The maxmimum path length.
#' @param num.samples The numner of paths to sample
#' @param num.warmup The number of warm up paths to sample.
#' @param verbose Whether to display the progress of the function.
#'
#' @return A matrix where each row is a path length and each column is the number of paths sampled.
#'
#' @author Timothy Hancock
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' 	## Prepare a weighted reaction network.
#' 	## Conver a metabolic network to a reaction network.
#'  data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#'  rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' 	## Assign edge weights based on Affymetrix attributes and microarray dataset.
#'  # Calculate Pearson's correlation.
#' 	data(ex_microarray)	# Part of ALL dataset.
#' 	rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' 		weight.method = "cor", use.attr="miriam.uniprot",
#' 		y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' 	## Get significantly correlated paths using "p-valvue" method.
#' 	##   First, establish path score distribution by calling "samplePaths"
#'  pathsample <- samplePaths(rgraph, max.path.length=10,
#'                         num.samples=100, num.warmup=10)
#'
#' 	##   Get all significant paths with p<0.1
#' 	significant.p <- pathRanker(rgraph, method = "pvalue",
#'                 sampledpaths = pathsample ,alpha=0.1)
#'
samplePaths <- function(graph, max.path.length, num.samples=1000, num.warmup=10, verbose=TRUE){
    # Checking the graph
    if(is.null(E(graph)$edge.weights)){
        if(!is.null(E(graph)$weight))
            E(graph)$edge.weights <- E(graph)$weight
        else{
            stop("No edge weights provided.")
        }
    }

    if (max.path.length > vcount(graph)-1) {
        if(verbose) message("Maximum Path Length is larger than Number of Network Vertices ... setting them to be equal.")
        max.path.length <-  vcount(graph)-1
    }

    # Get edge.weights and apply ecdf on each column
    edge.weights = do.call("rbind", E(graph)$edge.weights)
    edge.probs <- apply(edge.weights, 2, rescale, c(1,0))


    # Prepare edgelist as required by PathRanker method.
    label = lapply(E(graph)$attr, paste, collapse=' + ')    # For edges annotated with >1 compound, concatenate.
    edgelist = get.edgelist(graph, names=FALSE)
    edgelist = data.frame(from=edgelist[,1], to=edgelist[,2], label=unlist(label), stringsAsFactors=FALSE)

    pg = list(nodes=V(graph)$name, edges=edgelist, weights=edge.probs)

    if(ncol(pg$weights) == 1){
        ps <- .Call("samplepaths",
                pg$nodes,
                pg$edges,
                pg$weights,
                MAXPATHLENGTH = as.integer(max.path.length),
                SAMPLEPATHS = as.integer(num.samples),
                WARMUPSTEPS = as.integer(num.warmup)
              )

        return( matrix(ps,max.path.length+1,num.samples,byrow = TRUE) )
    }else{
        ps <- list()
        for (i in 1:ncol(pg$weights)) {
            if(verbose) message("Sampling",num.samples," for",graph$y.labels[i],"\n")
            p.samplei <- .Call("samplepaths",
                    pg$nodes,
                    pg$edges,
                    pg$weights,
                    MAXPATHLENGTH = as.integer(max.path.length),
                    SAMPLEPATHS = as.integer(num.samples),
                    WARMUPSTEPS = as.integer(num.warmup)
            )
            ps[[i]] <- matrix(p.samplei, max.path.length+1, num.samples, byrow = TRUE)
        }
        names(ps) <- graph$y.labels
        return(ps)
    }
}

processNetwork <- function(graph, start, end, scale=c("ecdf", "rescale"), normalize){
    # Add S, T vetrices for the shortest path algorithm.
    snodes <- if(missing(start)) V(graph)[degree(graph,mode="in")==0]$name else V(graph)[start]$name
    enodes <- if(missing(end)) V(graph)[degree(graph,mode="out")==0]$name else V(graph)[end]$name
    enodes <- enodes[!enodes %in% snodes]

    graph <- graph + vertices("s", "t")
    graph <- add.edges(graph,
            c(rbind("s",snodes),
              rbind(enodes,"t")),
            attr=list(edge.weights=list(rep(1, length(unlist(graph$y.labels)) )),
                    compound=""))

    # Get edge.weights and apply ecdf on each column
    edge.weights <- do.call("rbind", as.list(E(graph)$edge.weights))

    if(sum(!is.finite(edge.weights))>0){
        warning("Edge weights contain non-finite numbers. Setting them to the minimum edge weight")

        edge.weights <- apply(edge.weights, 2,
                            function(x){
                                x[ !is.finite(x) ] <- min( x[ is.finite(x) ] )
                                return(x)
                            })
    }
    if(scale=="ecdf")
        edge.probs <- apply(edge.weights, 2, function(x) ecdf(x[1:ecount(graph)])(x))
    else edge.probs <- apply(edge.weights, 2, rescale, c(1,0))


    # Normalize across Y labels
    if (ncol(edge.probs) > 1 & normalize == TRUE) {
        edge.probs <- edge.probs / rowSums(edge.probs)
    }

    if(scale=="ecdf")
        edge.probs <- -log(edge.probs)

    # Prepare edgelist as required by PathRanker method.
    label = lapply(E(graph)$compound, paste, collapse=' + ')    # For edges annotated with >1 compound, concatenate.
    edgelist = get.edgelist(graph, names=FALSE)
    edgelist = data.frame(from=edgelist[,1], to=edgelist[,2], label=unlist(label), stringsAsFactors=FALSE)


    return(list(nodes=V(graph)$name, edges=edgelist, weights=edge.probs))
}

#Adapted from scales package
rescale <- function (x, to = c(0, 1), from = range(x, na.rm = TRUE)){
    (x - from[1])/diff(from) * diff(to) + to[1]
}
Any scripts or data that you put into this service are public.
NetPathMiner documentation built on Nov. 8, 2020, 8:20 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
NetPathMiner
NetPathMiner for Biological Network Construction, Path Mining and Visualization

R/pathRank.R
In NetPathMiner: NetPathMiner for Biological Network Construction, Path Mining and Visualization

Defines functions processNetwork samplePaths rankPvalue rankShortestPaths pathRanker getPaths getPathsAsEIDs extractPathNetwork

Documented in extractPathNetwork getPathsAsEIDs pathRanker samplePaths

Try the NetPathMiner package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

NetPathMiner NetPathMiner for Biological Network Construction, Path Mining and Visualization

R/pathRank.R In NetPathMiner: NetPathMiner for Biological Network Construction, Path Mining and Visualization

Defines functions processNetwork samplePaths rankPvalue rankShortestPaths pathRanker getPaths getPathsAsEIDs extractPathNetwork

Documented in extractPathNetwork getPathsAsEIDs pathRanker samplePaths

Try the NetPathMiner package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

NetPathMiner
NetPathMiner for Biological Network Construction, Path Mining and Visualization

R/pathRank.R
In NetPathMiner: NetPathMiner for Biological Network Construction, Path Mining and Visualization