R/PCSF_rand.R

#' Prize-collecting Steiner Forest (PCSF) with randomized edge costs
#'
#' \code{PCSF_rand} returns a union of subnetworks obtained by solving the PCSF on the 
#' given interaction network by adding a random noise to edge costs each time.
#' @param ppi An interaction network as an \pkg{igraph} object.
#' @param terminals  A list of terminal genes with prizes to be analyzed in the PCSF context. 
#' A named \code{numeric} vector, where terminal genes are named same as in the interaction network 
#' and numeric values correspond to the importance of the gene within the study.
#' @param n An \code{integer} value to determine the number of runs with random noise added edge costs. 
#' A default value is 10.
#' @param r A \code{numeric} value to determine additional random noise to edge costs. 
#' A random noise upto r percent of the edge cost is added to each edge. A default value is 0.1
#' @param w A  \code{numeric} value for tuning the number of trees in the output. A default value is 2.
#' @param b A  \code{numeric} value for tuning the node prizes. A default value is 1.
#' @param mu A  \code{numeric} value for a hub penalization. A default value is 0.0005.
#' @param dummies A list of nodes that are to connected to the root of the tree. If missing the root will be connected to all terminals.
#' @return The final subnetwork obtained by taking the union of the PCSF outputs generated by 
#' adding a random noise to edge costs each time. It returns an \pkg{igraph} object with the node prize 
#' and edge cost attributes representing the total number of show ups throughout all runs.
#' @import igraph
#' @export
#' 
#' 
#' @details 
#' 
#' In order to increase the robustness of the resulting structure, 
#' it is recommended to solve the PCSF several times on the same network
#' while adding some noise to the edge costs each time, and combine all results 
#' in a final subnetwork. The union of all outputs may explain 
#' the underlying biology better. 
#' 
#' @examples
#' \dontrun{
#' library("PCSF")
#' data("STRING")
#' data("Tgfb_phospho")
#' terminals <- Tgfb_phospho
#' ppi <- construct_interactome(STRING)
#' subnet <- PCSF_rand(ppi, terminals, n = 10, r =0.1, w = 2, b = 2, mu = 0.0005)}
#' 
#' @author Murodzhon Akhmedov
#'  
#' @seealso \code{\link{PCSF}}, \code{\link{plot.PCSFe}}
#' 
#' @references 
#' Akhmedov M., LeNail A., Bertoni F., Kwee I., Fraenkel E., and Montemanni R. (2017)
#' A Fast Prize-Collecting Steiner Forest Algorithm for Functional Analyses in Biological Networks.
#' \emph{Lecture Notes in Computer Science}, to appear.


PCSF_rand <-
function(ppi, terminals, n = 10, r = 0.1, w = 2, b = 1, mu = 0.0005,dummies){
  
  # Checking function arguments
  if (missing(ppi))
    stop("Need to specify an interaction network \"ppi\".")
  if (class(ppi) != "igraph")
    stop("The interaction network \"ppi\" must be an igraph object.")
  if (missing(terminals))
    stop("  Need to provide terminal nodes as a named numeric vector, 
    where node names must be same as in the interaction network.")
  if(is.null(names(terminals)))
    stop("  The terminal nodes must be provided as a named numeric vector, 
    where node names must be same as in the interaction network.")

  
  # Gather the terminal genes to be analyzed, and their scores
  terminal_names = names(terminals)
  terminal_values = as.numeric(terminals)
  
  # Incorporate the node prizes
  node_names = V(ppi)$name
  node_prz = vector(mode = "numeric", length = length(node_names))
  index = match(terminal_names, node_names)
  percent = signif((length(index) - sum(is.na(index)))/length(index)*100, 4)
  if (percent < 5)
    stop("  Less than 1% of your terminal nodes are matched in the interactome, check your terminals!")
  cat(paste0("  ", percent, "% of your terminal nodes are included in the interactome\n"))
  terminal_names = terminal_names[!is.na(index)]
  terminal_values = terminal_values[!is.na(index)]
  index = index[!is.na(index)]
  node_prz[index] =  terminal_values

  ## Prepare input file for MST-PCSF implementation in C++
  if(missing(dummies)||is.null(dummies)||is.na(dummies))
    dummies = terminal_names #re-assign this to allow for input
  
  cat("  Solving the PCSF by adding random noise to the edge costs...\n")
  
  # Calculate the hub penalization scores
  node_degrees = igraph::degree(ppi)
  hub_penalization = - mu*node_degrees
  
  
  # Update the node prizes
  node_prizes = b*node_prz
  index = which(node_prizes==0)
  node_prizes[index] = hub_penalization[index]
  
  # Construct the list of edges 
  edges = ends(ppi,es = E(ppi))
  from = c(rep("DUMMY", length(dummies)), edges[,1])
  to = c(dummies, edges[,2])

  all_nodes=NULL
  
  # Run the MST-PCSF algorithm for n times with random noise added to edge costs at each time
  for(i in 1:n){
    
    # Randomize the edge costs
    cost = c(rep(w, length(dummies)), E(ppi)$weight + E(ppi)$weight*stats::runif(length(E(ppi)), 0, r))
    
    # Feed in the input files into MSt-PCSF algorithm
    output = call_sr(from,to,cost,node_names,node_prizes)
    
    # Construct an igraph object for current parameter set, and save it
    edge = data.frame(as.character(output[[1]]),as.character(output[[2]]))
    colnames(edge)= c("source", "target")
    edge = edge[which(edge[,1]!="DUMMY"), ]
    edge = edge[which(edge[,2]!="DUMMY"), ]
    graph = graph.data.frame(edge,directed=F)
    assign(paste0("graph_",i), get("graph"))
    all_nodes = c(all_nodes, V(graph)$name)
  }
  
  # Calculate graph statistics
  node_frequency = table(all_nodes)
  node_names = names(node_frequency)
  node_prizes =  as.numeric(node_frequency)
  
  # Combine the graphs in order to get unionized graph
  adj_matrix = matrix(0, length(node_names), length(node_names))
  colnames(adj_matrix) = node_names
  rownames(adj_matrix) = node_names
  for(i in 1:n){
    assign("graph", get(paste0("graph_",i)))
    edges = ends(graph,es = E(graph))
    x = match(edges[,1],node_names)
    y = match(edges[,2],node_names)
    if(length(x)>0 & length(y)>0){
      for( j in 1:length(x)){
        if(x[j]>=y[j]){
          k = x[j]
          l = y[j]
        }else{
          k = y[j]
          l = x[j]
        }
        adj_matrix[k,l] = adj_matrix[k,l]+1
      }
    }
  }
  
  # Check the size of output subnetwork and print a warning if it is 0
  if(sum(adj_matrix) != 0){
    
    # Construct the igraph object from the union graph
    subnet = graph_from_adjacency_matrix(adj_matrix, weighted=TRUE, mode="undirected")
    index = match(V(subnet)$name, node_names)
    V(subnet)$prize = node_prizes[index]
    
    #   # Associate the type of nodes to shape
    #   V(subnet)$shape = "triangle"
    #   index = match(terminal_names, V(subnet)$name)
    #   index = index[!is.na(index)]
    #   V(subnet)$shape[index] = "circle"
    
    # Associate the type of nodes
    V(subnet)$type = "Steiner"
    index = match(terminal_names, V(subnet)$name)
    index = index[!is.na(index)]
    V(subnet)$type[index] = "Terminal"
    
    
    class(subnet) <- c("PCSF", "igraph")
    
    return (subnet)
    
  } else {

    stop("  Subnetwork can not be identified for a given parameter set.
    Provide a compatible b or mu value with your terminal prize list...\n\n")
  }


}
IOR-Bioinformatics/PCSF documentation built on June 2, 2019, 10:03 p.m.