R/utils_network.R

Defines functions sif_to_igraph import_graph pathway_statistics igraph_to_sif determine_union_groups create_union_network

Documented in create_union_network determine_union_groups igraph_to_sif import_graph pathway_statistics sif_to_igraph

#' Creates a union network for the pathways of a specific configuration
#'
#' @param configuration  Configuration object for which the union network should be computed.
#'
#' @return The union network as a pathway object
#' @importFrom igraph graph_from_data_frame
create_union_network <- function(configuration) {
  pathways <- configuration@pathways

  if (length(pathways) != 0) {
    # Creates a union network of all pathways of a specific configuration
    for (i in 1:length(pathways)) {
      if (i == 1) {
        union_network <- igraph::graph_from_data_frame(vertices = pathways[[i]]@nodes, d = pathways[[i]]@edges, directed = FALSE)
      } else {
        union_network <- igraph::graph.union(union_network, graph_from_data_frame(vertices = pathways[[i]]@nodes, d = pathways[[i]]@edges, directed = FALSE), byname = "auto")
      }
    }
    # Change col names
    edges <- igraph::as_data_frame(x = union_network, what = "edges")
    names(edges) <- c("source", "target")
    nodes <- igraph::as_data_frame(x = union_network, what = "vertices")
    names(nodes) <- c("node")
    nodes <- determine_union_groups(nodes, configuration)
    return(new("Pathway",
      edges = edges,
      nodes = nodes,
      num_edges = as.integer(igraph::ecount(union_network)),
      num_nodes = as.integer(igraph::vcount(union_network))
    ))
  } else {
    return(new("Pathway"))
  }
}

#' Determines from which pathways a node originates
#'
#' @param configuration  Configuration object for which the groups should be determined,
#'
#' @return Data frame with the node ids and the pathways they originate from
determine_union_groups <- function(union_nodes, configuration) {
  union_nodes[["group"]] <- list(c())
  pathways <- configuration@pathways

  for (id in union_nodes$node) {
    for (pathway in names(pathways)) {
      if (id %in% pathways[[pathway]]@nodes$node) {
        union_nodes$group[union_nodes$node == id][[1]] <- c(union_nodes$group[union_nodes$node == id][[1]], pathway)
      }
    }
  }
  union_nodes$group <- lapply(X = union_nodes$group, FUN = function(x) paste(x, collapse = ", "))
  return(union_nodes)
}

#' Converts igraph object to sif file.
#'
#' @param biological_network Biologial network as igraph object.
#' @param path Path where the file should be saved.
#'
#' @export
#'
#' @import igraph
igraph_to_sif <- function(biological_netwrok, path) {
  edges <- as_data_frame(biological_netwrok, what = c("edges"))
  edges$interaction_type <- "pp"
  edges <- edges[, c("from", "interaction_type", "to")]
  write.table(edges, row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t", file = path)
}

#' Computes pathway statistics for all pathways in a result object.
#'
#' @param indicator_matrix Indicator matrix for the computation of the pathway statistics.
#' @param result Result object obtained from a KeyPathwayMiner execution.
#'
#' @export
pathway_statistics <- function(indicator_matrix, result) {
  configurations <- get_configurations(result)
  message("Computing pathway statistics.")

  # Filter out configurations with 0 pathways ####
  message("\tFiltering out configurations with no pathways.")
  removed <- 0
  for (configuration in configurations) {
    if (length(get_pathways(result_object = result, configuration_name = configuration)) == 0) {
      # If no pathways exist remove configuration from solution set
      result <- remove_configuration(result_object = result, configuration_name = configuration)
      removed <- removed + 1
    }
  }
  message(paste0("\tFiltering completed. Removed ", removed, " configurations."))
  # Get updated configurations
  configurations <- get_configurations(result)
  # Computing pathway statistics####
  message(paste0("Computing pathway statistics for ", length(configurations), " configurations and ", length(configurations) * result@parameters$computed_pathways, " pathways."))

  # For all configurations in the result object
  for (configuration in configurations) {
    configuration_object <- get_configuration(result_object = result, configuration_name = configuration)
    # Pathways
    for (pathway_name in names(configuration_object@pathways)) {
      pathway <- get_pathway(configuration = configuration_object, pathway_name = pathway_name)
      nodes <- pathway@nodes$node
      pathway_matrix <- indicator_matrix[indicator_matrix[, 1] %in% nodes, ]
      pathway <- set_avg_exp(pathway = pathway, new_avg_exp = round(sum(rowSums(pathway_matrix[-1])) / length(nodes), 2))
      pathway <- set_num_edges(pathway = pathway, num_edges = nrow(pathway@edges))
      pathway <- set_num_nodes(pathway = pathway, num_nodes = length(nodes))
      if (result@parameters$strategy == "INES") {
        # If it was an INES run we also determine the exception nodes of the pathway
        # At most l exceptions are allowed
        exceptions <- tibble(exception = !(ncol(pathway_matrix[-1]) - rowSums(pathway_matrix[-1]) <= configuration_object@l_values[1]), pathway_matrix[1])
        # Get nodes that were not in the provide indicator matrix and set them as exception node since no information is provided
        temp <- tibble(exception = c(TRUE), nodes[!as.character(nodes) %in% unlist(exceptions[, 2])])
        names(temp)[2] <- names(exceptions)[2]
        exceptions <- rbind(exceptions, temp)
        names(exceptions$exception) <- NULL
        pathway@nodes <- merge(x = pathway@nodes, y = exceptions, by.x = names(pathway@nodes)[1], by.y = names(exceptions)[2])
      }
      result <- set_pathway(result_object = result, configuration_name = configuration, pathway_name = pathway_name, pathway = pathway)
    }
    # Union network
    union_network <- configuration_object@union_network
    nodes <- union_network@nodes$node
    pathway_matrix <- indicator_matrix[indicator_matrix[, 1] %in% nodes, ]
    union_network <- set_avg_exp(pathway = configuration_object@union_network, new_avg_exp = round(sum(rowSums(pathway_matrix[-1])) / length(nodes), 2))
    union_network <- set_num_edges(pathway = union_network, num_edges = nrow(union_network@edges))
    union_network <- set_num_nodes(pathway = union_network, num_nodes = length(nodes))
    if (result@parameters$strategy == "INES") {
      exceptions <- tibble(exception = !(ncol(pathway_matrix[-1]) - rowSums(pathway_matrix[-1]) <= configuration_object@l_values[1]), pathway_matrix[1])
      # Get nodes that were not in the provide indicator matrix and set them as exception node since no information is provided
      temp <- tibble(exception = c(TRUE), nodes[!as.character(nodes) %in% unlist(exceptions[, 2])])
      names(temp)[2] <- names(exceptions)[2]
      exceptions <- rbind(exceptions, temp)
      names(exceptions$exception) <- NULL
      union_network@nodes <- merge(x = union_network@nodes, y = exceptions, by.x = names(union_network@nodes)[1], by.y = names(exceptions)[2])
    }
    result <- set_pathway(result_object = result, configuration_name = configuration, pathway = union_network, union = TRUE)
  }
  return(result)
}

#' Reads graph file and converts it to igraph object
#'
#' Possible formats are c("sif","gml","graphml", "xlsx","custom").
#'
#' \strong{Important}: For the sif format please also specify if the sep is a TAB or a SPACE.
#'
#' @param file Path to file
#' @param format Fomrat of the File
#' @param sep You can also use a custom seperator like  TAB or SPACE
#'
#' @return igraph object from the network
#' @export
#'
#' @import igraph
#' @importFrom openxlsx read.xlsx
import_graph <- function(file, format, sep) {
  if (tolower(format) == "gml") {
    message("GML-->iGraph")
    return(igraph::read_graph(file = file, format = "gml"))
  } else if (tolower(format) == "graphml") {
    message("GraphML-->iGraph")
    return(igraph::read_graph(file = file, format = "graphml"))
  } else if (tolower(format) == "xlsx") {
    message("XLSX-->iGraph")
    df <- openxlsx::read.xlsx(xlsxFile = file)
    return(igraph::graph_from_data_frame(df, directed = FALSE, vertices = NULL))
  } else if (tolower(format) == "sif") {
    message("SIF-->iGraph")
    return(sif_to_igraph(filepath = file, sep = sep))
  } else if (tolower(format) == "csv") {
    message("CSV-->iGraph")
    df <- read.csv(file = file)
    return(igraph::graph_from_data_frame(data.frame(source = df[, 1], target = df[, 2]),
      directed = FALSE, vertices = NULL
    ))
  } else if (tolower(format) == "custom") {
    message("CUSTOM-->iGraph")
    df <- read.table(file, sep = sep)
    return(igraph::graph_from_data_frame(data.frame(source = df[, 1], target = df[, 2]),
      directed = FALSE, vertices = NULL
    ))
  } else {
    message("Not a supported format. Please specifiy supported format.")
  }
}

#' Convert sif file to iGraph object
#'
#' @param filepath Filepath to the sif file
#' @param sep Either TAB or SPACE
#'
#' @return igraph object from the network
#' @export
#'
#' @importFrom igraph graph_from_data_frame
sif_to_igraph <- function(filepath, sep = sep) {
  file <- file(filepath, "r")
  df <- data.frame(source = c(), target = c())
  while (T) {
    line <- readLines(file, n = 1, warn = F)
    # If we reached the end of the file
    if (length(line) == 0) {
      break
    }
    # Split string
    str_vector <- unlist(strsplit(x = line, split = sep))
    # Save source node
    source <- str_vector[1]
    if (length(str_vector) >= 3) {
      for (string in str_vector[3:length(str_vector)]) {
        df <- rbind(df, c(source, string))
      }
    }
  }
  close(file)
  return(igraph::graph_from_data_frame(d = df, directed = FALSE))
}
baumbachlab/keypathwayminer-R documentation built on June 29, 2023, 11:21 a.m.