Use read_simple_graphs
to read graph data from all files in a directory that
match a specific filename pattern in a format suitable for calculating
graphlet-based feature counts using the
ORCA package.
We use igraph::read_graph
to read graph data from files, so support
all file formats it supports. See help for igraph::read_graph
for a list of
supported values for the format
parameter and the igraph documentation
for descriptions of each of the supported file formats.
The ORCA package we use to efficiently calculate graphlet and orbit counts
requires that graphs are undirected, simple (i.e. have no self-loops or
multiple edges) and connected (i.e. have no isolated vertices). Therefore, by
default, graphs loaded by read_simple_graphs
will be coerced to have the above
properties. This can be avoided by setting the relevant as_undirected
,
remove_loops
, remove_multiple
or remove_isolates
parameters to FALSE
.
library("netdist") edge_format = "ncol" # Load reference graph (used for Netdis. Not required for NetEMD) ref_path = file.path(system.file(file.path("extdata", "random"), package = "netdist"), "ER_1250_10_1") ref_graph <- read_simple_graph(ref_path, format = edge_format) # Set source directory and file properties for Virus PPI graph edge files source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") edge_format <- "ncol" file_pattern <- "*" # Load all graphs in the source folder matching the filename pattern query_graphs <- read_simple_graphs(source_dir, format = edge_format, pattern = file_pattern) print(names(query_graphs))
In this example we will use counts of graphlets containing up to 4 nodes and consider ego-network neighbourhoods of size 2 (i.e. the immediate neighbours of each node plus their immediate neighbours).
# Set the maximum graphlet size to compute counts for max_graphlet_size <- 4 neighbourhood_size <- 2
# Calculate netdis measure for graphlets up to size max_graphlet_size netdis_result <- netdis_many_to_many(query_graphs, ref_graph, max_graphlet_size = max_graphlet_size, neighbourhood_size = neighbourhood_size) # Netdis measure for graphlets of size 3 res3 <- netdis_result$netdis["netdis3", ] netdis3_mat <- cross_comp_to_matrix(res3, netdis_result$comp_spec) print("Netdis: graphlet size = 3") print(netdis3_mat) # Netdis measure for graphlets of size 4 res4 <- netdis_result$netdis["netdis4", ] netdis4_mat <- cross_comp_to_matrix(res4, netdis_result$comp_spec) print("Netdis: graphlet size = 4") print(netdis4_mat)
graphdists <- as.dist(netdis4_mat) par(mfrow = c(1, 2)) cex <- 1 # Dendrogram based on Netdis measure for graphlets of size 3 title <- paste("Netdis: graphlet size = ", 3, sep = "") plot(phangorn::upgma(as.dist(netdis3_mat), method = "average"), use.edge.length = FALSE, edge.width = cex*2, main = title, cex.lab = cex, cex.axis = cex, cex.main = cex, cex.sub = cex, cex = cex) # Dendrogram based on Netdis measure for graphlets of size 4 title = paste("Netdis: graphlet size = ", 4, sep = "") plot(phangorn::upgma(as.dist(netdis4_mat), method = "average"), use.edge.length = FALSE, edge.width = cex*2, main = title, cex.lab = cex, cex.axis = cex, cex.main = cex, cex.sub = cex, cex = cex)
cex <- 1.5 col <- colorRampPalette(colors = c("blue","white"))(100) title <- paste("Netdis: graphlet size = ", 3, sep = "") heatmap(netdis3_mat, Rowv = NULL, Colv = NULL, col = col, main = title, cexRow = cex, cexCol = cex, symm = TRUE)
cex <- 1.5 col <- colorRampPalette(colors = c("blue","white"))(100) title <- paste("Netdis: graphlet size = ", 4, sep = "") heatmap(netdis4_mat, Rowv = NULL, Colv = NULL, col = col, main = title, cexRow = cex, cexCol = cex, symm = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.