inst/doc/benchmarking.R

## ----setup, include = FALSE---------------------------------------------------
library("leiden")
library("reticulate")
py_available()
module <- py_available() && py_numpy_available() && py_module_available("leidenalg") && py_module_available("igraph")
# if(module){
#   reticulate::install_miniconda()
#   py_config()$python
#   reticulate::conda_create("r-reticulate")
#   reticulate::use_condaenv("r-reticulate")
#   conda_install("r-reticulate", "numpy")
#   conda_install("r-reticulate", "scipy")
#   reticulate::conda_install("r-reticulate", "python-igraph")
#   reticulate::py_install("r-reticulate", "leidenalg")
#   module <- py_module_available("leidenalg") && py_module_available("igraph")
# }
if(module){
  leidenalg <- import("leidenalg", delay_load = TRUE)
  ig <- import("igraph", delay_load = TRUE)
}

## -----------------------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = module
)

## -----------------------------------------------------------------------------
print(module)

## -----------------------------------------------------------------------------
paste(Sys.info()[c(4, 2, 1)])

## -----------------------------------------------------------------------------
R.version$version.string

## ----eval=FALSE---------------------------------------------------------------
#  library("reticulate")
#  py_install("python-igraph")
#  py_install("leidenalg")

## ----eval=module--------------------------------------------------------------
partition <- py$partition$membership + 1
table(partition)

## -----------------------------------------------------------------------------
library("igraph")
library("reticulate")
library("RColorBrewer")
graph_object <- graph.famous("Zachary")
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(graph_object, vertex.color = node.cols, layout=layout_with_kk)

## ----eval=module--------------------------------------------------------------
partition <- py$partition$membership + 1
table(partition)

## -----------------------------------------------------------------------------
graph_object <- graph.famous("Zachary")
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(graph_object, vertex.color = node.cols, layout=layout_with_kk)

## ----eval=module--------------------------------------------------------------
partition <- py$partition$membership + 1
table(partition)

## ----eval=module--------------------------------------------------------------
graph_object <- graph.famous("Zachary")
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(graph_object, vertex.color = node.cols, layout=layout_with_kk)

## ----eval=module--------------------------------------------------------------
bash_py_time <- as.numeric(readLines("bash_py_time"))

## ----eval=module--------------------------------------------------------------
leidenalg <- import("leidenalg", delay_load = TRUE)
ig <- import("igraph", delay_load = TRUE)
G = ig$Graph$Famous('Zachary')
G$summary()
partition = leidenalg$find_partition(G, leidenalg$ModularityVertexPartition)
partition$membership

## ----eval=module--------------------------------------------------------------
leidenalg <- import("leidenalg", delay_load = TRUE)
ig <- import("igraph", delay_load = TRUE)
G = ig$Graph$Famous('Zachary')
G$summary()
start <- Sys.time()
for(ii in 1:100){
  partition = leidenalg$find_partition(G, leidenalg$ModularityVertexPartition)
}
end <- Sys.time()
partition$membership
reticulate_time <- difftime(end, start)[[1]]
print(paste(c("leiden time:", reticulate_time, "seconds"), collapse = " "))

## ----eval=FALSE---------------------------------------------------------------
#  install.packages("leiden")

## ----eval=FALSE, include=FALSE------------------------------------------------
#  install.packages("leiden",  quiet = TRUE, repos = 1)
#  devtools::install_github("TomKellyGenetics/leiden", ref = "dev")

## -----------------------------------------------------------------------------
R.version.string

## -----------------------------------------------------------------------------
library("igraph")
library("leiden")

## -----------------------------------------------------------------------------
G <- graph.famous("Zachary")
summary(G)

## ----eval=module--------------------------------------------------------------
partition <- leiden(G, "ModularityVertexPartition", legacy = TRUE)
partition

## ----eval=module--------------------------------------------------------------
table(partition)

## -----------------------------------------------------------------------------
library("igraph")
library("reticulate")
library("RColorBrewer")
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(G, vertex.color = node.cols, layout=layout_with_kk)

## ----eval=module--------------------------------------------------------------
partition <- leiden(G, "CPMVertexPartition", resolution_parameter = 0.05, legacy = TRUE)
partition

## ----eval=module--------------------------------------------------------------
table(partition)

## ----eval=module--------------------------------------------------------------
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(G, vertex.color = node.cols, layout=layout_with_kk)

## ----eval=module--------------------------------------------------------------
partition <- leiden(G, "RBConfigurationVertexPartition", resolution_parameter = 1.5)
partition

## ----eval=module--------------------------------------------------------------
table(partition)

## ----eval=module--------------------------------------------------------------
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(G, vertex.color = node.cols, layout=layout_with_kk)

## ----eval=module--------------------------------------------------------------
G <- as.undirected(G, mode = "each")
is.directed(G)
partition <- leiden(G, "ModularityVertexPartition", legacy = FALSE)
partition

## ----eval=module--------------------------------------------------------------
table(partition)

## -----------------------------------------------------------------------------
library("igraph")
library("reticulate")
library("RColorBrewer")
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(G, vertex.color = node.cols, layout=layout_with_kk)

## ----eval=module--------------------------------------------------------------
partition <- membership(cluster_leiden(G, objective_function = "modularity"))
partition
table(partition)

## ----eval=module--------------------------------------------------------------
partition <- leiden(G, "CPMVertexPartition", resolution_parameter = 0.1, legacy = FALSE)
partition
table(partition)

## ----eval=module--------------------------------------------------------------
node.cols <- brewer.pal(max(c(3, partition)),"Pastel1")[partition]
plot(G, vertex.color = node.cols, layout=layout_with_kk)

## ----cache=TRUE, , eval=module------------------------------------------------
G <- graph.famous('Zachary')
summary(G)
start <- Sys.time()
for(ii in 1:100){
  partition <- leiden(G, "ModularityVertexPartition", legacy = TRUE)
}
end <- Sys.time()
table(partition)
R_graph_time = difftime(end, start)[[1]]
print(paste(c("leiden time:", R_graph_time, "seconds"), collapse = " "))

## ----cache=TRUE, eval=module--------------------------------------------------
G <- graph.famous('Zachary')
summary(G)

start <- Sys.time()
for(ii in 1:100){
  adj_mat <- as_adjacency_matrix(G, sparse = FALSE)
}
end <- Sys.time()
dim(adj_mat)
R_mat_cast_time = difftime(end, start)[[1]]
paste(print(c("cast time:", R_mat_cast_time, "seconds"), collapse = " "))

start <- Sys.time()
for(ii in 1:100){
  partition <- leiden(adj_mat, "ModularityVertexPartition")
}
end <- Sys.time()
table(partition)
R_mat_time = difftime(end, start)[[1]]
print(paste(c("leiden time:", R_mat_time, "seconds"), collapse = " "))

## ----cache=TRUE, eval=module--------------------------------------------------
G <- graph.famous('Zachary')
summary(G)

start <- Sys.time()
for(ii in 1:100){
  adj_mat <- as_adjacency_matrix(G, sparse = TRUE)
}
end <- Sys.time()
class(adj_mat)
dim(adj_mat)
R_sparse_mat_cast_time = difftime(end, start)[[1]]
paste(print(c("cast time:", R_sparse_mat_cast_time, "seconds"), collapse = " "))

start <- Sys.time()
for(ii in 1:100){
  partition <- leiden(adj_mat, "ModularityVertexPartition")
}
end <- Sys.time()
table(partition)
R_sparse_mat_time = difftime(end, start)[[1]]
print(paste(c("leiden time:", R_mat_time, "seconds"), collapse = " "))

## ----cache=TRUE, eval=module--------------------------------------------------
adjacency_matrix <- rbind(cbind(matrix(round(rbinom(1000000, 1, 0.008)), 1000, 1000),
                                matrix(round(rbinom(1000000, 1, 0.003)), 1000, 1000),
                                matrix(round(rbinom(1000000, 1, 0.001)), 1000, 1000)),
                          cbind(matrix(round(rbinom(1000000, 1, 0.003)), 1000, 1000),
                                matrix(round(rbinom(1000000, 1, 0.008)), 1000, 1000),
                                matrix(round(rbinom(0000000, 1, 0.002)), 1000, 1000)),
                          cbind(matrix(round(rbinom(1000000, 1, 0.003)), 1000, 1000),
                                matrix(round(rbinom(1000000, 1, 0.001)), 1000, 1000),
                                matrix(round(rbinom(1000000, 1, 0.009)), 1000, 1000)))
rownames(adjacency_matrix) <- 1:3000
colnames(adjacency_matrix) <- 1:3000
G <- graph_from_adjacency_matrix(adjacency_matrix)

start <- Sys.time()
for(ii in 1:10){
  adj_mat <- as_adjacency_matrix(G, sparse = FALSE)
}
end <- Sys.time()
class(adj_mat)
dim(adj_mat)
R_mat_large_cast_time = difftime(end, start)[[1]]
paste(print(c("cast time:", R_mat_large_cast_time, "seconds"), collapse = " "))

start <- Sys.time()
for(ii in 1:10){
  partition <- leiden(adj_mat, "ModularityVertexPartition")
}
end <- Sys.time()
table(partition)
R_mat_large_time = difftime(end, start)[[1]]
print(paste(c("leiden time:", R_mat_large_time, "seconds"), collapse = " "))

## ----cache=TRUE, eval=module--------------------------------------------------
start <- Sys.time()
for(ii in 1:100){
  adj_mat <- as_adjacency_matrix(G, sparse = TRUE)
}
end <- Sys.time()
class(adj_mat)
dim(adj_mat)
R_mat_large_cast_time = difftime(end, start)[[1]]
paste(print(c("cast time:", R_mat_large_cast_time, "seconds"), collapse = " "))

start <- Sys.time()
for(ii in 1:10){
  partition <- leiden(adj_mat, "ModularityVertexPartition")
}
end <- Sys.time()
table(partition)
R_mat_large_time = difftime(end, start)[[1]]
print(paste(c("leiden time:", R_mat_large_time, "seconds"), collapse = " "))

## ----eval=module--------------------------------------------------------------
partition_type <- "RBConfigurationVertexPartition"
initial_membership <- NULL
weights <- NULL
node_sizes = NULL
resolution_parameter = 1

G <- graph.famous('Zachary')
summary(G)
time1 <- Sys.time()
object <- as.matrix(as_adjacency_matrix(G))
time2 <- Sys.time()
timing = difftime(time2, time1)[[1]]
print(paste(c("cast to adjacent:", timing, "seconds"), collapse = " "))

#run matrix method
leidenalg <- import("leidenalg", delay_load = TRUE)
ig <- import("igraph", delay_load = TRUE)

#convert matrix input (corrects for sparse matrix input)
if(is.matrix(object) || is(adj_mat_sparse, "Matrix")){
  adj_mat <- object
} else{
  adj_mat <- as.matrix(object)
}

#compute weights if non-binary adjacency matrix given
is_pure_adj <- all(as.logical(adj_mat) == adj_mat)
if (is.null(weights) && !is_pure_adj) {
  #assign weights to edges (without dependancy on igraph)
  t_mat <- t(adj_mat)
  weights <- t_mat[t_mat!=0]
  #remove zeroes from rows of matrix and return vector of length edges
}

time3 <- Sys.time()
##convert to python numpy.ndarray, then a list
adj_mat_py <- r_to_py(adj_mat)
adj_mat_py <- adj_mat_py$tolist()
time4 <- Sys.time()
timing = difftime(time4, time3)[[1]]
print(paste(c("pass to python matrix:", timing, "seconds"), collapse = " "))


#convert graph structure to a Python compatible object
GraphClass <- if (!is.null(weights) && !is_pure_adj){
  ig$Graph$Weighted_Adjacency
} else {
  ig$Graph$Adjacency
}
time5 <- Sys.time()
snn_graph <- GraphClass(adj_mat_py)
time6 <- Sys.time()
timing = difftime(time6, time5)[[1]]
reticulate_create_time = difftime(time6, time5)[[1]]
print(paste(c("generate graph in python:", timing, "seconds"), collapse = " "))


# test performance for computing matrix to graph in R
# other option is to passing snn_graph to Python

time7 <- Sys.time()
#compute partitions
source("../R/find_partition.R")

partition <- find_partition(snn_graph, partition_type = partition_type,
                            initial_membership = initial_membership ,
                            weights = weights,
                            node_sizes = node_sizes,
                            resolution_parameter = resolution_parameter
)
time8 <- Sys.time()
timing = difftime(time8, time7)[[1]]
print(paste(c("compute partitions:", timing, "seconds"), collapse = " "))
timing = difftime(time8, time1)[[1]]
print(paste(c("total:", timing, "seconds"), collapse = " "))
partition

## ----eval=module--------------------------------------------------------------
partition_type <- "RBConfigurationVertexPartition"
initial_membership <- NULL
weights <- NULL
node_sizes = NULL
resolution_parameter = 1

G <- graph.famous('Zachary')
summary(G)
time1 <- Sys.time()
object <- as.matrix(as_adjacency_matrix(G))
time2 <- Sys.time()
timing = difftime(time2, time1)[[1]]
print(paste(c("cast to adjacent:", timing, "seconds"), collapse = " "))

#run matrix method
leidenalg <- import("leidenalg", delay_load = TRUE)
ig <- import("igraph", delay_load = TRUE)

time3 <- Sys.time()
##convert to python numpy.ndarray, then a list
object <- graph_from_adjacency_matrix(adj_mat)
time4 <- Sys.time()
timing = difftime(time4, time3)[[1]]
print(paste(c("generate graph in R:", timing, "seconds"), collapse = " "))

#convert graph structure to a Python compatible object
time5 <- Sys.time()
##convert to list for python input
    if(!is.named(object)){
        vertices <- as.list(as.character(V(object)))
    } else {
        vertices <- as.list(names(V(object)))
    }

    edges <- as_edgelist(object)
    dim(edges)
    edgelist <- list(rep(NA, nrow(edges)))
    for(ii in 1:nrow(edges)){
        edgelist[[ii]] <- as.character(edges[ii,])
    }

    snn_graph <- ig$Graph()
    snn_graph$add_vertices(r_to_py(vertices))
    snn_graph$add_edges(r_to_py(edgelist))
time6 <- Sys.time()
timing = difftime(time6, time5)[[1]]
print(paste(c("pass to python graph:", timing, "seconds"), collapse = " "))



# test performance for computing matrix to graph in R
# other option is to passing snn_graph to Python

time7 <- Sys.time()
#compute partitions
partition <- find_partition(snn_graph, partition_type = partition_type,
                            initial_membership = initial_membership ,
                            weights = weights,
                            node_sizes = node_sizes,
                            resolution_parameter = resolution_parameter
)
time8 <- Sys.time()
timing = difftime(time8, time7)[[1]]
print(paste(c("compute partitions:", timing, "seconds"), collapse = " "))
timing = difftime(time8, time1)[[1]]
print(paste(c("total:", timing, "seconds"), collapse = " "))
partition

## ----eval=module--------------------------------------------------------------
partition_type <- "RBConfigurationVertexPartition"
initial_membership <- NULL
weights <- NULL
node_sizes = NULL
resolution_parameter = 1

G <- graph.famous('Zachary')
summary(G)
time1 <- Sys.time()
object <- as.matrix(as_adjacency_matrix(G))
time2 <- Sys.time()
timing = difftime(time2, time1)[[1]]
print(paste(c("cast to adjacent:", timing, "seconds"), collapse = " "))

time3 <- Sys.time()
##convert to python numpy.ndarray, then a list
object <- graph_from_adjacency_matrix(adj_mat)
time4 <- Sys.time()
timing = difftime(time4, time3)[[1]]
R_graph_create_time = difftime(time4, time3)[[1]]
print(paste(c("generate graph in R:", timing, "seconds"), collapse = " "))


#convert graph structure to a Python compatible object
time5 <- Sys.time()
##convert to list for python input
   snn_graph <- object
time6 <- Sys.time()
timing = difftime(time6, time5)[[1]]
print(paste(c("pass to R graph:", timing, "seconds"), collapse = " "))



# test performance for computing matrix to graph in R
# other option is to passing snn_graph to Python

time7 <- Sys.time()
#compute partitions
partition <- leiden(snn_graph, partition_type = partition_type,
                            initial_membership = initial_membership ,
                            weights = weights,
                            node_sizes = node_sizes,
                            resolution_parameter = resolution_parameter
)
time8 <- Sys.time()
timing = difftime(time8, time7)[[1]]
print(paste(c("compute partitions:", timing, "seconds"), collapse = " "))
timing = difftime(time8, time1)[[1]]
print(paste(c("total:", timing, "seconds"), collapse = " "))
partition

## -----------------------------------------------------------------------------
time9 <- Sys.time()
partition <- membership(cluster_leiden(G, objective_function = "modularity"))
partition
table(partition)
time10 <- Sys.time()
timing = difftime(time10, time9)[[1]]
print(paste(c("run with igraph:", timing, "seconds"), collapse = " "))

## ----eval=module--------------------------------------------------------------
time11 <- Sys.time()
partition <- leiden(G, "ModularityVertexPartition", legacy = FALSE)
partition
table(partition)
time12 <- Sys.time()
timing = difftime(time12, time11)[[1]]
print(paste(c("run with leiden in igraph:", timing, "seconds"), collapse = " "))

## ----eval=module--------------------------------------------------------------
time13 <- Sys.time()
partition <- leiden(G, "ModularityVertexPartition", legacy = TRUE)
partition
table(partition)
time14 <- Sys.time()
timing = difftime(time14, time13)[[1]]
print(paste(c("run with leiden with reticulate:", timing, "seconds"), collapse = " "))

## ----eval=module--------------------------------------------------------------
library("Matrix")
adj_mat <- as(as(as(as_adjacency_matrix(G), Class = "CsparseMatrix"), "generalMatrix"), "dMatrix")
time15 <- Sys.time()
partition <- leiden(adj_mat, "ModularityVertexPartition", legacy = TRUE)
partition
table(partition)
time16 <- Sys.time()
timing = difftime(time16, time15)[[1]]
print(paste(c("run with leiden with reticulate:", timing, "seconds"), collapse = " "))

## ----eval=module--------------------------------------------------------------
adj_mat <- as_adjacency_matrix(G)
time15 <- Sys.time()
partition <- leiden(adj_mat, "ModularityVertexPartition", legacy = TRUE)
partition
table(partition)
time16 <- Sys.time()
timing = difftime(time16, time15)[[1]]
print(paste(c("run with leiden with reticulate:", timing, "seconds"), collapse = " "))

## ----cache=TRUE, , eval=module------------------------------------------------
G <- graph.famous('Zachary')
summary(G)
start <- Sys.time()
for(ii in 1:100){
  partition <- membership(cluster_leiden(G, objective_function = "modularity"))
}
end <- Sys.time()
table(partition)
igraph_time = difftime(end, start)[[1]]
print(paste(c("leiden time:", igraph_time, "seconds"), collapse = " "))

## ----cache=TRUE, , eval=module------------------------------------------------
G <- graph.famous('Zachary')
summary(G)
start <- Sys.time()
for(ii in 1:100){
  partition <- leiden(G, "ModularityVertexPartition", legacy =  FALSE)
}
end <- Sys.time()
table(partition)
R_cigraph_time = difftime(end, start)[[1]]
print(paste(c("leiden time:", R_cigraph_time, "seconds"), collapse = " "))

## ----fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last', eval=module----
barplot(c(bash_py_time, py$py_time, reticulate_time, R_graph_time,
          R_cigraph_time, igraph_time, R_mat_time, R_sparse_mat_time), 
        names = c("Python (shell)", "Python (Rmd)", "Reticulate",
                  "R igraph reticulate", "R igraph (C)", "R igraph cluster_leiden",
                  "R matrix","R dgCMatrix"), 
        col = brewer.pal(9,"Pastel1"), las = 2, srt = 45,
        ylab = "time (seconds)", main = "benchmarking 100 computations")
abline(h=0)

## ----fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last', eval=module----
barplot(c(bash_py_time, py$py_time, reticulate_time, R_graph_time,
          R_cigraph_time, igraph_time, R_mat_time+R_mat_cast_time, 
          R_sparse_mat_time+R_sparse_mat_cast_time), 
        names = c("Python (shell)", "Python (Rmd)", "Reticulate",
                  "R igraph reticulate", "R igraph (C)", "R igraph cluster_leiden",
                  "R matrix","R dgCMatrix"), 
        col = "grey80", las = 2, srt = 45,
        ylab = "time (seconds)", main = "benchmarking 100 computations")
barplot(c(bash_py_time, py$py_time, reticulate_time, R_graph_time,
          R_cigraph_time, igraph_time, R_mat_time,  R_sparse_mat_time), 
        names = c("Python (shell)", "Python (Rmd)", "Reticulate",
                  "R igraph reticulate", "R igraph (C)", "R igraph cluster_leiden",
                  "R matrix","R dgCMatrix"),
        col = brewer.pal(9,"Pastel1"), las = 2, srt = 45,
        ylab = "time (seconds)", main = "benchmarking 100 computations", add = TRUE)
abline(h=0)

## ----fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last', eval=module----
R_graph_create_time = difftime(time4, time3)[[1]]
barplot(c(bash_py_time, py$py_time+reticulate_create_time*100, reticulate_time+reticulate_create_time*100, R_graph_time+R_graph_create_time*100,
        R_cigraph_time, igraph_time, R_mat_time, R_sparse_mat_time), 
        names = c("Python (shell)", "Python (Rmd)", "Reticulate",
                  "R igraph reticulate", "R igraph (C)", "R igraph cluster_leiden",
                  "R matrix","R dgCMatrix"), 
        col = "grey80", las = 2, srt = 45,
        ylab = "time (seconds)", main = "benchmarking 100 computations")
barplot(c(bash_py_time, py$py_time, reticulate_time, R_graph_time, 
        R_cigraph_time, igraph_time, R_mat_time,  R_sparse_mat_time), 
        names = c("Python (shell)", "Python (Rmd)", "Reticulate",
                  "R igraph reticulate", "R igraph (C)", "R igraph cluster_leiden",
                  "R matrix","R dgCMatrix"), 
        col = brewer.pal(9,"Pastel1"), las = 2, srt = 45,
        ylab = "time (seconds)", main = "benchmarking 100 computations", add = TRUE)
abline(h=0)

Try the leiden package in your browser

Any scripts or data that you put into this service are public.

leiden documentation built on Nov. 17, 2023, 5:08 p.m.