knitr::opts_chunk$set(echo = TRUE)
library(igraph) library(tidyverse) drake::loadd() galsimple <- igraph::simplify(gal, remove.multiple = F, remove.loops = T) %>% delete.vertices(., degree(.) == 0) gen_random_graphs <- function(.graph, .n){ # Calculate graph density for network randomization gd <- edge_density(.graph) vcount <- gorder(.graph) # Generate n random graphs gl <- vector('list', .n) set.seed(1403) for(i in 1:.n){ gl[[i]] <- erdos.renyi.game(n = vcount, p.or.m = gd, type = "gnp", directed = T, loops = F) } return(gl) } gl <- gen_random_graphs(galsimple, 1000) ga <- subgraph.edges(galsimple, as.character(artists_lean$id), delete.vertices = T) %>% delete.vertices(., degree(.) == 0)
r vcount(g) vertices and r ecount(g) edges.r farthest_vertices(g)$distance. r tc <- triad_census(g); ((tc[4] + tc[9] + tc[12]) / sum(tc)) * 100% of the vertices driving movements to other exhibition places (two outward ties). The questions is r edge_density(g).plot(galsimple, vertex.label = NA, edge.arrow.width = 0, edge.arrow.size = 0, margin = 0, vertex.size = 2)
The importance of exhibition venues for the career of artists cannot be assessed without referring to its specific context. On the one hand, exhibitions in certain places can be the goal of many artists. On the other hand, certain exhibitions are the catalyst for exhibitions in even more significant places. (Both high in and outdegree?)
# Calculate the "out" degrees out_degree <- degree(galsimple, mode = "out") ## ... and "in" degrees in_degree <- degree(galsimple, mode = "in") data_frame(in_degree = in_degree, out_degree = out_degree) %>% ggplot(aes(out_degree, in_degree)) + geom_point() + geom_abline(intercept = 0, slope = 1, color = "red") + geom_smooth(se = F, method = "lm") + theme_bw()
There is a strong relation between relationship between the vertices' in- and out degree (r = r cor(out_degree, in_degree) %>% round(2)).
First, we'll look at our graph and see the distribution of in degree and out degree, and then use that to set up a working definition for what an "important product" is (something that has > X out degrees and < Z in degrees).
# The most centralized graph according to eigenvector centrality g0 <- graph( c(2,1), n=10, dir=FALSE ) g1 <- make_star(10, mode="undirected") centr_eigen(g0)$centralization centr_eigen(g1)$centralization par(mfrow = c(1,2)) plot(g0) plot(g1)
# Calculate eigenvector centrality on graph level g.eigen <- centr_eigen(galsimple, directed = T)$centralization # Calculate eigenvector centrality of n random graphs gl.eigens <- map_dbl(gl, ~ centr_eigen(., directed = TRUE)$centralization) # Plot the distribution of centralities data_frame(gl.eigens) %>% ggplot(aes(gl.eigens)) + geom_histogram(bins = 100) + geom_vline(xintercept = g.eigen, color = "red", size = 1, linetype = "dashed") + theme_bw() # Calculate the proportion of graphs with an eigenvector centrality lower than our observed mean(gl.eigens < g.eigen) # The network is far more centralized than we would expect by chance as zero random networks have a higher eigenvector centrality than the observed network.
# Assign largest cliques output to object 'lc' lc <- largest_cliques(galsimple) # Create two new undirected subgraphs, each containing only the vertices of each largest clique. gs1 <- as.undirected(subgraph(galsimple, lc[[1]])) gs2 <- as.undirected(subgraph(galsimple, lc[[2]])) plot(gs2, vertex.label = V(gs1)$name_exhplace, vertex.label.color = "black", vertex.label.cex = 0.9, vertex.size = 0, edge.color = 'gray28', main = "Largest Clique", layout = layout.circle(gs1) )
Which artists exhibited in all these exhibition places?
vertex_attr(gs1, name = "name", index = V(gs1)) clique_exhplaces <- exhplaces %>% filter(id %in% vertex_attr(gs1, name = "name", index = V(gs1))) table(clique_exhplaces$type_exhplace) table(clique_exhplaces$country)
This is a measure of how preferentially attached vertices are to other vertices with identical attributes.
# Convert the exhibition type attribute into a numeric value values <- as.numeric(factor(V(galsimple)$type_exhplace)) observed.assortativity <- assortativity(galsimple, values, directed = T) # Calculate the assortativity of the network randomizing the type attribute n times results <- vector('list', 1000) for(i in 1:1000){ results[[i]] <- assortativity(galsimple, sample(values), directed = T) } # Plot the distribution of assortativity values and add a red vertical line at the original observed value data_frame(assortativity = unlist(results)) %>% ggplot(aes(assortativity)) + geom_histogram(bins = 100) + geom_vline(xintercept = observed.assortativity, color = "red", size = 1, linetype = "dashed") + theme_bw()
Degree assortativity determines how preferentially attached vertices are to other vertices of a similar degree.
g.dassor <- assortativity.degree(galsimple, directed = TRUE) g.dassors <- map_dbl(gl, ~ assortativity.degree(., directed = TRUE)) # Plot the distribution data_frame(g.dassors) %>% ggplot(aes(g.dassors)) + geom_histogram(bins = 100) + geom_vline(xintercept = g.dassor, color = "red", size = 1, linetype = "dashed") + theme_bw() # Calculate the proportion of graphs with an eigenvector centrality lower than our observed mean(g.dassors < g.dassor)
The simulation with r n randomly generated networks based on a similar edge density shows that a assortativity degree of r assortativity.degree(galsimple, directed = TRUE) from the observed network is highly unlikely.
The reciprocity of a directed network reflects the proportion of edges that are symmetrical. That is, the proportion of outgoing edges that also have an incoming edge. It is commonly used to determine how inter-connected directed networks are.
g.recip <- igraph::reciprocity(galsimple) n_nodes <- gorder(galsimple) edge_dens <- edge_density(galsimple) simulated_recip <- rep(NA, 1000) for(i in 1:1000) { simulated_graph <- erdos.renyi.game(n_nodes, edge_dens, directed = TRUE, type = "gnp") simulated_recip[i] <- reciprocity(simulated_graph) } # Plot the distribution data_frame(simulated_recip) %>% ggplot(aes(simulated_recip)) + geom_histogram(bins = 1000) + geom_vline(xintercept = g.recip, color = "red", size = 1, linetype = "dashed") + theme_bw()
# Perform fast-greedy community detection on network graph kc = fastgreedy.community(galsimple) # Determine sizes of each community sizes(kc) # Determine which individuals belong to which community membership(kc) # Plot the community structure of the network plot(kc, g) # Create an object 'i' containin the memberships of the fast-greedy community detection i <- membership(kc) # Check the number of different communities sizes(kc) # Add a color attribute to each vertex, setting the vertex color based on community membership g <- set_vertex_attr(g, "color", value = c("yellow", "blue", "red")[i]) # Plot the graph using threejs graphjs(g)
library(threejs) # Create numerical vector of vertex eigenvector centralities ec <- as.numeric(eigen_centrality(galsimple)$vector) # Create new vector 'v' that is equal to the square-root of 'ec' multiplied by 5 v <- 2*sqrt(ec) # Plot threejs plot of graph setting vertex size to v graphjs(galsimple, vertex.size = v)
create_subgraph_period <- function(.graph, .start_year, .years_to_end){ # specify time period begin <- .start_year end <- .start_year + .years_to_end edges_subset <- E(.graph)[[exh_start_Y_from >= begin & exh_start_Y_from <= end & exh_start_Y_to >= begin & exh_start_Y_to <= end]] subgraph <- subgraph.edges(.graph, eids = edges_subset, delete.vertices = T) return(subgraph) } # create graphs for 1917-1936, 1937-1957, ..., 1997-2017 time_graphs <- seq(from = 1917, to = 1997, by = 10) %>% map(~ create_subgraph_period(galsimple, .start_year = ., .years_to_end = 9)) time_graphs_tbl <- seq(from = 1917, to = 1997, by = 10) %>% map(~ create_subgraph_period(galsimple, .start_year = ., .years_to_end = 9) %>% tidygraph::as_tbl_graph()) are_equal_graphs <- function(.graph1, .graph2){ # EDGES # if graphs do not have the same number of EDGES they are different if(ecount(.graph1) == ecount(.graph2)){ # if they have same number of edges: are any edges not the same? any_edge_unlike <- any(E(.graph1) != E(.graph2)) if(any_edge_unlike == T) message("Edges are NOT the same.") else writeLines("Edges are the same.") } else {message("Graphs have different number of edges.")} # VERTICES # if graphs do not have the same number of VERTICES they are different if(vcount(.graph1) == vcount(.graph2)){ # any vertex not the same? any_vertex_unlike <- any(V(.graph1) != V(.graph2)) if(any_vertex_unlike == T) message("Vertices are NOT the same.") else writeLines("Vertices are the same.") } else {message("Graphs have different number of vertices.")} }
year <- 2017 year_diff <- 99 df <- compute_centr_year(galsimple, .lower_boundary_yr = year - year_diff, .yr_centr_computed = year) %>% mutate(name = as.integer(name)) %>% left_join(exhplaces, by = c("name" = "id")) df_galsimple <- as_tbl_graph(galsimple) %>% mutate(name = as.integer(name)) %>% activate(nodes) %>% left_join(df , "name") are_equal_graphs(galsimple, df_galsimple)
edge_attr(.graph1, "exh_id_to") %>% head() .graph1 <- time_graphs_tbl[[1]] .graph2 <- time_graphs[[1]] are_equal_graphs(time_graphs[[1]], time_graphs_tbl[[1]]) ec1 <- igraph::eigen_centrality(.graph1, directed = T, scale = T)$vector ec2 <- igraph::eigen_centrality(.graph2, directed = T, scale = T)$vector # che any(round(ec1, 4) != round(ec2, 4)) library(tidygraph) ec1tbl <- .graph1 %>% activate(nodes) %>% mutate(eigen = centrality_eigen(directed = T)) %>% pull(eigen) %>% round(4)
library(ggraph) plot_graph_period <- function(.graph, label = F){ edges <- igraph::as_data_frame(.graph) max <- max(max(edges$exh_start_Y_from), max(edges$exh_start_Y_to)) min <- min(min(edges$exh_start_Y_from), min(edges$exh_start_Y_to)) gplot <- ggraph(.graph, layout = "fr") + geom_edge_link(#aes(color = exh_type_to), arrow = arrow(length = unit(4, 'mm'), angle = 12), end_cap = circle(3, 'mm'), alpha = 0.4) + geom_node_point(aes(color = type_exhplace), size = 3) + theme_bw() + labs(color = "Exhibition Place") + ggtitle(paste0(min, "-", max)) if(label == T){ gplot <- gplot + geom_node_text(aes(label = name_exhplace), nudge_y = 0.2, size = 2, check_overlap = T) } return(gplot) } plot_graph_period(time_graphs[[1]], label = T)
time_graphs[[1]] %>% plot(vertex.label = V(.)$name_exhplace)
ecount(g1917_1926) vcount(g1917_1926) ego18761 <- induced_subgraph(galsimple, vids = unlist(ego(g1917_1936, order = 15, nodes = V(g1917_1936)[name == "18761"])) ) plot(ego18761) g1917_1936 %>% igraph::as_data_frame() %>% View() g1917_1936 %>% plot(., vertex.label = NA, vertex.label.cex = 0.7, vertex.label.dist=0.9, edge.arrow.width = 0.65, edge.arrow.size = 0.65, margin = 1, vertex.size = 2) # get relevant edges # Loop over time graphs calculating out degree degree_count_list <- lapply(time_graph, degree, mode = "out") # Flatten it degree_count_flat <- unlist(degree_count_list) degree_data <- data.frame( # Use the flattened counts degree_count = degree_count_flat, # Use the names of the flattened counts vertex_name = names(degree_count_flat), # Repeat the dates by the lengths of the count list date = rep(d, lengths(degree_count_list)) ) # Using important_degree_data, plot degree_count vs. date, colored by vertex_name ggplot(important_degree_data, aes(date, degree_count, color = vertex_name)) + # Add a path layer geom_path()
library(tidygraph); library(ggraph) nodes <- V(time_graphs[[2]])$name g18761 <- igraph::make_ego_graph(g1917_1936, nodes = V(g1917_1936)[name == 18761]) ggraph(g1917_1936) + geom_edge_link(arrow = arrow(length = unit(4, 'mm'), angle = 12), end_cap = circle(3, 'mm')) + geom_node_point(size = 3) + geom_node_text(aes(label = name_exhplace), nudge_y = 0.2, size = 3, check_overlap = T) + theme_bw()
places_exhibiting_per_year <- exhibitions %>% left_join(exhplaces, by = c("exh_place_id" = "id")) %>% distinct(exh_start_Y, exh_place_id, .keep_all = T) %>% group_by(exh_start_Y, type_exhplace) %>% summarise(places_exhibiting = n()) exhibitions_per_year <- exhibitions %>% left_join(exhplaces, by = c("exh_place_id" = "id")) %>% distinct(exh_start_Y, id, .keep_all = T) %>% group_by(exh_start_Y, type_exhplace) %>% summarise(num_of_exh = n()) # extract edges from graph galsimple_edges <- galsimple %>% igraph::as_data_frame("edges") places_exhibiting_per_year <- galsimple_edges %>% distinct(from, exh_start_Y_from, .keep_all = T) %>% group_by(year = exh_start_Y_from) %>% summarise(num_places_exhibiting = n()) edges_per_year <- full_join( galsimple_edges %>% group_by(year = exh_start_Y_from) %>% summarise(edges_out = n()), galsimple_edges %>% group_by(year = exh_start_Y_to) %>% summarise(edges_in = n()), by = "year") %>% full_join(places_exhibiting_per_year, "year") edges_per_year %>% tidyr::gather(var, val, - year) %>% ggplot(aes(year, val, color = var)) + geom_line() + theme_bw() + theme(legend.position = "bottom") + scale_x_continuous(breaks = seq(1905, 2015, 10), limits = c(1905, NA)) + scale_y_continuous(breaks = seq(0, 10000, 2000)) + labs(x = "Year", y = "Number of ties", color = element_blank())
exhibitions_per_year %>% full_join(places_exhibiting_per_year, by = c("type_exhplace", "exh_start_Y")) %>% ggplot(aes(x = exh_start_Y, color = type_exhplace)) + geom_line(aes(y = num_of_exh)) + geom_line(aes(y = places_exhibiting, linetype = type_exhplace))+ theme_bw() + scale_x_continuous(breaks = seq(1905, 2015, 10), limits = c(1905, NA)) + theme(legend.position = "bottom") + labs(x = "Year", y = "Number of exhibitions", color = "Venue") + guides(linetype = FALSE)
exhibitions %>% distinct(id, .keep_all = T) %>% group_by(solo_group, exh_start_Y) %>% summarise(num_solo_group = n()) %>% ggplot(aes(x = exh_start_Y)) + geom_line(aes(y = num_solo_group, color = solo_group)) + geom_line(aes(y = num_places_exhibiting), linetype = 2, data = places_exhibiting_per_year) + theme_bw() + scale_x_continuous(breaks = seq(1905, 2015, 10), limits = c(1905, NA)) + theme(legend.position = "bottom") + labs(x = "Year", y = "Number of exhibitions", color = "Venue") + guides(linetype = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.