In Framus94/HierarchiesAndCareers:

knitr::opts_chunk$set(echo = TRUE)

library(igraph)
library(tidyverse)
drake::loadd()



galsimple <- igraph::simplify(gal, remove.multiple = F, remove.loops = T) %>% 
  delete.vertices(., degree(.) == 0)

gen_random_graphs <- function(.graph, .n){

  # Calculate graph density for network randomization
  gd <- edge_density(.graph)
  vcount <- gorder(.graph)

  # Generate n random graphs
  gl <- vector('list', .n)

  set.seed(1403)


  for(i in 1:.n){
    gl[[i]] <- erdos.renyi.game(n = vcount, p.or.m = gd, type = "gnp", directed = T, loops = F)
  }

  return(gl)

}

gl <- gen_random_graphs(galsimple, 1000)

ga <- subgraph.edges(galsimple, as.character(artists_lean$id), delete.vertices = T) %>% 
  delete.vertices(., degree(.) == 0)

The graph object contains r vcount(g) vertices and r ecount(g) edges.
The longest path in the network is r farthest_vertices(g)$distance.
According to triad census are r tc <- triad_census(g); ((tc[4] + tc[9] + tc[12]) / sum(tc)) * 100% of the vertices driving movements to other exhibition places (two outward ties). The questions is
Edge density is r edge_density(g).

Graph

plot(galsimple, vertex.label = NA, edge.arrow.width = 0, edge.arrow.size = 0, margin = 0, vertex.size = 2)

Important vertices

The importance of exhibition venues for the career of artists cannot be assessed without referring to its specific context. On the one hand, exhibitions in certain places can be the goal of many artists. On the other hand, certain exhibitions are the catalyst for exhibitions in even more significant places. (Both high in and outdegree?)

# Calculate the "out" degrees
out_degree <- degree(galsimple, mode = "out") 

## ... and "in" degrees
in_degree <- degree(galsimple, mode = "in")

data_frame(in_degree = in_degree, out_degree = out_degree) %>% 

  ggplot(aes(out_degree, in_degree)) + 
  geom_point() + 
  geom_abline(intercept = 0, slope = 1, color = "red") +
  geom_smooth(se = F, method = "lm") +
  theme_bw()

There is a strong relation between relationship between the vertices' in- and out degree (r = r cor(out_degree, in_degree) %>% round(2)).

First, we'll look at our graph and see the distribution of in degree and out degree, and then use that to set up a working definition for what an "important product" is (something that has > X out degrees and < Z in degrees).

# The most centralized graph according to eigenvector centrality
g0 <- graph( c(2,1), n=10, dir=FALSE )
g1 <- make_star(10, mode="undirected")
centr_eigen(g0)$centralization
centr_eigen(g1)$centralization
par(mfrow = c(1,2))
plot(g0)
plot(g1)

# Calculate eigenvector centrality on graph level
g.eigen <- centr_eigen(galsimple, directed = T)$centralization

# Calculate eigenvector centrality of n random graphs
gl.eigens <- map_dbl(gl, ~ centr_eigen(., directed = TRUE)$centralization)

# Plot the distribution of centralities
data_frame(gl.eigens) %>% 
  ggplot(aes(gl.eigens)) + 
  geom_histogram(bins = 100) +
  geom_vline(xintercept = g.eigen, color = "red", size = 1, linetype = "dashed") +
  theme_bw()

# Calculate the proportion of graphs with an eigenvector centrality lower than our observed
mean(gl.eigens < g.eigen)


# The network is far more centralized than we would expect by chance as zero random networks have a higher eigenvector centrality than the observed network.

Largest cliques

# Assign largest cliques output to object 'lc'
lc <- largest_cliques(galsimple)

# Create two new undirected subgraphs, each containing only the vertices of each largest clique.
gs1 <- as.undirected(subgraph(galsimple, lc[[1]]))
gs2 <- as.undirected(subgraph(galsimple, lc[[2]]))



plot(gs2,
     vertex.label = V(gs1)$name_exhplace,
     vertex.label.color = "black", 
     vertex.label.cex = 0.9,
     vertex.size = 0,
     edge.color = 'gray28',
     main = "Largest Clique",
     layout = layout.circle(gs1)
)

Which artists exhibited in all these exhibition places?

vertex_attr(gs1, name = "name", index = V(gs1))

clique_exhplaces <- exhplaces %>% 
  filter(id %in% vertex_attr(gs1, name = "name", index = V(gs1)))

table(clique_exhplaces$type_exhplace)
table(clique_exhplaces$country)

Assotativity

This is a measure of how preferentially attached vertices are to other vertices with identical attributes.

Categorical assortativity

# Convert the exhibition type attribute into a numeric value
values <- as.numeric(factor(V(galsimple)$type_exhplace))

observed.assortativity <- assortativity(galsimple, values, directed = T)

# Calculate the assortativity of the network randomizing the type attribute n times
results <- vector('list', 1000)
for(i in 1:1000){
  results[[i]] <- assortativity(galsimple, sample(values), directed = T)
}

# Plot the distribution of assortativity values and add a red vertical line at the original observed value
data_frame(assortativity = unlist(results)) %>% 
  ggplot(aes(assortativity)) + 
  geom_histogram(bins = 100) +
  geom_vline(xintercept = observed.assortativity, color = "red", size = 1, linetype = "dashed") +
  theme_bw()

Degree assortativity

Degree assortativity determines how preferentially attached vertices are to other vertices of a similar degree.

g.dassor <- assortativity.degree(galsimple, directed = TRUE)

g.dassors <- map_dbl(gl, ~ assortativity.degree(., directed = TRUE))

# Plot the distribution 
data_frame(g.dassors) %>% 
  ggplot(aes(g.dassors)) + 
  geom_histogram(bins = 100) +
  geom_vline(xintercept = g.dassor, color = "red", size = 1, linetype = "dashed") +
  theme_bw()

# Calculate the proportion of graphs with an eigenvector centrality lower than our observed
mean(g.dassors < g.dassor)

The simulation with r n randomly generated networks based on a similar edge density shows that a assortativity degree of r assortativity.degree(galsimple, directed = TRUE) from the observed network is highly unlikely.

Reciprocity

The reciprocity of a directed network reflects the proportion of edges that are symmetrical. That is, the proportion of outgoing edges that also have an incoming edge. It is commonly used to determine how inter-connected directed networks are.

g.recip <- igraph::reciprocity(galsimple)

n_nodes <- gorder(galsimple)
edge_dens <- edge_density(galsimple)

simulated_recip <- rep(NA, 1000)

for(i in 1:1000) {
  simulated_graph <- erdos.renyi.game(n_nodes, edge_dens, directed = TRUE,  type = "gnp")
  simulated_recip[i] <- reciprocity(simulated_graph)
}

# Plot the distribution 
data_frame(simulated_recip) %>% 
  ggplot(aes(simulated_recip)) + 
  geom_histogram(bins = 1000) +
  geom_vline(xintercept = g.recip, color = "red", size = 1, linetype = "dashed") +
  theme_bw()

# Perform fast-greedy community detection on network graph
kc = fastgreedy.community(galsimple)

# Determine sizes of each community
sizes(kc)

# Determine which individuals belong to which community
membership(kc)

# Plot the community structure of the network
plot(kc, g)

# Create an object 'i' containin the memberships of the fast-greedy community detection
i <-  membership(kc)

# Check the number of different communities
sizes(kc)

# Add a color attribute to each vertex, setting the vertex color based on community membership
g <- set_vertex_attr(g, "color", value = c("yellow", "blue", "red")[i])

# Plot the graph using threejs
graphjs(g)

library(threejs)
# Create numerical vector of vertex eigenvector centralities 
ec <- as.numeric(eigen_centrality(galsimple)$vector)

# Create new vector 'v' that is equal to the square-root of 'ec' multiplied by 5
v <- 2*sqrt(ec)

# Plot threejs plot of graph setting vertex size to v
graphjs(galsimple, vertex.size = v)

Explore network over time

create_subgraph_period <- function(.graph, .start_year, .years_to_end){

  # specify time period
  begin <- .start_year
  end   <- .start_year + .years_to_end

  edges_subset <- E(.graph)[[exh_start_Y_from   >= begin 
                             & exh_start_Y_from <= end 
                             & exh_start_Y_to   >= begin 
                             & exh_start_Y_to   <= end]]

  subgraph <- subgraph.edges(.graph, eids = edges_subset, delete.vertices = T)

  return(subgraph)

}

# create graphs for 1917-1936, 1937-1957, ..., 1997-2017
time_graphs <- seq(from = 1917, to = 1997, by = 10) %>% 
  map(~ create_subgraph_period(galsimple, 
                               .start_year = ., 
                               .years_to_end = 9))

time_graphs_tbl <- seq(from = 1917, to = 1997, by = 10) %>% 
  map(~ create_subgraph_period(galsimple, 
                               .start_year = ., 
                               .years_to_end = 9) %>% 
        tidygraph::as_tbl_graph())

are_equal_graphs <- function(.graph1, .graph2){

  # EDGES
  # if graphs do not have the same number of EDGES they are different
  if(ecount(.graph1) == ecount(.graph2)){

    # if they have same number of edges: are any edges not the same?
    any_edge_unlike <- any(E(.graph1) != E(.graph2))

    if(any_edge_unlike == T) message("Edges are NOT the same.") 
    else writeLines("Edges are the same.")

  } else {message("Graphs have different number of edges.")}


  # VERTICES
  # if graphs do not have the same number of VERTICES they are different
  if(vcount(.graph1) == vcount(.graph2)){

    # any vertex not the same?
    any_vertex_unlike <- any(V(.graph1) != V(.graph2))

    if(any_vertex_unlike == T) message("Vertices are NOT the same.") 
    else writeLines("Vertices are the same.")

  } else {message("Graphs have different number of vertices.")}

}

year <- 2017 
year_diff <- 99
df <- compute_centr_year(galsimple, .lower_boundary_yr = year - year_diff, .yr_centr_computed = year) %>%
  mutate(name = as.integer(name)) %>%  
  left_join(exhplaces, by = c("name" = "id"))

df_galsimple <- as_tbl_graph(galsimple) %>%
  mutate(name = as.integer(name)) %>% 
  activate(nodes) %>% left_join(df , "name")

are_equal_graphs(galsimple, df_galsimple)

edge_attr(.graph1, "exh_id_to") %>% head()

.graph1 <- time_graphs_tbl[[1]]
.graph2 <- time_graphs[[1]]

are_equal_graphs(time_graphs[[1]], time_graphs_tbl[[1]])


ec1 <- igraph::eigen_centrality(.graph1, directed = T, scale = T)$vector
ec2 <- igraph::eigen_centrality(.graph2, directed = T, scale = T)$vector

# che
any(round(ec1, 4) != round(ec2, 4))

library(tidygraph)

ec1tbl <- .graph1 %>% activate(nodes) %>% 
  mutate(eigen = centrality_eigen(directed = T)) %>% pull(eigen) %>% round(4)

library(ggraph)



plot_graph_period <- function(.graph, label = F){

  edges <- igraph::as_data_frame(.graph)

  max <- max(max(edges$exh_start_Y_from), max(edges$exh_start_Y_to)) 
  min <- min(min(edges$exh_start_Y_from), min(edges$exh_start_Y_to))

  gplot <- ggraph(.graph, layout = "fr") + 
    geom_edge_link(#aes(color = exh_type_to), 
                   arrow = arrow(length = unit(4, 'mm'), angle = 12), end_cap = circle(3, 'mm'), 
                   alpha = 0.4) + 
    geom_node_point(aes(color = type_exhplace), size = 3) +
    theme_bw() +
    labs(color = "Exhibition Place") +
    ggtitle(paste0(min, "-", max))


  if(label == T){
    gplot <- gplot +
      geom_node_text(aes(label = name_exhplace), nudge_y = 0.2, size = 2, check_overlap = T)
  }

  return(gplot)

}

plot_graph_period(time_graphs[[1]], label = T)

time_graphs[[1]] %>%   plot(vertex.label = V(.)$name_exhplace)

ecount(g1917_1926)
vcount(g1917_1926)

ego18761 <- induced_subgraph(galsimple, 
                             vids = unlist(ego(g1917_1936, order = 15, nodes = V(g1917_1936)[name == "18761"]))
)





plot(ego18761)

g1917_1936 %>% igraph::as_data_frame() %>% View()

g1917_1936 %>% 
  plot(., 
       vertex.label = NA, 
       vertex.label.cex = 0.7,
       vertex.label.dist=0.9,
       edge.arrow.width = 0.65, 
       edge.arrow.size = 0.65, 
       margin = 1, 
       vertex.size = 2)


# get relevant edges 


# Loop over time graphs calculating out degree
degree_count_list <- lapply(time_graph, degree, mode = "out")

# Flatten it
degree_count_flat <- unlist(degree_count_list)

degree_data <- data.frame(
  # Use the flattened counts
  degree_count = degree_count_flat,
  # Use the names of the flattened counts
  vertex_name = names(degree_count_flat),
  # Repeat the dates by the lengths of the count list
  date = rep(d, lengths(degree_count_list))
)


# Using important_degree_data, plot degree_count vs. date, colored by vertex_name 
ggplot(important_degree_data, aes(date, degree_count, color = vertex_name)) + 
  # Add a path layer
  geom_path()

library(tidygraph); library(ggraph)



nodes <- V(time_graphs[[2]])$name 



g18761 <- igraph::make_ego_graph(g1917_1936, nodes = V(g1917_1936)[name == 18761])

ggraph(g1917_1936) + 
  geom_edge_link(arrow = arrow(length = unit(4, 'mm'), angle = 12), 
                 end_cap = circle(3, 'mm')) + 
  geom_node_point(size = 3) +
  geom_node_text(aes(label = name_exhplace), nudge_y = 0.2, size = 3, check_overlap = T) +
  theme_bw()

places_exhibiting_per_year <- exhibitions %>% 
  left_join(exhplaces, by = c("exh_place_id" = "id")) %>% 
  distinct(exh_start_Y, exh_place_id, .keep_all = T) %>% 
  group_by(exh_start_Y, type_exhplace) %>% 
  summarise(places_exhibiting = n())


exhibitions_per_year <- exhibitions %>% 
  left_join(exhplaces, by = c("exh_place_id" = "id")) %>% 
  distinct(exh_start_Y, id, .keep_all = T) %>%
  group_by(exh_start_Y, type_exhplace) %>% 
  summarise(num_of_exh = n()) 




# extract edges from graph
galsimple_edges <- galsimple %>% 
  igraph::as_data_frame("edges")

places_exhibiting_per_year <- galsimple_edges %>% 
  distinct(from, exh_start_Y_from, .keep_all = T) %>% 
  group_by(year = exh_start_Y_from) %>% 
  summarise(num_places_exhibiting = n())


edges_per_year <- full_join(
  galsimple_edges %>% 
    group_by(year = exh_start_Y_from) %>% 
    summarise(edges_out = n()),
  galsimple_edges %>% 
    group_by(year = exh_start_Y_to) %>% 
    summarise(edges_in = n()), 
  by = "year") %>% 
  full_join(places_exhibiting_per_year, "year")

edges_per_year %>% 
  tidyr::gather(var, val, - year) %>% 
  ggplot(aes(year, val, color = var)) + 
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_x_continuous(breaks = seq(1905, 2015, 10), limits = c(1905, NA)) +
  scale_y_continuous(breaks = seq(0, 10000, 2000)) +
  labs(x = "Year", y = "Number of ties", color = element_blank())

exhibitions_per_year %>%
  full_join(places_exhibiting_per_year, by = c("type_exhplace", "exh_start_Y")) %>% 
  ggplot(aes(x = exh_start_Y, color = type_exhplace)) +
  geom_line(aes(y = num_of_exh)) +
  geom_line(aes(y = places_exhibiting, linetype = type_exhplace))+
  theme_bw() +
  scale_x_continuous(breaks = seq(1905, 2015, 10), limits = c(1905, NA)) +
  theme(legend.position = "bottom") +
  labs(x = "Year", y = "Number of exhibitions", color = "Venue") +
  guides(linetype = FALSE)

exhibitions %>%
  distinct(id, .keep_all = T) %>% 
  group_by(solo_group, exh_start_Y) %>% 
  summarise(num_solo_group = n()) %>% 
  ggplot(aes(x = exh_start_Y)) +
  geom_line(aes(y = num_solo_group, color = solo_group)) +
  geom_line(aes(y = num_places_exhibiting), linetype = 2, 
            data = places_exhibiting_per_year) +
  theme_bw() +
  scale_x_continuous(breaks = seq(1905, 2015, 10), limits = c(1905, NA)) +
  theme(legend.position = "bottom") +
  labs(x = "Year", y = "Number of exhibitions", color = "Venue") +
  guides(linetype = FALSE)