# R/create_edgelist.R

#' Build a venue-to-venue edge list from artists' exhibition histories
#'
#' The exhibition table is joined with itself on `artist_id`, and only pairs
#' in which the first exhibition started strictly before the second one
#' (compared on `exh_start_Ymd`) are kept. Every remaining row is a directed
#' edge from the exhibition place of the earlier exhibition (`from`) to the
#' exhibition place of the later exhibition (`to`).
#'
#' The three logical switches are evaluated with `isTRUE()`, so anything
#' other than a single `TRUE` leaves the corresponding step switched off.
#'
#' @param .exhibitions Data frame of exhibitions. The function reads the
#'   columns `id`, `exh_place_id`, `artist_id`, `solo_group`, `exh_start_Y`,
#'   `exh_start_Ym` and `exh_start_Ymd`. (Presumably one row per artist and
#'   exhibition; confirm against the caller.)
#' @param .artists Data frame with the columns `id`, `birth` and `death`.
#'   Only evaluated when `.artist_remove_exhibition_after_death` is `TRUE`.
#' @param .exhplaces Data frame of exhibition places, left-joined onto
#'   `.exhibitions` via `exh_place_id == id`. The column `type_exhplace` must
#'   exist after this join (presumably it is supplied by this table).
#' @param .venue_max_years_between_exhibitions `NULL` (no limit) or a number.
#'   Edges are kept only if
#'   `exh_start_Y_to <= exh_start_Y_from + .venue_max_years_between_exhibitions`.
#' @param .venue_min_num_of_exh `NULL` (no limit) or a number. Exhibition
#'   places with fewer distinct exhibition ids than this are removed before
#'   the edges are built.
#' @param .venue_remove_only_self_referential If `TRUE`, all edges leaving an
#'   exhibition place are removed when every one of them is a loop
#'   (`from == to`).
#' @param .artist_remove_exhibition_after_death If `TRUE`, only edges whose
#'   two exhibition years are both `<=` the artist's `death` year are kept.
#'   When `death` is `NA`, `birth + 95` is used instead. Edges of artists
#'   that are missing from `.artists`, or that have neither year, are
#'   dropped.
#' @param .order_1_network If `TRUE`, only the edge to the earliest later
#'   exhibition is kept for each combination of `exh_id_from` and
#'   `artist_id` (the first one in case of ties).
#'
#' @return A data frame of edges whose first columns are `from`, `to` (both
#'   integer), `exh_id_from` and `exh_id_to`, followed by the remaining
#'   `*_from` / `*_to` attributes, `artist_id` and `tie_number` (the rank of
#'   `exh_start_Ymd_from` within an artist, truncated to integer). Rows are
#'   ordered by `from`, `artist_id` and `exh_start_Ymd_from`.
create_edgelist <- function(.exhibitions,
                            .artists,
                            .exhplaces,
                            .venue_max_years_between_exhibitions = NULL,
                            .venue_min_num_of_exh = NULL,
                            .venue_remove_only_self_referential = FALSE,
                            .artist_remove_exhibition_after_death = FALSE,
                            .order_1_network = TRUE) {

  # attach the exhibition place attributes to every exhibition row
  exhibitions <- .exhibitions %>%
    dplyr::left_join(.exhplaces, by = c("exh_place_id" = "id"))

  # optionally drop exhibition places that hosted too few exhibitions
  if (!is.null(.venue_min_num_of_exh)) {

    frequent_places <- exhibitions %>%
      # count every exhibition id only once
      distinct(id, .keep_all = TRUE) %>%
      dplyr::count(exh_place_id) %>%
      filter(n >= .venue_min_num_of_exh)

    exhibitions <- exhibitions %>%
      dplyr::semi_join(frequent_places, by = "exh_place_id")
  }

  # the same exhibitions seen as the start ("from") of an edge ...
  edge_start <- exhibitions %>%
    select(from = exh_place_id,
           exh_id_from = id,
           exh_type_from = solo_group,
           exh_venue_from = type_exhplace,
           artist_id,
           exh_start_Y_from = exh_start_Y,
           exh_start_Ym_from = exh_start_Ym,
           exh_start_Ymd_from = exh_start_Ymd)

  # ... and as the end ("to") of an edge
  edge_end <- exhibitions %>%
    select(to = exh_place_id,
           exh_id_to = id,
           exh_type_to = solo_group,
           exh_venue_to = type_exhplace,
           artist_id,
           exh_start_Y_to = exh_start_Y,
           exh_start_Ym_to = exh_start_Ym,
           exh_start_Ymd_to = exh_start_Ymd)

  # all pairs of exhibitions that share an artist
  edges <- inner_join(edge_start, edge_end, by = "artist_id") %>%

    # tbl_graph requires edges to be encoded in a `from` and a `to` column,
    # or in the two first columns, as integers
    mutate(from = as.integer(from), to = as.integer(to)) %>%

    # important: only keep edges that relate exhibition places in
    # chronological order
    filter(exh_start_Ymd_from < exh_start_Ymd_to)

  # keep only the immediately following exhibition per exhibition and artist
  if (isTRUE(.order_1_network)) {
    edges <- edges %>%
      group_by(exh_id_from, artist_id) %>%
      slice(which.min(exh_start_Ymd_to)) %>%
      ungroup()
  }

  edges <- edges %>%

    # feature: position of the edge in the artist's sequence of exhibitions
    group_by(artist_id) %>%
    mutate(tie_number = as.integer(rank(exh_start_Ymd_from))) %>%
    ungroup() %>%

    # rearrange columns of the data frame according to igraph syntax
    select(from, to, exh_id_from, exh_id_to, everything()) %>%
    arrange(from, artist_id, exh_start_Ymd_from)

  # remove edges with a time interval greater than the allowed maximum
  if (!is.null(.venue_max_years_between_exhibitions)) {
    edges <- edges %>%
      filter(
        exh_start_Y_to <=
          exh_start_Y_from + .venue_max_years_between_exhibitions
      )
  }

  # remove the edges of exhibition places that only point to themselves
  if (isTRUE(.venue_remove_only_self_referential)) {
    edges <- edges %>%
      group_by(from) %>%
      # keep a place unless its number of loops equals its number of edges
      filter(n() != sum(from == to)) %>%
      ungroup()
  }

  if (isTRUE(.artist_remove_exhibition_after_death)) {

    # edges whose two exhibitions both took place while the artist was alive
    edges_artists_alive <- edges %>%
      inner_join(.artists, by = c("artist_id" = "id")) %>%
      # last year in which the artist counts as alive: the year of death or,
      # when that is unknown, 95 years after birth
      mutate(
        last_year_alive = if_else(!is.na(death),
                                  death,
                                  as.integer(birth + 95))
      ) %>%
      # `<=` keeps exhibitions that started in the year of death itself
      filter(exh_start_Y_from <= last_year_alive,
             exh_start_Y_to <= last_year_alive)

    # keep the edges that meet this criterion (original columns only)
    edges <- edges %>%
      semi_join(edges_artists_alive,
                by = c("exh_id_from", "exh_id_to", "artist_id"))
  }

  edges
}
# Framus94/HierarchiesAndCareers documentation built on June 5, 2019, 8:52 a.m.