In zambujo/swissparticipation: Swiss Participation in EU's Research Programmes

knitr::opts_chunk$set(
  dev = "svg",
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  comment = FALSE
)

source(here::here("R", "package.r"))
source(here::here("R", "utils.r"))

df_schema <- here("data", "schema-cordis-plus.yml") %>%
  read_yaml() %>%
  as.list()

df_raw <-
  list.files(here("data"), pattern = "cordis-plus-", full.names = TRUE) %>%
  map_df(read_csv, col_types = df_schema)

df_raw <-
  df_raw %>%
  mutate(
    pillar = fct_inseq(pillar),
    pillar = fct_rev(pillar),
    pillar = fct_relevel(pillar, "Cross-theme", after = 0),
    pillar = fct_recode(
      pillar, I = "1", II = "2", III = "3", IV = "4", V = "5"),
    org_type = fct_collapse(
      legal_entity_type,
      `Research & Education` = c("HES", "REC"),
      `Others (Excl. R&Ed)` = c("PRC", "PUB", "OTH")))

df <- df_raw %>%
    mutate(start_semester = floor_date(start_date, "6 months")) %>%
    # removing most international organizations (CERN, WHO, ...)
    filter(
      !str_detect(legal_name, "EUROPEAN|MONDIALE|WORLD") |
        !str_detect(legal_name, "ORGANIZATION|ORGANISATION|ASSOCIATION")
    )

An Overview of Swiss Participation {.tabset .tabset-fade .tabset-pills}

get_particip <- function(x) {
  ch <- x %>%
    filter(country_code == "CH") %>%
    group_by(framework_programme, time_period = start_semester) %>%
    summarise(n_ch = n(),
              contrib_ch = sum(re_contribution, na.rm = TRUE))
  total <- x %>%
    group_by(time_period = start_semester) %>%
    summarise(n_total = n(),
              contrib_all = sum(re_contribution, na.rm = TRUE))

  ch2 <- x %>%
    filter(country_code == "CH") %>%
    filter(partner_role == "Coordinator") %>%
    group_by(framework_programme, time_period = start_semester) %>%
    summarise(coord_ch = n())
  total2 <- x %>%
    filter(partner_role == "Coordinator") %>%
    count(time_period = start_semester, name = "coord_total")

  coord <- left_join(total2, ch2, by = "time_period") %>%
    mutate(share_coord = 100 * coord_ch / coord_total)

  left_join(total, ch, by = "time_period") %>%
    mutate(
      share_particip = 100 * n_ch / n_total,
      share_contrib = 100 * contrib_ch / contrib_all
    ) %>%
    left_join(coord, by = c("time_period", "framework_programme")) %>%
    filter(!is.na(framework_programme))
}

get_avg_particip <- function(x) {
  x <- x %>%
    filter(!is.na(framework_programme))

  total <- x %>%
    group_by(framework_programme) %>%
    summarise(n_total = n(),
              contrib_all = sum(re_contribution, na.rm = TRUE))
  ch <- x %>%
    filter(country_code == "CH") %>%
    group_by(framework_programme) %>%
    summarise(n_ch = n(),
              contrib_ch = sum(re_contribution, na.rm = TRUE))

  # coordinations
  total2 <- x %>%
    filter(partner_role == "Coordinator") %>%
    count(framework_programme, name = "coord_total")
  ch2 <- x %>%
    filter(country_code == "CH") %>%
    filter(partner_role == "Coordinator") %>%
    group_by(framework_programme) %>%
    summarise(coord_ch = n())
  coord <- left_join(total2, ch2, by = "framework_programme") %>%
    mutate(avg_share_coord = 100 * coord_ch / coord_total)

  left_join(total, ch, by = "framework_programme") %>%
    mutate(
      avg_share_particip = 100 * n_ch / n_total,
      avg_share_contrib = 100 * contrib_ch / contrib_all
    ) %>%
    left_join(coord, by = c("framework_programme"))
}

participation <- get_particip(df)
get_avg_particip(df) %>%
  select(starts_with("frame"), starts_with("avg")) %>%
  mutate(
    avg_share_particip = round(avg_share_particip, 2),
    avg_share_contrib = round(avg_share_contrib, 2),
    avg_share_coord = round(avg_share_coord, 2)
  ) %>%
  rename(
    Programme = framework_programme,
    `CH Participation (%)` = avg_share_particip,
    `Contrib. to CH Participants (%)` = avg_share_contrib,
    `CH Coordinations (%)` = avg_share_coord) %>%
  reactable()

For the official figures, please visit SERI's page.

Share Swiss Participations

pal <- c("#6B7AA1", "#11324D")
participation %>%
  ggplot(aes(x = time_period, y = share_particip, fill = framework_programme)) +
  geom_bar(position = "stack", stat = "identity") +
  labs(
    title = "Swiss Participation",
    subtitle = "By semester of project starting date, FP7 and H2020",
    x = "Semester (Project Starting Date)",
    y = "%"
  ) +
  scale_fill_manual(values = pal) +
  own_theme()

Contribution to Swiss Participants

participation %>%
  ggplot(aes(x = time_period, y = share_contrib, fill = framework_programme)) +
  geom_bar(position = "stack", stat = "identity") +
  labs(
    title = "Contributions to Swiss Participants",
    subtitle = "By semester of project starting date, FP7 and H2020",
    x = "Semester (Project Starting Date)",
    y = "%"
  ) +
  scale_fill_manual(values = pal) +
  own_theme()

Swiss Coordinations

participation %>%
  ggplot(aes(x = time_period, y = share_coord, fill = framework_programme)) +
  geom_bar(position = "stack", stat = "identity") +
  labs(
    title = "Swiss Coordinations",
    subtitle = "By semester of project starting date, FP7 and H2020",
    x = "Semester (Project Starting Date)",
    y = "%"
  ) +
  scale_fill_manual(values = pal) +
  scale_shape_manual(values = c(16, 16)) +
  own_theme()

Top Participating Countries {.tabset .tabset-fade .tabset-pills}

Switzerland ranks 7th by total EU contribution in FP7 and 8th in H2020. (The CERN is not taken into account, see CERN).

FP7

Participation count, number of projects (distinct), and total EU contribution for the top 15 participating countries (by total EU contribution).

# find and sort top participants
top_participants <- df %>%
  filter(framework_programme == "FP7") %>%
  group_by(Country = country) %>%
  summarise(contrib_country = sum(re_contribution, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(desc(contrib_country)) %>%
  mutate(
    country_txt = str_replace(Country, "United Kingdom", "UK"),
    country_txt = sprintf("%s %d.", country_txt, row_number()))

ch_rank <- which(pull(top_participants, Country) == "Switzerland")

pal <- c(
  rep("#DFD8CA", ch_rank - 1),
  "#B91646",
  rep("#DFD8CA", 15 - ch_rank)) %>%
  rev()

top_participants %>%
  mutate(
    country_txt = fct_rev(fct_inorder(country_txt)),
    share = 100 * prop.table(contrib_country)
  ) %>%
  head(15) %>%
  ggplot(aes(x = contrib_country, y = country_txt, fill = country_txt)) +
  geom_col() +
  geom_text(
    aes(label = sprintf("%.1f%%", share)),
    hjust = 1.15,
    size = 4,
    fontface = "bold",
    family = "Gill Sans"
  ) +
  labs(x = NULL, y = NULL) +
  scale_fill_manual(values = pal, guide = "none") +
  theme_void() +
  theme(
    plot.margin = margin(c(4, 2, 4, 8)),
    axis.text.y = element_text(
      margin = margin(t = 0, r = -10, b = 0, l = 5),
      size = 14,
      family = "Gill Sans",
      hjust = 1,
      color = "gray10"
    )
  )

country_participation <- df %>%
  filter(framework_programme == "FP7") %>%
  group_by(
    Country = country,
    `Project Start Year` = start_year,
    Type = org_type
  ) %>%
  summarise(
    `Participation Count` = n(),
    `Nb Projects` = n_distinct(project_id),
    `EU Contribution` = sum(re_contribution, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  arrange(`Type`, `Project Start Year`)

top_participants %>%
  select(Country) %>%
  head(15) %>%
  inner_join(country_participation, by = "Country") %>%
  reactable(
    groupBy = c("Country", "Project Start Year"),
    columns = list(
      `Participation Count` = colDef(
        aggregate = "sum",
        format = colFormat(separators = TRUE)
      ),
      `Nb Projects` = colDef(
        aggregate = "sum",
        format = colFormat(separators = TRUE)
      ),
      `EU Contribution` = colDef(
        aggregate = "sum",
        format = colFormat(
          currency = "EUR",
          separators = TRUE,
          digits = 2
        )
      )
    ),
    highlight = TRUE,
    compact = TRUE
  )

H2020

Participation count, number of projects (distinct), and total EU contribution for the top 15 participating countries (by total EU contribution).

# find and sort top participants
top_participants <- df %>%
  filter(framework_programme == "H2020") %>%
  group_by(Country = country) %>%
  summarise(contrib_country = sum(eu_contribution, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(desc(contrib_country)) %>%
  mutate(
    country_txt = str_replace(Country, "United Kingdom", "UK"),
    country_txt = sprintf("%s %d.", country_txt, row_number()))

ch_rank <- which(pull(top_participants, Country) == "Switzerland")

pal <- c(
  rep("#DFD8CA", ch_rank - 1),
  "#B91646",
  rep("#DFD8CA", 15 - ch_rank)) %>%
  rev()

top_participants %>%
  mutate(
    country_txt = fct_rev(fct_inorder(country_txt)),
    share = 100 * prop.table(contrib_country)
  ) %>%
  head(15) %>%
  ggplot(aes(x = contrib_country, y = country_txt, fill = country_txt)) +
  geom_col() +
  geom_text(
    aes(label = sprintf("%.1f%%", share)),
    hjust = 1.15,
    size = 4,
    fontface = "bold",
    family = "Gill Sans"
  ) +
  labs(x = NULL, y = NULL) +
  scale_fill_manual(values = pal, guide = "none") +
  theme_void() +
  theme(
    plot.margin = margin(c(4, 2, 4, 8)),
    axis.text.y = element_text(
      margin = margin(t = 0, r = -10, b = 0, l = 5),
      size = 14,
      family = "Gill Sans",
      hjust = 1,
      color = "gray10"
    )
  )

country_participation <- df %>%
  filter(framework_programme == "H2020") %>%
  group_by(
    Country = country,
    `Call Year` = call_year,
    Type = org_type
  ) %>%
  summarise(
    `Participation Count` = n(),
    `Nb Projects` = n_distinct(project_id),
    `EU Contribution` = sum(eu_contribution, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  arrange(`Type`, `Call Year`)

# show with reactable
top_participants %>%
  select(Country) %>%
  head(15) %>%
  inner_join(country_participation, by = "Country") %>%
  reactable(
    groupBy = c("Country", "Call Year"),
    columns = list(
      `Participation Count` = colDef(
        aggregate = "sum",
        format = colFormat(separators = TRUE)
      ),
      `Nb Projects` = colDef(
        aggregate = "sum",
        format = colFormat(separators = TRUE)
      ),
      `EU Contribution` = colDef(
        aggregate = "sum",
        format = colFormat(
          currency = "EUR",
          separators = TRUE,
          digits = 2
        )
      )
    ),
    highlight = TRUE,
    compact = TRUE
  )

CERN

total_cern_fp7 <- df_raw %>%
  filter(country_code == "CH") %>%
  filter(framework_programme == "FP7") %>%
  filter(legal_name == "EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH") %>%
  summarise(total_eu_contrib = sum(re_contribution, na.rm = TRUE)) %>%
  pull() %>%
  format(big.mark = ",", decimal.mark = ".", trim = TRUE, digits = 11)

total_cern_h2020 <- df_raw %>%
  filter(country_code == "CH") %>%
  filter(framework_programme == "H2020") %>%
  filter(legal_name == "EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH") %>%
  summarise(total_eu_contrib = sum(eu_contribution, na.rm = TRUE)) %>%
  pull() %>%
  format(big.mark = ",", decimal.mark = ".", trim = TRUE, digits = 11)

EU Contribution to CERN amounted to r str_c("€", total_cern_fp7) under FP7 and to r str_c("€", total_cern_h2020) under H2020.

# TODO: add get_participation to utils.R
df_raw %>%
  filter(country_code == "CH") %>%
  filter(legal_name == "EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH") %>%
  get_participation()