In RichardMeyer-Eppler/studentenstatistikNRW: Student Statistics from the Landesdatenbank NRW (Table Series 21311)

# suppress a section title from toc #1430
# https://github.com/r-lib/pkgdown/issues/1430
# https://github.com/r-lib/pkgdown/pull/1432/commits/8a4643f855d14f67704d6cfa2ba25fadbc4dfaa2
# https://afeld.github.io/bootstrap-toc/#:~:text=in%20the%20sidebar.-,Skipping,-To%20prevent%20a

# <h2 data-toc-skip>Floating TOC issue
# Random table of contents on a vignette #1349
# https://github.com/r-lib/pkgdown/issues/1349
# documentation on toc depth #1357
# https://github.com/r-lib/pkgdown/issues/1357
# Document _output.yml rather than _site.yml for vignette output format
# https://github.com/r-lib/pkgdown/issues/1764
# https://github.com/r-lib/pkgdown/blob/d5e60a7232ee5f414f2a1de2aca5bf44054e7a08/vignettes/test/output.Rmd#L3-L5

# https://www.w3schools.com/css/css_rwd_viewport.asp
# https://pkgdown.r-lib.org/reference/build_articles.html
# https://stackoverflow.com/questions/34906002/increase-width-of-entire-html-rmarkdown-output

# https://r-pkgs.org/vignettes.html
# https://httr2.r-lib.org/articles/wrapping-apis.html#secret-management
# https://pkgdown.r-lib.org/articles/linking.html

library(studentenstatistikNRW)
library(tibble)

# Chunk defaults for this vignette: collapse source and output into one block
# and prefix printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

As of March 13, 2024, the Federal Statistical Office of Germany (Destatis) lists a total of 427 institutions of higher education in the country. The table below lists most of these institutions, together with links providing a wide variety of additional information.

# Table header, footer link and layout constants ----

# Header shown above the table.
title <- "Institutions of Higher Education in Germany"
# subtitle <- "Sub"

# Footer: the package website, rendered as a clickable link.
source_note_link <- "https://richardmeyer-eppler.github.io/studentenstatistikNRW"
source_note <- glue::glue("<a href = {source_note_link}>{source_note_link}</a>")

# Number of leading rows of `hochschulen` that are rendered.
# max_rows <- nrow(hochschulen)
max_rows <- 200L

# Pixel sizes: overall table/container width and the height of each logo.
table_width <- 1200L
university_logo_height <- 45L

# Landing pages of the organisations that issue the identifiers shown in the
# "IDs" column (used as the left-hand link in each identifier row).
org_urls <- list(
  eu_pic = "https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/how-to-participate/participant-register",
  gepris = "https://gepris.dfg.de/gepris/OCTOPUS?language=en",
  gerit  = "https://gerit.org/en/",
  hrk    = "https://www.hochschulkompass.de/en/higher-education-institutions/downloads.html",
  ror    = "https://ror.org",
  whed   = "https://www.whed.net",
  risis  = "https://register.orgreg.joanneum.at"
)

# Landing pages of the city-level portals linked from the "Ort" column.
ort_urls <- list(
  daad_cities     = "https://www.study-in-germany.de/en/germany/cities/",
  daad_che_cities = "https://www.daad.de/en/studying-in-germany/universities/che-ranking/?che-a=universities-and-towns",
  zeit_che_cities = "https://studiengaenge.zeit.de/studienorte/deutschland"
)

# Landing pages of the institution-level portals linked from the
# "Hochschule" column.
university_urls <- list(
  daad_che     = "https://www.daad.de/en/studying-in-germany/universities/che-ranking/?che-a=universities-and-towns",
  studieren_de = "https://studieren.de/hochschulliste.0.html",
  zeit_che     = "https://studiengaenge.zeit.de/hochschulen"
)

# Regex alternation of the social networks whose brand icons are wanted.
pattern <- paste(
  c(
    "facebook", "instagram", "mastodon", "x-twitter",
    "youtube", "tiktok", "linkedin"
  ),
  collapse = "|"
)

# Regex alternation of icon variants to exclude from the matches above.
neg_pattern <- paste(
  c("square", "facebook-f", "facebook-messenger", "fab fa-linkedin-in"),
  collapse = "|"
)

# Full names of the Font Awesome brand icons that match `pattern` but not
# `neg_pattern`.
fa_icons <- stringr::str_subset(
  stringr::str_subset(
    fontawesome::fa_metadata()[["icon_names_full_fab"]],
    pattern = pattern
  ),
  pattern = neg_pattern,
  negate = TRUE
)

# Render one Font Awesome icon as inline markup that gt will not escape.
#
# name:   icon name passed to fontawesome::fa().
# height: icon height (CSS length string).
# fill:   icon fill colour.
# Returns the icon markup wrapped with gt::html().
fa_icon <- function(name, height = "1.1em", fill = "#333333") {
  icon <- fontawesome::fa(name = name, fill = fill, height = height)
  gt::html(as.character(icon))
}

# Build a <div> of linked social media icons for one institution.
#
# ...: named arguments; each name is a Font Awesome icon name and each value
#      the profile URL (or NA when the institution has no such profile).
# Returns the <div> markup wrapped with gt::html(). Entries whose value is NA
# are left out. Aborts when arguments are supplied without names.
social_media_icons <- function(...) {
  links <- rlang::list2(...)

  if (length(links) > 0 && !rlang::is_named(links)) {
    cli::cli_abort("All elements of {.arg ...} must be named.")
  }

  has_link <- !is.na(links)

  # One icon per argument name; only those with a URL are kept below.
  icons <- purrr::map(names(links), fa_icon)

  as_anchor <- function(icon, url) {
    htmltools::a(icon, title = url, href = url, target = "_blank")
  }
  anchors <- purrr::map2(icons[has_link], links[has_link], as_anchor)

  wrapper <- htmltools::div(
    title = "Social Media Icons",
    role = "img",
    anchors
  )

  gt::html(as.character(wrapper))
}

# tibble::tribble(
#   ~linkedin, ~tiktok,
#  "https://www.facebook.com/unikassel", "https://www.instagram.com/unikassel",
#   NA, "https://www.instagram.com/unikassel"
# ) |>
#   dplyr::rowwise() |>
#   dplyr::mutate(
#     social_media = list(
#       social_media_icons(
#         "fab fa-linkedin" = linkedin,
#         "fab fa-tiktok" = tiktok
#       )
#     )
#   ) |>
#   dplyr::ungroup() |>
#   gt::gt()

# https://www.w3schools.com/tags/tag_summary.asp
# https://themockup.blog/posts/2020-10-31-embedding-custom-features-in-gt-tables/

# label_socal_media <- function(x) {
#   label <- dplyr::case_when(
#     stringr::str_detect(
#       x,
#       pattern = "x\\.com"
#     ) ~ "x",
#     stringr::str_detect(
#       x,
#       pattern = "facebook"
#     ) ~ "fb",
#     stringr::str_detect(
#       x,
#       pattern = "youtube"
#     ) ~ "yt",
#     stringr::str_detect(
#       x,
#       pattern = "instagram"
#     ) ~ "insta",
#     stringr::str_detect(
#       x,
#       pattern = "linkedin",
#     ) ~ "linkedin",
#     stringr::str_detect(
#       x,
#       pattern = "p=4033"
#     ) ~ "mastodon",
#     .default = NA_character_
#   )
#   
#   return(label)
# }

# Map ranking URLs to short display labels.
#
# x: character vector of URLs.
# Returns a character vector of the same length; the first matching rule wins
# and URLs matching none of the rules (or NA) yield NA.
label_ranking <- function(x) {
  url_has <- function(fragment) {
    stringr::str_detect(x, pattern = fragment)
  }

  dplyr::case_when(
    url_has("umultirank") ~ "umultirank",
    url_has("daad") ~ "che",
    url_has("timeshighereducation") ~ "the",
    url_has("shanghairanking") ~ "arwu",
    url_has("topuniversities") ~ "qs",
    .default = NA_character_
  )
}

# Build the HTML for a single labelled link.
#
# string: URL to link to, or NA when no URL is available.
# label:  text to display.
# Returns a glue string containing a <span>. With a URL the label is rendered
# as an underlined teal link; without one it is rendered as plain black text
# inside an anchor that has no href.
create_span <- function(string, label) {

  # background-color: rgba(255, 0, 0, 0.5)
  if (is.na(string)) {
    # Fixed: the tag previously read `<a" target=...`, i.e. a stray quote made
    # the element name invalid HTML.
    span <- glue::glue(
      '<span style="white-space: pre;"><a target="_blank" style="color:rgba(0, 0, 0);display: inline-block;">{label}</a></span>'
    )
  } else {
    span <- glue::glue(
      '<span style="white-space: pre;"><a href="{string}" target="_blank" style="color:#008B8B;text-decoration:underline;text-underline-position: under;display: inline-block;">{label}</a></span>'
    )
  }

  span
}

# Combine several labelled links into one space-separated HTML string.
#
# urls:   vector of URLs (NA where a link is unavailable).
# labels: vector of display labels, parallel to `urls`.
# Returns a single string: one create_span() result per url/label pair, with
# NA results dropped, joined by a single space.
build_wiki_span <- function(urls, labels) {
  spans <- purrr::map2(urls, labels, create_span)
  spans_present <- purrr::discard(spans, is.na)

  stringr::str_flatten(
    purrr::list_c(spans_present),
    collapse = " "
  )
}


# Derive a file-system-safe file name from a path or URL.
#
# path: path or URL, e.g.
#   "https://upload.wikimedia.org/wikipedia/commons/0/0e/Hochschule_f%C3%BCr_Oekonomie_%26_Management_2012_logo.svg"
# Returns "<stem>.<ext>", where the stem is the sanitized base name with every
# punctuation character replaced by "_" and the extension is kept unchanged.
file_name <- function(path) {
  base_name <- fs::path_file(path)
  extension <- fs::path_ext(base_name)

  stem <- stringr::str_replace_all(
    fs::path_sanitize(fs::path_ext_remove(base_name)),
    pattern = "[[:punct:]]",
    replacement = "_"
  )

  paste0(stem, ".", extension)
}

# Local path under the project root where a logo URL is (to be) stored.
#
# logo_url: character vector of (percent-encoded) logo URLs; may contain NA.
# Returns an fs path built from here::here() and the sanitized file name of
# the decoded URL; positions where `logo_url` is NA are NA.
img_path <- function(logo_url) {
  url_missing <- is.na(logo_url)

  local_file <- file_name(URLdecode(logo_url))
  local_path <- fs::path(here::here(), local_file)

  # Blank out the entries that had no URL to begin with.
  local_path[url_missing] <- NA

  local_path
}

# Build the linked logo cell for one institution.
#
# logo_url:       logo location for a single institution, or NA. For SVG logos
#                 it is used directly as the image source; for other formats
#                 the file at img_path(logo_url) is embedded via gt.
# hochschule_url: URL the logo links to; "#0" is used when it is NA.
# height:         logo height in pixels.
# data_uri:       optional ready-made image source (e.g. a data URI). When it
#                 is neither NULL nor NA it takes precedence over `logo_url`.
# Returns the <a><img></a> markup wrapped with gt::html().
#
# Changes compared to the previous version:
# * `data_uri = NULL` (the default) no longer fails in `if (!is.na(NULL))`.
# * `height` is honoured instead of always reading `university_logo_height`.
# * In the SVG branch the height style is applied to the <img>, as in the
#   other two branches, instead of to the surrounding <a>.
#
# References:
# https://www.w3schools.com/html/html_images.asp
# https://www.smashingmagazine.com/2020/03/setting-height-width-images-important-again/
# https://www.w3schools.com/tags/att_img_height.asp
build_logo_html <- function(logo_url, hochschule_url, height = university_logo_height, data_uri = NULL) {

  link_target <- if (is.na(hochschule_url)) "#0" else hochschule_url

  img_style <- glue::glue(
    "height:{height}px;border:0px none;"
  )

  # Wrap an image source in a link and hand it to gt as raw HTML.
  linked_logo <- function(src) {
    tag <- htmltools::a(
      href = link_target,
      target = "_blank",
      htmltools::img(
        src = src,
        style = img_style
      )
    )

    gt::html(as.character(tag))
  }

  # 1. A supplied image source wins over everything else.
  if (!is.null(data_uri) && !is.na(data_uri)) {
    return(linked_logo(data_uri))
  }

  # Fall back to gt's bundled test image when there is no logo at all.
  local_path <- if (is.na(logo_url)) {
    gt::test_image()
  } else {
    img_path(logo_url)
  }

  # 2. SVGs are referenced by location rather than embedded.
  if (tolower(fs::path_ext(local_path)) == "svg") {
    return(linked_logo(logo_url))
  }

  # 3. Everything else is embedded from the local file.
  # NOTE(review): get_image_uri() is an unexported gt function and may change
  # between gt releases.
  linked_logo(
    gt:::get_image_uri(
      file = local_path
    )
  )
}

# Fetch one logo and store it at img_path(logo_url).
#
# logo_url: a single logo URL, or NA.
# height:   maximum height in pixels for raster logos.
# Returns the local path of the written file, or an NA path when `logo_url`
# is NA.
#
# SVG files are downloaded unchanged. All other formats are read with magick,
# trimmed, shrunk to at most `height` pixels high (never enlarged, because of
# the ">" geometry flag) and written to disk.
#
# The previous version contained a second `img_ext == "svg"` branch calling
# magick::image_read_svg(); it could never run because SVGs return early, so
# it has been removed.
write_img <- function(logo_url, height = university_logo_height) {

  if (is.na(logo_url)) {
    return(fs::path(NA))
  }

  destination <- img_path(logo_url)
  extension <- tolower(fs::path_ext(destination))

  if (extension == "svg") {
    download.file(
      url = URLdecode(logo_url),
      destfile = destination,
      quiet = TRUE,
      cacheOK = TRUE,
      method = "libcurl"
    )

    return(destination)
  }

  img <- magick::image_read(logo_url)
  img_trimmed <- magick::image_trim(img)
  img_resized <- magick::image_resize(
    img_trimmed,
    paste0("x", height, ">")
  )

  magick::image_write(img_resized, destination)

  destination
}

# Pick the logo URL to download for each of the first `max_rows` institutions:
# the original Wikipedia file when it is an SVG (except for signature "11615",
# Uni Jena), otherwise the thumbnail URL.
logo_url <- hochschulen[seq_len(max_rows), ] |>
  col_to_url(dplyr::everything()) |>
  dplyr::mutate(
    logo_url = dplyr::case_when(
      tolower(fs::path_ext(Wiki_Logo_DE_URL)) == "svg" &
        # Uni Jena == 11615
        Hochschulart_Hochschule_Signatur != "11615" ~ Wiki_Logo_DE_URL,
      .default = Wiki_Logo_DE_Thumbnail_URL
    )
  ) |>
  dplyr::pull(logo_url)

# Using purrr shouldn't be necessary because Magick functions are vectorized, but I kept getting errors
# purrr::map_chr(logo_url, write_img)

# Error-tolerant wrapper around write_img(). A failed download yields an NA
# path instead of NULL: purrr::list_c() drops NULL elements, which would make
# `logo_url_written` shorter than `logo_url` and shift every later logo onto
# the wrong table row.
write_img_safely <- purrr::safely(
  write_img,
  otherwise = fs::path(NA)
)

#debugonce(write_img)

# One list(result =, error =) entry per logo URL.
list_logo_url_written <- purrr::map(
  logo_url,
  write_img_safely
)

# Local paths of the written logos, parallel to `logo_url` (NA where there was
# no URL or the download failed).
logo_url_written <- list_logo_url_written |>
  purrr::map(
    "result"
  ) |>
  purrr::list_c()

# Build a collapsible <details> element listing identifiers and their links.
#
# label: text shown in the <summary>; returned unchanged when no identifier
#        is available.
# ...:   lists with the elements `org_url`, `org_label`, `org_id_url` and
#        `org_id_label`; entries whose `org_id_label` is NA are skipped.
# Returns the <details> markup wrapped with gt::html(), or `label` itself when
# every `org_id_label` is NA.
id_function <- function(label, ...) {
  entries <- rlang::list2(...)

  id_missing <- is.na(
    purrr::map_chr(entries, "org_id_label")
  )
  entries_with_id <- entries[!id_missing]

  # If all IDs are NA return just the label
  if (length(entries_with_id) == 0) {
    return(label)
  }

  # One table row per entry: organisation link on the left, identifier link on
  # the right.
  # Inherits from div.rt-td-inner min-width: 100px; width: 100px;
  create_table_row <- function(
    list,
    a_style = "color:#008B8B;text-decoration:underline;text-underline-position: under;display:inline-block;"
  ) {
    linked_cell <- function(href, text) {
      htmltools::tags$td(
        htmltools::tags$a(
          href = href,
          style = a_style,
          target = "_blank",
          text
        )
      )
    }

    htmltools::tags$tr(
      style = "min-width:200px",
      linked_cell(list$org_url, list$org_label),
      linked_cell(list$org_id_url, list$org_id_label)
    )
  }

  table_rows <- purrr::map(entries_with_id, create_table_row)

  details <- htmltools::tags$details(
    htmltools::tags$summary(label),
    htmltools::tags$table(table_rows)
  )

  gt::html(as.character(details))
}

# Build a comma-separated list of ranking links for one institution.
#
# ...: ranking URLs (NA where the institution is not listed in a ranking).
# Returns the link markup wrapped with gt::html(). Each non-NA URL becomes a
# link labelled via label_ranking(); every link except the last one gets a
# trailing comma.
#
# The comma is now placed by position. Previously a label got a comma when its
# text differed from the last label, so a label equal to the last one (or an
# NA label in last position) silently dropped separators.
rankings_html <- function(...) {

  dots <- rlang::list2(...)
  dots_not_na <- !is.na(dots)
  ranking_urls <- dots[dots_not_na]

  labels <- purrr::map_chr(
    ranking_urls,
    label_ranking
  )

  # Could do this in css
  # https://css-tricks.com/how-to-add-commas-between-a-list-of-items-dynamically-with-css/
  # https://blog.union.io/code/2018/03/07/add-commas-with-css/
  is_last <- seq_along(labels) == length(labels)
  labels_with_comma <- dplyr::case_when(
    # NA labels are passed through untouched, as before.
    !is_last & !is.na(labels) ~ paste0(
      labels,
      ","
    ),
    .default = labels
  )

  create_span <- function(
    url,
    url_label,
    a_style = "color:#008B8B;text-decoration:underline;text-underline-position: under;"
  ) {
    htmltools::span(
      #style = "white-space: pre;",
      htmltools::a(
        url_label,
        href = url,
        target = "_blank",
        style = a_style
      )
    )
  }

  list_spans <- purrr::map2(
    ranking_urls,
    labels_with_comma,
    create_span
  ) |>
    htmltools::tagList()

  list_spans |>
    as.character() |>
    gt::html()
}

#debugonce(label_socal_media)
#debugonce(rankings_html)

# tibble::tribble(
#   ~umultirank, ~the,
#   "https://www.umultirank.org/study-at/university-of-kassel-rankings/", "https://www.timeshighereducation.com/world-university-rankings/university-kassel"
# ) |>
#   dplyr::rowwise() |>
#   dplyr::mutate(
#     rankings = rankings_html(
#         umultirank,
#         the
#     )
#   ) |>
#   dplyr::ungroup() |>
#   dplyr::mutate(
#     rankings = purrr::map(
#       rankings,
#       gt::html
#     )
#   ) |>
#  gt::gt()

# Assemble and render the interactive table: one row per institution with its
# logo, identifier links, city links, portal links, wiki links, rankings and
# social media icons.
#
# Fix in this version: the custom CSS selector `#hochschulen rt-td` lacked the
# class dot and therefore matched no element; it is now `#hochschulen .rt-td`.
hochschulen[c(1:max_rows),] |>
  dplyr::mutate(
    # GERiT and ZEIT/CHE links are derived from the GEPRIS id and the HRK
    # number respectively.
    gerit_id = gepris_id,
    zeit_che_university = `Hs-Nr.`,
    CHE_DAAD_University_ID = as.character(
      CHE_DAAD_University_ID
    ),
    # Keep the raw identifiers: col_to_url() below is applied to every column
    # (presumably replacing ids with URLs), while the *_orig copies are used
    # as the visible link labels.
    CHE_DAAD_University_ID_orig = CHE_DAAD_University_ID,
    zeit_che_university_orig = zeit_che_university,
    gepris_id_orig = gepris_id,
    gerit_id_orig = gerit_id,
    eu_pic_orig = eu_pic,
    hs_nr_orig = `Hs-Nr.`, #`[[`(hochschulen, "Hs-Nr."),
    ror_id_orig = ROR_ID,
    risis_orgreg_entity_id_orig = RISIS_ORGREG_ENTITY_ID,
    studieren_id_orig = STUDIEREN_ID,
    whed_id_orig = WHED_ID,
    # Join key: sanitized local file name of the logo.
    logo = fs::path_file(
      img_path(
        Wiki_Logo_DE_URL
      )
    )
  ) |>
  # Attach the pre-computed logo data (provides `logo_data_uri`, used below);
  # rows without a logo never match.
  dplyr::left_join(
    hochschulen_logos,
    by = dplyr::join_by(
      logo
    ),
    na_matches = "never"
  ) |>
  dplyr::select(
    -logo
  ) |>
  col_to_url(
    dplyr::everything()
  ) |>
  dplyr::mutate(
    # Link labels; NA whenever the corresponding link is missing so that
    # id_function() skips the entry.
    DAAD_ORT_LABEL = dplyr::if_else(
      is.na(DAAD_ORT),
      NA,
      Ort
    ),
    CHE_DAAD_Town_ID_Label = dplyr::if_else(
      is.na(CHE_DAAD_Town_ID),
      NA,
      Ort
    ),
    STUDIEREN_HS_LABEL = dplyr::if_else(
      is.na(STUDIEREN_ID),
      NA,
      studieren_id_orig
    ),
    CHE_DAAD_HS_LABEL = dplyr::if_else(
      is.na(CHE_DAAD_University_ID),
      NA,
      CHE_DAAD_University_ID_orig
    ),
    ZEIT_CHE_HS_LABEL = dplyr::if_else(
      is.na(zeit_che_university),
      NA,
      zeit_che_university_orig
    ),
    # Local paths of the downloaded logos; must be parallel to the rows.
    logo_url = logo_url_written
  ) |>
  # The HTML builders below work on one institution at a time.
  dplyr::rowwise() |>
  dplyr::mutate(
    IDs = list(
      id_function(
        label = Hochschulart_Hochschule_Signatur,
        list(
          org_url = org_urls[["eu_pic"]],
          org_label = 'EU PIC',
          org_id_url = eu_pic,
          org_id_label = eu_pic_orig
        ),
        list(
          org_url = org_urls[["gepris"]],
          org_label = 'GEPRIS',
          org_id_url = gepris_id,
          org_id_label = gepris_id_orig
        ),
        list(
          org_url = org_urls[["gerit"]],
          org_label = 'GERiT',
          org_id_url = gerit_id,
          org_id_label = gerit_id_orig
        ),
        list(
          org_url = org_urls[["hrk"]],
          org_label = 'HRK',
          org_id_url = `Hs-Nr.`,
          org_id_label = hs_nr_orig
        ),
        list(
          org_url = org_urls[["risis"]],
          org_label = 'RISIS',
          org_id_url = RISIS_ORGREG_ENTITY_ID,
          org_id_label = risis_orgreg_entity_id_orig
        ),
        list(
          org_url = org_urls[["ror"]],
          org_label = 'ROR',
          org_id_url = ROR_ID,
          org_id_label = ror_id_orig
        ),
        list(
          org_url = org_urls[["whed"]],
          org_label = 'WHED',
          org_id_url = WHED_ID,
          org_id_label = whed_id_orig
        )
      )
    ),
    Ort = list(
      id_function(
        label = Ort,
        list(
          org_url = ort_urls[["daad_cities"]],
          org_label = 'DAAD',
          org_id_url = DAAD_ORT,
          org_id_label = DAAD_ORT_LABEL
        ),
        list(
          org_url = ort_urls[["daad_che_cities"]],
          org_label = 'DAAD CHE',
          org_id_url = CHE_DAAD_Town_ID,
          org_id_label = CHE_DAAD_Town_ID_Label
        )
      )
    ),
    Hochschule_EN = list(
      id_function(
        label = Hochschule_EN,
        list(
          org_url = university_urls[["daad_che"]],
          org_label = 'DAAD CHE',
          org_id_url = CHE_DAAD_University_ID,
          org_id_label = CHE_DAAD_HS_LABEL
        ),
        list(
          org_url = university_urls[["studieren_de"]],
          org_label = 'studieren.de',
          org_id_url = STUDIEREN_ID,
          org_id_label = STUDIEREN_HS_LABEL
        ),
        list(
          org_url = university_urls[["zeit_che"]],
          org_label = 'ZEIT CHE',
          org_id_url = zeit_che_university,
          org_id_label = ZEIT_CHE_HS_LABEL
        )
      )
    ),
    Wiki = build_wiki_span(
      urls = c(
        Wiki_ID_DE,
        Wiki_ID_EN,
        Wikidata_ID
      ),
      labels = c(
        'de',
        'en',
        'data'
      )
    ),
    Logo = build_logo_html(
      logo_url = logo_url,
      hochschule_url = url,
      data_uri = logo_data_uri
    ),
    social_media = list(
      social_media_icons(
        "fab fa-facebook" = facebook_id,
        "fab fa-instagram" = insta_id,
        "fab fa-linkedin" = linkedin,
        "fab fa-mastodon" = mastodon_id,
        "fab fa-tiktok" = tiktok,
        "fab fa-x-twitter" = x_user,
        "fab fa-youtube" = youtube_channel_id
      )
    ),
    Rankings = list(
      rankings_html(
        arwu_id,
        CHE_DAAD_University_ID,
        Multirank,
        the_id,
        qs_world_id
      )
    )
  ) |>
  dplyr::ungroup() |>
  # Mark every cell of these columns as raw HTML so gt does not escape it.
  dplyr::mutate(
    Logo = purrr::map(
      Logo,
      gt::html
    ),
    Wiki = purrr::map(
      Wiki,
      gt::html
    ),
    Rankings = purrr::map(
      Rankings,
      gt::html
    )
  ) |>
  dplyr::select(
    Logo,
    IDs,
    Ort,
    Hochschule_EN,
    Wiki,
    Rankings,
    social_media
  ) |>
  # The table id is what the custom CSS at the end of the pipeline targets.
  gt::gt(
    id = "hochschulen"
    # rowname_col = "Hochschule_Signatur",
    # rownames_to_stub  = TRUE
  ) |>
  gt::tab_header(
    title = title
    #,subtitle = subtitle
  ) |>
  gt::tab_source_note(
    source_note = gt::html(
      source_note
    )
  ) |>
  gt::cols_label(
    Hochschule_EN = "Hochschule",
    social_media = "Social Media"
  ) |>
  gt::cols_align(
    align = "left",
    columns = dplyr::everything()
  ) |>
  gt::cols_align(
    align = "right",
    columns = Logo
  ) |>
  gt::cols_width(
    Logo ~ gt::px(
      325L
    ),
    IDs ~ gt::px(
      150L
    ),
    Hochschule_EN ~ gt::px(
      250L
    )
  ) |>
  gt::opt_interactive(
    #use_search = TRUE, # not really useful with so many columns having multiple values
    use_highlight = TRUE,
    use_page_size_select = TRUE,
    page_size_default = 50
  ) |>
  gt::tab_style(
    style = gt::cell_text(
      align = "left"
    ),
    locations = gt::cells_body(
      columns = c(
        IDs, Ort, Hochschule_EN
      )
    )
  ) |>
  gt::tab_options(
    container.width = gt::px(table_width),
    table.width = gt::px(table_width)
  ) |>
  # Vertically centre cell content. `.rt-td` is a class selector; the former
  # `rt-td` (no dot) was an element selector that matched nothing.
  gt::opt_css(
    css = "
    #hochschulen .rt-td{
      flex-wrap:wrap;
      align-items:center;
    }
    #hochschulen .rt-align-left{
      align-items:center;
    }
    #hochschulen .rt-align-right{
      align-items:center;
    }
    "
  )

# <style type="text/css">
# .container template-article{
#   max-width: 1500px !important;
# }
# </style>

Data source

The data is sourced from the following tables and lists:

The key tables for the student statistics (table series 372) provided by the Federal Statistical Agency (Destatis) on its data collection portal (Erhebungsportal). There are four key tables / csv files containing data on institutions of higher education in Germany: 1.) Hochschule, 2.) HochschulStandort, 3.) HochschuleErsteinschreibung, and 4.) HochschulFachbereich, each with slight differences in how and where institutions are counted (e.g. when a university has subsidiary campuses in other cities and German states, or in the classification of university hospitals),
the list of all institutions of higher education in Germany provided by the German Rectors' Conference (HRK). The Federal Statistical Agency data together with the HRK list are the "most authoritative German numbers" that exist.
the German and English Wikipedia article for each institution,
the Wikidata entry for each institution,
Logos are sourced as data URIs from Wikimedia Commons (preferred), or from the German or English Wikipedia entry. If available, SVGs are preferred to raster formats.
as described in vignette("ror"), we use the global Research Organization Registry (ROR) REST API to retrieve additional data and identifiers which are then matched to the existing data.

Identifiers

If available, the following identifiers for each institution are included:

the 9-digit Participant Identification Code (PIC) used in the EU Funding & Tenders Portal (Wikidata Property P5785),
the identifier for the GEPRIS database of funded research projects, published by the German Research Foundation (DFG) (Wikidata Property P4870),
the German Research Institution (GERiT) ID of each institution (see https://gerit.org/en/about),
the ID used by the German Rectors' Conference (HRK),
the ETER (European Tertiary Education Register) ID by the Research Infrastructure for Research and Innovation Studies (RISIS; see https://register.orgreg.joanneum.at/#/about/1). Note that a free account is required to use their services.
the Research Organization Registry (ROR) ID (see vignette("ror") and Wikidata Property P6782),
the ID of the World Higher Education Database (WHED) by the International Association of Universities.

City

If available, the following websites based on the institution's city are linked:

the German Academic Exchange Service (DAAD) page,
the CHE University Ranking page as hosted by the German Academic Exchange Service.

Institutions of higher education

If available, the following websites providing further details on each institution of higher education and their programs are linked:

the CHE University Ranking as hosted by the German Academic Exchange Service,
the studieren.de page,
the CHE University Ranking as hosted by ZEIT Online / Hey Studium.

Rankings

If available, the following rankings are linked:

the CHE University Ranking as hosted by the German Academic Exchange Service,
the U-Multirank (Wikidata Property P5600),
the Times Higher Education Ranking (Wikidata Property P5586),
the Shanghai Ranking (Wikidata Property P5242),
the QS World University Rankings (Wikidata Property P5584).

Social Media

If available, these social media sites are linked:

APIs

The following APIs were helpful in creating the data above:

YouTube Data API v3 for retrieving the proper channel IDs,
Wiki Core REST API to search for Wikipedia entries and retrieve translated pages,
Wikidata SPARQL query service to obtain data from Wikidata pages (can be queried through the {WikidataR} package),
SOAP XML web service from Destatis to obtain data from the Federal Statistical Office of Germany (can be queried through the {wiesbaden} package).

A rant about the Higher Education Compass / Hochschulkompass and data licences

One obvious addition to the ?hochschulen dataset would be a link to the Higher Education Compass / Hochschulkompass by the HRK. Unfortunately, the compass website uses a lot of obfuscating Javascript to make direct links very difficult. Now, the HRK is a semi-public institution funded by the Stiftung zur Förderung der Hochschulrektorenkonferenz. This foundation mainly raises its funds through grants from the Länder and from the Federal Ministry of Education and Research. Moreover, the mostly public members (around 271 institutions of higher education) of the HRK pay annual membership fees.

According to the website, "all information found in the Higher Education Compass is authorized by the universities and is updated by employees at the universities themselves". Against this backdrop, it is incomprehensible that the HRK deems it worthwhile to protect the Higher Education Compass data. Use of this data by the public and/or private competitors to provide better offerings with additional data or different UI should be encouraged, not made more difficult! After all, this is non-confidential data on public institutions largely paid for by taxpayers' money, administered by staff mostly in the public sector...

The ?hochschulen dataset consciously consists almost exclusively of identifiers and links, so as to avoid data licencing issues. Particularly the private providers, like ZEIT or studieren.de, naturally have a vested business interest to protect their data and to discourage direct links as best they can.

About this project

Like most good projects, this one started out of a sense of annoyance. In particular, I looked at the Destatis list of 427 institutions of higher education and realized that they did NOT provide a detailed breakdown of the member institutions in each category. For the time being, only the Länder statistical agencies provide this level of detail. Moreover, I noticed that the list of all institutions of higher education in Germany provided by the German Rectors' Conference (HRK) did not use the institutional identifiers used by the Länder statistical agencies (as one would expect). Nor did their numbers match up with Destatis. I will admit, writing these two slights down, I may come off as easily annoyed - but I can assure you, these sorts of things can be heart-wrenching, really!

Annoyed as I was, I started to dig, and dig, and dig a little deeper still. I discovered along the way that trying to count institutions of higher education was by no means a particularly innovative endeavor - after all, ETER does it at the European level, WHED does it for the global level, GERiT and ROR do it for research institutions (slightly different emphasis). Had I known about these initiatives from the start - rather than stumbling upon them during the process - I would have avoided a lot of work. Alas, the proof of the pudding is in the eating...

Aims of this project

A lot of the data assembled in the ?hochschulen dataset will find its more permanent home on Wikidata - another great resource I have only come to appreciate as this project has progressed. For the time being, the idea is that this data set can save other people time matching up different data sources and IDs. People who do work on rankings, or people who work in communication departments come to mind, but there are certainly lots of other applications.

To Dos

Add bundesland to gt table
Consistent use of English in the ?hochschulen dataset, rename German columns
Document every column of the ?hochschulen dataset
Crop whitespace for svg logo images, e.g. for the Chemnitz University of Technology (11305)
The CHE-Ranking is currently listed twice: in the hochschule column and in the ranking column
The HRK ID in the ID column should have no URL attached to it
Find a better home for this article (own R package? Personal website?)
Find a way to get rid of the floating TOC for this vignette
Cite R packages used
Update Wikidata with missing social media links and other identifiers
Provide csv download of raw data

# Clean up: remove every logo file written while building the table
# (each distinct path once, skipping NA entries).
logo_url_written[!is.na(logo_url_written)] |>
  unique() |>
  fs::file_delete()

RichardMeyer-Eppler/studentenstatistikNRW documentation built on July 27, 2024, 3:14 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

RichardMeyer-Eppler/studentenstatistikNRW
Student Statistics from the Landesdatenbank NRW (Table Series 21311)

In RichardMeyer-Eppler/studentenstatistikNRW: Student Statistics from the Landesdatenbank NRW (Table Series 21311)

Data source

Identifiers

City

Institutions of higher education

Rankings

Social Media

APIs

A rant about the Higher Education Compass / Hochschulkompass and data licences

About this project

Aims of this project

To Dos

R Package Documentation

Browse R Packages

We want your feedback!

RichardMeyer-Eppler/studentenstatistikNRW Student Statistics from the Landesdatenbank NRW (Table Series 21311)

In RichardMeyer-Eppler/studentenstatistikNRW: Student Statistics from the Landesdatenbank NRW (Table Series 21311)

Data source

Identifiers

City

Institutions of higher education

Rankings

Social Media

APIs

A rant about the Higher Education Compass / Hochschulkompass and data licences

About this project

Aims of this project

To Dos

R Package Documentation

Browse R Packages

We want your feedback!

RichardMeyer-Eppler/studentenstatistikNRW
Student Statistics from the Landesdatenbank NRW (Table Series 21311)