# suppress a section title from toc #1430
# https://github.com/r-lib/pkgdown/issues/1430
# https://github.com/r-lib/pkgdown/pull/1432/commits/8a4643f855d14f67704d6cfa2ba25fadbc4dfaa2
# https://afeld.github.io/bootstrap-toc/#:~:text=in%20the%20sidebar.-,Skipping,-To%20prevent%20a

# <h2 data-toc-skip>Floating TOC issue
# Random table of contents on a vignette #1349
# https://github.com/r-lib/pkgdown/issues/1349
# documentation on toc depth #1357
# https://github.com/r-lib/pkgdown/issues/1357
# Document _output.yml rather than _site.yml for vignette output format
# https://github.com/r-lib/pkgdown/issues/1764
# https://github.com/r-lib/pkgdown/blob/d5e60a7232ee5f414f2a1de2aca5bf44054e7a08/vignettes/test/output.Rmd#L3-L5

# https://www.w3schools.com/css/css_rwd_viewport.asp
# https://pkgdown.r-lib.org/reference/build_articles.html
# https://stackoverflow.com/questions/34906002/increase-width-of-entire-html-rmarkdown-output

# https://r-pkgs.org/vignettes.html
# https://httr2.r-lib.org/articles/wrapping-apis.html#secret-management
# https://pkgdown.r-lib.org/articles/linking.html

library(studentenstatistikNRW)
library(tibble)

# Chunk defaults for this vignette: merge source and output into a single
# block and prefix printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

As of March 13, 2024, the Federal Statistical Office of Germany (Destatis) lists a total of 427 institutions of higher education in the country. The table below lists most of these institutions, together with links providing a wide variety of additional information.

# ---- Table settings ----------------------------------------------------------
title <- "Institutions of Higher Education in Germany"
# subtitle <- "Sub"

# Number of rows of `hochschulen` to render.
# max_rows <- nrow(hochschulen)
max_rows <- 200L
# Pixel sizes: both values are later interpolated into `px` units.
table_width <- 1200L
university_logo_height <- 45L

# ---- Source note -------------------------------------------------------------
source_note_link <- "https://richardmeyer-eppler.github.io/studentenstatistikNRW"
source_note <- glue::glue("<a href = {source_note_link}>{source_note_link}</a>")

# ---- Landing pages of the linked providers -----------------------------------
# Providers of institutional identifiers.
org_urls <- list(
  eu_pic = "https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/how-to-participate/participant-register",
  gepris = "https://gepris.dfg.de/gepris/OCTOPUS?language=en",
  gerit  = "https://gerit.org/en/",
  hrk    = "https://www.hochschulkompass.de/en/higher-education-institutions/downloads.html",
  ror    = "https://ror.org",
  whed   = "https://www.whed.net",
  risis  = "https://register.orgreg.joanneum.at"
)

# City-level websites.
ort_urls <- list(
  daad_cities     = "https://www.study-in-germany.de/en/germany/cities/",
  daad_che_cities = "https://www.daad.de/en/studying-in-germany/universities/che-ranking/?che-a=universities-and-towns",
  zeit_che_cities = "https://studiengaenge.zeit.de/studienorte/deutschland"
)

# Institution-level websites.
university_urls <- list(
  daad_che     = "https://www.daad.de/en/studying-in-germany/universities/che-ranking/?che-a=universities-and-towns",
  studieren_de = "https://studieren.de/hochschulliste.0.html",
  zeit_che     = "https://studiengaenge.zeit.de/hochschulen"
)
# Regex alternation of the social networks whose brand icons are wanted.
pattern <- paste(
  c(
    "facebook",
    "instagram",
    "mastodon",
    "x-twitter",
    "youtube",
    "tiktok",
    "linkedin"
  ),
  collapse = "|"
)

# Regex alternation of icon variants to exclude again.
neg_pattern <- paste(
  c(
    "square",
    "facebook-f",
    "facebook-messenger",
    "fab fa-linkedin-in"
  ),
  collapse = "|"
)

# Full Font Awesome brand icon names that match `pattern` but not `neg_pattern`.
fa_icons <- stringr::str_subset(
  stringr::str_subset(
    fontawesome::fa_metadata()[["icon_names_full_fab"]],
    pattern = pattern
  ),
  pattern = neg_pattern,
  negate = TRUE
)
# Render one Font Awesome icon as an HTML fragment for use in a gt cell.
#
# name:   icon name passed to fontawesome::fa().
# height: icon height passed to fontawesome::fa().
# fill:   fill colour passed to fontawesome::fa().
# Returns the icon markup wrapped with gt::html().
fa_icon <- function(name, height = "1.1em", fill = "#333333") {
  svg <- fontawesome::fa(name = name, fill = fill, height = height)

  gt::html(as.character(svg))
}

# Build a row of linked social media icons for a single table cell.
#
# ...: named arguments; each name is a Font Awesome icon name and each value
#      the URL to link to. Entries whose value is NA are left out.
# Returns a <div> holding one linked icon per non-NA entry, wrapped with
# gt::html(). Aborts if arguments are supplied without names.
social_media_icons <- function(...) {
  links <- rlang::list2(...)

  if (length(links) > 0 && !rlang::is_named(links)) {
    cli::cli_abort("All elements of {.arg ...} must be named.")
  }

  has_link <- !is.na(links)

  # One icon per argument name; the NA entries are filtered out afterwards.
  icons <- purrr::map(names(links), fa_icon)

  linked_icons <- purrr::map2(
    icons[has_link],
    links[has_link],
    function(icon, url) {
      htmltools::a(
        icon,
        title = url,
        href = url,
        target = "_blank"
      )
    }
  )

  wrapper <- htmltools::div(
    title = "Social Media Icons",
    role = "img",
    linked_icons
  )

  gt::html(as.character(wrapper))
}

# tibble::tribble(
#   ~linkedin, ~tiktok,
#  "https://www.facebook.com/unikassel", "https://www.instagram.com/unikassel",
#   NA, "https://www.instagram.com/unikassel"
# ) |>
#   dplyr::rowwise() |>
#   dplyr::mutate(
#     social_media = list(
#       social_media_icons(
#         "fab fa-linkedin" = linkedin,
#         "fab fa-tiktok" = tiktok
#       )
#     )
#   ) |>
#   dplyr::ungroup() |>
#   gt::gt()
# https://www.w3schools.com/tags/tag_summary.asp
# https://themockup.blog/posts/2020-10-31-embedding-custom-features-in-gt-tables/

# label_socal_media <- function(x) {
#   label <- dplyr::case_when(
#     stringr::str_detect(
#       x,
#       pattern = "x\\.com"
#     ) ~ "x",
#     stringr::str_detect(
#       x,
#       pattern = "facebook"
#     ) ~ "fb",
#     stringr::str_detect(
#       x,
#       pattern = "youtube"
#     ) ~ "yt",
#     stringr::str_detect(
#       x,
#       pattern = "instagram"
#     ) ~ "insta",
#     stringr::str_detect(
#       x,
#       pattern = "linkedin",
#     ) ~ "linkedin",
#     stringr::str_detect(
#       x,
#       pattern = "p=4033"
#     ) ~ "mastodon",
#     .default = NA_character_
#   )
#   
#   return(label)
# }

# Map ranking URLs to the short label shown in the table.
#
# x: character vector of ranking URLs.
# Returns a character vector as long as `x`; the first matching pattern wins
# and URLs that match none of them (or are NA) yield NA.
label_ranking <- function(x) {
  url_has <- function(fragment) {
    stringr::str_detect(x, pattern = fragment)
  }

  dplyr::case_when(
    url_has("umultirank") ~ "umultirank",
    url_has("daad") ~ "che",
    url_has("timeshighereducation") ~ "the",
    url_has("shanghairanking") ~ "arwu",
    url_has("topuniversities") ~ "qs",
    .default = NA_character_
  )
}

# Wrap a label in a <span><a> fragment.
#
# string: a single URL, or NA when no link is available.
# label:  text shown inside the anchor.
# Returns a glue string. With a URL the label becomes an underlined teal link
# that opens in a new tab; with NA it is rendered as a plain black anchor
# without an href, so that the label is still displayed.
create_span <- function(string, label) {

  # background-color: rgba(255, 0, 0, 0.5)
  if (is.na(string)) {
    span <- glue::glue(
      '<span style="white-space: pre;"><a target="_blank" style="color:rgba(0, 0, 0);display: inline-block;">{label}</a></span>'
    )
  } else {
    span <- glue::glue(
      '<span style="white-space: pre;"><a href="{string}" target="_blank" style="color:#008B8B;text-decoration:underline;text-underline-position: under;display: inline-block;">{label}</a></span>'
    )
  }

  span
}

# Combine several labelled links into one space-separated HTML string.
#
# urls:   vector of URLs (NA where no link exists).
# labels: vector of labels, paired element-wise with `urls`.
# Returns a single string with one create_span() fragment per pair.
build_wiki_span <- function(urls, labels) {
  spans <- purrr::map2(urls, labels, create_span)
  spans_kept <- purrr::discard(spans, is.na)

  stringr::str_flatten(
    purrr::list_c(spans_kept),
    collapse = " "
  )
}


# Derive a file name from a path or URL: keep the extension and replace every
# punctuation character in the sanitized base name with an underscore.
#
# path: character vector of paths or URLs, e.g.
#   "https://upload.wikimedia.org/wikipedia/commons/0/0e/Hochschule_f%C3%BCr_Oekonomie_%26_Management_2012_logo.svg"
# Returns a character vector of the form "<base name>.<extension>".
file_name <- function(path) {
  base <- fs::path_file(path)
  extension <- fs::path_ext(base)

  stem <- fs::path_sanitize(
    fs::path_ext_remove(base)
  )
  stem_clean <- stringr::str_replace_all(
    stem,
    pattern = "[[:punct:]]",
    replacement = "_"
  )

  paste0(stem_clean, ".", extension)
}

# Local path under the project root (here::here()) for a logo URL.
#
# logo_url: character vector of logo URLs, possibly percent-encoded.
# Returns a path vector as long as `logo_url`, with NA wherever the URL is NA.
img_path <- function(logo_url) {
  local_file <- file_name(
    URLdecode(logo_url)
  )

  target <- fs::path(here::here(), local_file)

  # Missing URLs must stay missing instead of turning into a path.
  target[is.na(logo_url)] <- NA

  target
}

# Build the HTML for one logo cell: an image that links to the institution.
#
# logo_url:       single logo URL, or NA to fall back to gt's test image.
# hochschule_url: single URL the logo links to; NA links to "#0".
# height:         image height in pixels.
# data_uri:       optional ready-made data URI for the image; when it is NULL
#                 or NA the image source is derived from `logo_url` instead.
# Returns the <a><img></a> markup wrapped with gt::html().
#
# The image source is chosen in this order:
#   1. `data_uri`, if supplied;
#   2. the remote `logo_url` itself when the local file is an SVG;
#   3. otherwise a data URI built from the local file at img_path(logo_url).
build_logo_html <- function(logo_url, hochschule_url, height = university_logo_height, data_uri = NULL) {
  # https://www.w3schools.com/html/html_images.asp
  # https://www.smashingmagazine.com/2020/03/setting-height-width-images-important-again/
  # https://www.w3schools.com/tags/att_img_height.asp

  img_style <- glue::glue(
    "height:{height}px;border:0px none;"
  )

  link_target <- if (is.na(hochschule_url)) "#0" else hochschule_url

  # All three cases share the same markup and differ only in the image source.
  linked_logo <- function(src) {
    img_tag <- htmltools::a(
      href = link_target,
      target = "_blank",
      htmltools::img(
        src = src,
        style = img_style
      )
    )

    gt::html(as.character(img_tag))
  }

  # is.na(NULL) is logical(0), which `if` rejects, so test for NULL first.
  if (!is.null(data_uri) && !is.na(data_uri)) {
    return(linked_logo(data_uri))
  }

  local_path <- if (is.na(logo_url)) {
    gt::test_image()
  } else {
    img_path(logo_url)
  }

  if (tolower(fs::path_ext(local_path)) == "svg") {
    return(linked_logo(logo_url))
  }

  # gt:::get_image_uri() is an unexported gt helper that is applied here to
  # the local image file; its result is used as the image source.
  linked_logo(
    gt:::get_image_uri(
      file = local_path
    )
  )
}

# Fetch one logo and store it at img_path(logo_url).
#
# logo_url: single logo URL; NA returns a missing path without any download.
# height:   maximum height in pixels for raster images.
# Returns the path of the written file.
#
# SVG files are downloaded unchanged. Every other format is read with magick,
# trimmed, scaled down to at most `height` pixels high (the ">" geometry
# suffix only ever shrinks) and written to disk.
write_img <- function(logo_url, height = university_logo_height) {

  if (is.na(logo_url)) {
    return(fs::path(NA))
  }

  destination <- img_path(logo_url)
  img_ext <- tolower(
    fs::path_ext(
      destination
    )
  )

  if (img_ext == "svg") {
    download.file(
      url = URLdecode(logo_url),
      destfile = destination,
      quiet = TRUE,
      cacheOK = TRUE,
      method = "libcurl"
    )

    return(destination)
  }

  img <- magick::image_read(
    logo_url
  )

  img_trimmed <- magick::image_trim(
    img
  )

  img_resized <- magick::image_resize(
    img_trimmed,
    paste0(
      "x",
      height,
      ">"
    )
  )

  magick::image_write(
    img_resized,
    destination
  )

  destination
}
# Pick the logo URL to download for each of the first `max_rows` institutions:
# the full-size Wikipedia logo when it is an SVG, otherwise the thumbnail.
logo_url <- hochschulen[1:max_rows, ] |>
  col_to_url(dplyr::everything()) |>
  dplyr::mutate(
    is_svg = tolower(fs::path_ext(Wiki_Logo_DE_URL)) == "svg",
    # Uni Jena == 11615; it always gets the thumbnail
    is_jena = Hochschulart_Hochschule_Signatur == "11615",
    logo_url = dplyr::case_when(
      is_svg & !is_jena ~ Wiki_Logo_DE_URL,
      .default = Wiki_Logo_DE_Thumbnail_URL
    )
  ) |>
  dplyr::pull(logo_url)

# Using purrr shouldn't be necessary because Magick functions are vectorized, but I kept getting errors
# purrr::map_chr(logo_url, write_img)

# A failed download yields a missing path instead of NULL. purrr::list_c()
# drops NULL elements, so without `otherwise` a single failure would make
# `logo_url_written` shorter than `logo_url` and break the row alignment.
write_img_safely <- purrr::safely(
  write_img,
  otherwise = fs::path(NA)
)

#debugonce(write_img)

# One list(result = , error = ) entry per logo URL.
list_logo_url_written <- purrr::map(
  logo_url,
  write_img_safely
)

# Paths of the written files, NA where the URL was missing or the download
# failed; same length and order as `logo_url`.
logo_url_written <- list_logo_url_written |>
  purrr::map(
    "result"
  ) |>
  purrr::list_c()
# Build a collapsible <details> cell: `label` is the summary, and the body is
# a two-column table with one row per identifier.
#
# label: text shown as the summary (and returned alone if there are no IDs).
# ...:   lists with the elements org_url, org_label, org_id_url and
#        org_id_label; entries whose org_id_label is NA are skipped.
# Returns `label` unchanged when no entry has an ID, otherwise the markup
# wrapped with gt::html().
id_function <- function(label, ...) {
  entries <- rlang::list2(...)

  id_labels <- purrr::map_chr(entries, "org_id_label")
  available <- entries[!is.na(id_labels)]

  # If all IDs are NA return just the label
  if (length(available) == 0) {
    return(label)
  }

  link_style <- "color:#008B8B;text-decoration:underline;text-underline-position: under;display:inline-block;"

  # One table cell holding a link that opens in a new tab.
  link_cell <- function(href, text) {
    htmltools::tags$td(
      htmltools::tags$a(
        href = href,
        style = link_style,
        target = "_blank",
        text
      )
    )
  }

  # Inherits from div.rt-td-inner min-width: 100px; width: 100px;
  rows <- purrr::map(
    available,
    function(entry) {
      htmltools::tags$tr(
        style = "min-width:200px",
        link_cell(entry$org_url, entry$org_label),
        link_cell(entry$org_id_url, entry$org_id_label)
      )
    }
  )

  details <- htmltools::tags$details(
    htmltools::tags$summary(label),
    htmltools::tags$table(rows)
  )

  gt::html(as.character(details))
}
# Build a comma-separated list of ranking links for one table cell.
#
# ...: ranking URLs for one institution; NA entries are skipped.
# Returns one <span><a> per remaining URL, labelled via label_ranking(),
# wrapped with gt::html().
rankings_html <- function(...) {

  urls <- rlang::list2(...)
  urls <- urls[!is.na(urls)]

  labels <- purrr::map_chr(
    urls,
    label_ranking
  )

  # Could do this in css
  # https://css-tricks.com/how-to-add-commas-between-a-list-of-items-dynamically-with-css/
  # https://blog.union.io/code/2018/03/07/add-commas-with-css/
  #
  # Every label except the one in last position gets a trailing comma. The
  # decision is made by position, so a label that happens to equal the last
  # one still gets its separator. NA labels are left untouched.
  needs_comma <- seq_along(labels) < length(labels) & !is.na(labels)
  labels_with_comma <- labels
  labels_with_comma[needs_comma] <- paste0(
    labels[needs_comma],
    ","
  )

  link_style <- "color:#008B8B;text-decoration:underline;text-underline-position: under;"

  # Named differently from the top-level create_span() to avoid shadowing it.
  ranking_link <- function(url, url_label) {
    htmltools::span(
      #style = "white-space: pre;",
      htmltools::a(
        url_label,
        href = url,
        target = "_blank",
        style = link_style
      )
    )
  }

  list_spans <- purrr::map2(
    urls,
    labels_with_comma,
    ranking_link
  ) |>
    htmltools::tagList()

  list_spans |>
    as.character() |>
    gt::html()
}

#debugonce(label_socal_media)
#debugonce(rankings_html)

# tibble::tribble(
#   ~umultirank, ~the,
#   "https://www.umultirank.org/study-at/university-of-kassel-rankings/", "https://www.timeshighereducation.com/world-university-rankings/university-kassel"
# ) |>
#   dplyr::rowwise() |>
#   dplyr::mutate(
#     rankings = rankings_html(
#         umultirank,
#         the
#     )
#   ) |>
#   dplyr::ungroup() |>
#   dplyr::mutate(
#     rankings = purrr::map(
#       rankings,
#       gt::html
#     )
#   ) |>
#  gt::gt()
# Render the interactive table: take the first `max_rows` rows of
# `hochschulen`, build one HTML cell per display column with the helpers
# defined above, and hand the result to gt.
hochschulen[c(1:max_rows),] |> 
  # Step 1: duplicate identifier columns under `*_orig` names before
  # col_to_url() is applied below; the copies are used later as link labels.
  # NOTE(review): presumably col_to_url() rewrites only the original
  # identifier columns and leaves the `*_orig` copies as raw IDs - confirm
  # against col_to_url().
  dplyr::mutate(
    gerit_id = gepris_id,
    zeit_che_university = `Hs-Nr.`,
    CHE_DAAD_University_ID = as.character(
      CHE_DAAD_University_ID
    ),
    CHE_DAAD_University_ID_orig = CHE_DAAD_University_ID,
    zeit_che_university_orig = zeit_che_university,
    gepris_id_orig = gepris_id,
    gerit_id_orig = gerit_id,
    eu_pic_orig = eu_pic,
    hs_nr_orig = `Hs-Nr.`, #`[[`(hochschulen, "Hs-Nr."),
    ror_id_orig = ROR_ID,
    risis_orgreg_entity_id_orig = RISIS_ORGREG_ENTITY_ID,
    studieren_id_orig = STUDIEREN_ID,
    whed_id_orig = WHED_ID,
    # File name of the local logo; only used as the join key below.
    logo = fs::path_file(
      img_path(
        Wiki_Logo_DE_URL
      )
    )
  ) |> 
  # Step 2: attach the columns of `hochschulen_logos` by logo file name. Rows
  # with a missing `logo` never match.
  # NOTE(review): `logo_data_uri`, used further down, presumably comes from
  # `hochschulen_logos` - confirm.
   dplyr::left_join(
    hochschulen_logos,
    by = dplyr::join_by(
      logo
    ),
    na_matches = "never"
  ) |> 
  dplyr::select(
    -logo
  ) |> 
  # Step 3: apply the package helper col_to_url() across all columns.
  col_to_url(
    dplyr::everything()
  ) |> 
  # Step 4: link labels that are NA exactly where the corresponding link
  # column is NA, so that id_function() drops those entries. Also attach the
  # paths of the logo files written earlier.
  dplyr::mutate(
    DAAD_ORT_LABEL = dplyr::if_else(
      is.na(DAAD_ORT),
      NA,
      Ort
    ),
    CHE_DAAD_Town_ID_Label = dplyr::if_else(
      is.na(CHE_DAAD_Town_ID),
      NA,
      Ort
    ),
    STUDIEREN_HS_LABEL = dplyr::if_else(
      is.na(STUDIEREN_ID),
      NA,
      studieren_id_orig
    ),
    CHE_DAAD_HS_LABEL = dplyr::if_else(
      is.na(CHE_DAAD_University_ID),
      NA,
      CHE_DAAD_University_ID_orig
    ),
    ZEIT_CHE_HS_LABEL = dplyr::if_else(
      is.na(zeit_che_university),
      NA,
      zeit_che_university_orig
    ),
    # `logo_url_written` is positional: it has to line up row by row with the
    # rows selected at the top of this pipeline.
    logo_url = logo_url_written
  ) |> 
  # Step 5: build the HTML cells row by row; the helpers used here take the
  # values of a single institution.
  dplyr::rowwise() |>
  dplyr::mutate(
    # Collapsible list of institutional identifiers; each entry links the
    # provider (org_url) and the institution's record (org_id_url).
    IDs = list(
      id_function(
        label = Hochschulart_Hochschule_Signatur,
        list(
          org_url = org_urls[["eu_pic"]],
          org_label = 'EU PIC',
          org_id_url = eu_pic,
          org_id_label = eu_pic_orig
        ),
        list(
          org_url = org_urls[["gepris"]],
          org_label = 'GEPRIS',
          org_id_url = gepris_id,
          org_id_label = gepris_id_orig
        ),
        list(
          org_url = org_urls[["gerit"]],
          org_label = 'GERiT',
          org_id_url = gerit_id,
          org_id_label = gerit_id_orig
        ),
        list(
          org_url = org_urls[["hrk"]],
          org_label = 'HRK',
          org_id_url = `Hs-Nr.`,
          org_id_label = hs_nr_orig
        ),
        list(
          org_url = org_urls[["risis"]],
          org_label = 'RISIS',
          org_id_url = RISIS_ORGREG_ENTITY_ID,
          org_id_label = risis_orgreg_entity_id_orig
        ),
        list(
          org_url = org_urls[["ror"]],
          org_label = 'ROR',
          org_id_url = ROR_ID,
          org_id_label = ror_id_orig
        ),
        list(
          org_url = org_urls[["whed"]],
          org_label = 'WHED',
          org_id_url = WHED_ID,
          org_id_label = whed_id_orig
        )
      )
    ),
    # City name with collapsible links to city-level websites.
    Ort = list(
      id_function(
        label = Ort,
        list(
          org_url = ort_urls[["daad_cities"]],
          org_label = 'DAAD',
          org_id_url = DAAD_ORT,
          org_id_label = DAAD_ORT_LABEL
        ),
        list(
          org_url = ort_urls[["daad_che_cities"]],
          org_label = 'DAAD CHE',
          org_id_url = CHE_DAAD_Town_ID,
          org_id_label = CHE_DAAD_Town_ID_Label
        )
      )
    ),
    # Institution name with collapsible links to institution-level websites.
    Hochschule_EN = list(
      id_function(
        label = Hochschule_EN,
        list(
          org_url = university_urls[["daad_che"]],
          org_label = 'DAAD CHE',
          org_id_url = CHE_DAAD_University_ID,
          org_id_label = CHE_DAAD_HS_LABEL
        ),
        list(
          org_url = university_urls[["studieren_de"]],
          org_label = 'studieren.de',
          org_id_url = STUDIEREN_ID,
          org_id_label = STUDIEREN_HS_LABEL
        ),
        list(
          org_url = university_urls[["zeit_che"]],
          org_label = 'ZEIT CHE',
          org_id_url = zeit_che_university,
          org_id_label = ZEIT_CHE_HS_LABEL
        )
      )
    ),
    # Wikipedia (de/en) and Wikidata links, space-separated.
    Wiki = build_wiki_span(
      urls = c(
        Wiki_ID_DE,
        Wiki_ID_EN,
        Wikidata_ID
      ),
      labels = c(
        'de',
        'en',
        'data'
      )
    ),
    # Logo image linking to the institution's website.
    Logo = build_logo_html(
      logo_url = logo_url,
      hochschule_url = url,
      data_uri = logo_data_uri
    ),
    # The argument names are the Font Awesome icon names; the values are the
    # profile URLs.
    social_media = list(
      social_media_icons(
        "fab fa-facebook" = facebook_id, 
        "fab fa-instagram" = insta_id,
        "fab fa-linkedin" = linkedin,
        "fab fa-mastodon" = mastodon_id,
        "fab fa-tiktok" = tiktok,
        "fab fa-x-twitter" = x_user,
        "fab fa-youtube" = youtube_channel_id
      )
    ),
    Rankings = list(
      rankings_html(
        arwu_id,
        CHE_DAAD_University_ID,
        Multirank,
        the_id,
        qs_world_id
      )
    )
  ) |>
  dplyr::ungroup() |> 
  # Step 6: wrap every element of these columns with gt::html() again, giving
  # list columns of HTML fragments.
  dplyr::mutate(
    Logo = purrr::map(
      Logo,
      gt::html
    ),
    Wiki = purrr::map(
      Wiki,
      gt::html
    ),
    Rankings = purrr::map(
      Rankings,
      gt::html
    )
  ) |> 
  # Step 7: keep only the display columns, in display order.
  dplyr::select(
    Logo,
    IDs,
    Ort,
    Hochschule_EN,
    Wiki,
    Rankings,
    social_media
  ) |> 
  # Step 8: build and style the gt table. The table id "hochschulen" is the
  # anchor for the CSS selectors at the end.
  gt::gt(
    id = "hochschulen"
    # rowname_col = "Hochschule_Signatur",
    # rownames_to_stub  = TRUE
  ) |>
  gt::tab_header(
    title = title
    #,subtitle = subtitle
  ) |> 
  gt::tab_source_note(
    source_note = gt::html(
      source_note
    )
  ) |> 
  gt::cols_label(
    Hochschule_EN = "Hochschule",
    social_media = "Social Media"
  ) |> 
  # Left-align everything, then right-align the logo column.
  gt::cols_align(
    align = "left",
    columns = dplyr::everything()
  ) |> 
  gt::cols_align(
    align = "right",
    columns = Logo
  ) |>
  gt::cols_width(
    Logo ~ gt::px(
      325L
    ),
    IDs ~ gt::px(
      150L
    ),
    Hochschule_EN ~ gt::px(
      250L
    )
  ) |> 
  gt::opt_interactive(
    #use_search = TRUE, # not really useful with so many columns having multiple values
    use_highlight = TRUE,
    use_page_size_select = TRUE,
    page_size_default = 50
  ) |> 
  gt::tab_style(
    style = gt::cell_text(
      align = "left"
    ),
    locations = gt::cells_body(
      columns = c(
        IDs, Ort, Hochschule_EN
      )
    )
  ) |> 
  gt::tab_options(
    container.width = gt::px(table_width),
    table.width = gt::px(table_width)
  ) |> 
  # Extra CSS scoped to the table id.
  # NOTE(review): the first selector is `rt-td` without a leading dot, unlike
  # the `.rt-align-*` class selectors after it. As written it targets an
  # element named <rt-td>; `.rt-td` was probably intended - confirm before
  # changing, since the fix would alter the rendered layout.
  gt::opt_css(
    css = "
    #hochschulen rt-td{
      flex-wrap:wrap;
      align-items:center;
    }
    #hochschulen .rt-align-left{
      align-items:center;
    }
    #hochschulen .rt-align-right{
      align-items:center;
    }
    "
  )
# <style type="text/css">
# .container template-article{
#   max-width: 1500px !important;
# }
# </style>

Data source

The data is sourced from the following tables and lists:

Identifiers

If available, the following identifiers for each institution are included:

City

If available, the following websites based on the institution's city are linked:

Institutions of higher education

If available, the following websites providing further details on each institution of higher education and their programs are linked:

Rankings

If available, the following rankings are linked:

Social Media

If available, these social media sites are linked:

APIs

The following APIs were helpful in creating the data above:

A rant about the Higher Education Compass / Hochschulkompass and data licences

One obvious addition to the ?hochschulen dataset would be a link to the Higher Education Compass / Hochschulkompass by the HRK. Unfortunately, the compass website uses a lot of obfuscating Javascript to make direct links very difficult. Now, the HRK is a semi-public institution funded by the Stiftung zur Förderung der Hochschulrektorenkonferenz. This foundation mainly raises its funds through grants from the Länder and from the Federal Ministry of Education and Research. Moreover, the mostly public members (around 271 institutions of higher education) of the HRK pay annual membership fees.

According to the website, "all information found in the Higher Education Compass is authorized by the universities and is updated by employees at the universities themselves". Against this backdrop, it is incomprehensible that the HRK deems it worthwhile to protect the Higher Education Compass data. Use of this data by the public and/or private competitors to provide better offerings with additional data or different UI should be encouraged, not made more difficult! After all, this is non-confidential data on public institutions largely paid for by taxpayers' money, administered by staff mostly in the public sector...

The ?hochschulen dataset consciously consists almost exclusively of identifiers and links, so as to avoid data licencing issues. Particularly the private providers, like ZEIT or studieren.de, naturally have a vested business interest to protect their data and to discourage direct links as best they can.

About this project

Like most good projects, this one started out of a sense of annoyance. In particular, I looked at the Destatis list of 427 institutions of higher education and realized that they did NOT provide a detailed breakdown of the member institutions in each category. For the time being, only the Länder statistical agencies provide this level of detail. Moreover, I noticed that the list of all institutions of higher education in Germany provided by the German Rectors' Conference (HRK) did not use the institutional identifiers used by the Länder statistical agencies (as one would expect). Nor did their numbers match up with Destatis. I will admit, writing these two slights down, I may come off as easily annoyed - but I can assure you, these sorts of things can be heart-wrenching, really!

Annoyed as I was, I started to dig, and dig, and dig a little deeper still. I discovered along the way that trying to count institutions of higher education was by no means a particularly innovative endeavor - after all, ETER does it at the European level, WHED does it for the global level, GERiT and ROR do it for research institutions (slightly different emphasis). Had I known about these initiatives from the start - rather than stumbling upon them during the process - I would have avoided a lot of work. Alas, the proof of the pudding is in the eating...

Aims of this project

A lot of the data assembled in the ?hochschulen dataset will find its more permanent home on Wikidata - another great resource I have only come to appreciate as this project has progressed. For the time being, the idea is that this data set can save other people time matching up different data sources and IDs. People who do work on rankings, or people who work in communication departments come to mind, but there are certainly lots of other applications.

To Dos

# Clean up: delete every logo file written earlier, skipping missing paths
# and deleting each file only once.
logo_url_written[!is.na(logo_url_written)] |>
  unique() |>
  fs::file_delete()


RichardMeyer-Eppler/studentenstatistikNRW documentation built on July 27, 2024, 3:14 a.m.