# create_data/2.get_ESCO_occupations.R

library(jsonlite)
library(tidyverse)

# Extract the English and Italian preferred labels of an occupation entry.
#
# Args:
#   result: one occupation entry of an API response; the labels are read from
#     `result$preferredLabel$en` and `result$preferredLabel$it`.
#
# Returns:
#   A length-2 vector: the English label first, the Italian label second.
#   A label that is absent (or cannot be read) is returned as NA_character_,
#   so the Italian label can never slide into the English position.
get_occupation <- function(result) {

  # `$` yields NULL for an absent field and only raises for inputs it cannot
  # index (e.g. atomic vectors); both situations are mapped to NULL here.
  occupation_en <- tryCatch(result$preferredLabel$en,
                            error = function(e) NULL)
  occupation_it <- tryCatch(result$preferredLabel$it,
                            error = function(e) NULL)

  # c(NULL, x) would silently drop the missing slot, so an empty value is
  # replaced by an explicit NA and only the first element is kept.
  first_or_na <- function(label) {
    if (length(label) == 0L) NA_character_ else label[[1L]]
  }

  c(first_or_na(occupation_en), first_or_na(occupation_it))
}

# Read the `uri` field of an occupation entry.
#
# Args:
#   result: one occupation entry of an API response.
#
# Returns:
#   The value of `result$uri`, or NULL when the field is absent or `result`
#   cannot be indexed with `$`.
get_occupation_uri <- function(result) {
  tryCatch(
    result$uri,
    # Any failure while reading the field is reported as "no URI".
    error = function(cond) NULL
  )
}

# Read the `code` field of an occupation entry (the caller stores it as the
# ISCO code).
#
# Args:
#   result: one occupation entry of an API response.
#
# Returns:
#   The value of `result$code`, or NULL when the field is absent or `result`
#   cannot be indexed with `$`.
get_isco_code <- function(result) {
  on_failure <- function(cond) NULL
  tryCatch(result$code, error = on_failure)
}

# Collect the essential skills linked to an occupation entry.
#
# Args:
#   result: one occupation entry of an API response; the skills are read from
#     `result$_links$hasEssentialSkill`.
#
# Returns:
#   The `uri` and `title` columns of the linked skills. If they cannot be
#   selected for any reason (for instance the element is absent), an empty
#   tibble is returned instead.
get_essential_skills <- function(result) {
  tryCatch(
    {
      linked_skills <- result$`_links`$hasEssentialSkill
      select(linked_skills, uri, title)
    },
    error = function(cond) {
      tibble(.rows = NULL)
    }
  )
}

# Collect the optional skills linked to an occupation entry.
#
# Args:
#   result: one occupation entry of an API response; the skills are read from
#     `result$_links$hasOptionalSkill`.
#
# Returns:
#   The `uri` and `title` columns of the linked skills. If they cannot be
#   selected for any reason (for instance the element is absent), an empty
#   tibble is returned instead.
get_optional_skills <- function(result) {
  tryCatch(
    {
      linked_skills <- result$`_links`$hasOptionalSkill
      select(linked_skills, uri, title)
    },
    error = function(cond) {
      tibble(.rows = NULL)
    }
  )
}

# Turn one page of an occupation API response into a tibble with one row per
# occupation and that occupation's skills nested in a list-column.
#
# Args:
#   response: parsed API response; the occupation entries are read from its
#     `_embedded` element.
#
# Returns:
#   An ungrouped tibble in which each occupation is identified by
#   occupation_en, occupation_it, isco_code and occupation_uri, and its skills
#   (uri, skill, is_essential) are nested by nest(). A response without
#   entries gives an empty tibble.
#   NOTE(review): an occupation with neither essential nor optional skills
#   appears to produce zero rows and is therefore absent from the result;
#   confirm that this is intended.
get_occupations_from_response <- function(response) {

  results <- response$`_embedded`

  # A NULL scalar would make mutate() skip the column and the following
  # group_by() fail, so absent values are stored as NA instead.
  value_or_na <- function(value) {
    if (length(value) == 0L) NA_character_ else value
  }

  # Rename `title` to `skill` and flag the skill type. Skipped for an empty
  # tibble, which has no `title` column to rename.
  tag_skills <- function(skills, essential) {
    if (nrow(skills) > 0) {
      skills <- skills %>%
        mutate(is_essential = essential) %>%
        rename(skill = "title")
    }
    skills
  }

  # One slot per occupation; everything is bound together once at the end
  # instead of re-copying the accumulated tibble on every iteration.
  occupations <- vector("list", length(results))

  # seq_along() runs zero times on an empty page, where 1:N would count 1, 0
  # and fail on results[[1]].
  for (i in seq_along(results)) {

    # Slicing the row
    result <- results[[i]]

    # Occupation title (English, Italian), URI and ISCO code
    labels <- get_occupation(result)
    label_en <- value_or_na(labels[1])
    label_it <- value_or_na(labels[2])
    occ_uri <- value_or_na(get_occupation_uri(result))
    occ_code <- value_or_na(get_isco_code(result))

    # Essential and optional skills (if any)
    essential_skills <- tag_skills(get_essential_skills(result), TRUE)
    optional_skills <- tag_skills(get_optional_skills(result), FALSE)

    # Building the data frame: the skills are nested under the occupation.
    occupations[[i]] <- bind_rows(essential_skills, optional_skills) %>%
      mutate(occupation_en = label_en,
             occupation_it = label_it,
             occupation_uri = occ_uri,
             isco_code = occ_code) %>%
      group_by(occupation_en, occupation_it, isco_code, occupation_uri) %>%
      nest() %>%
      # The grouping is only needed for nest(); drop it so the bound result
      # is a plain tibble.
      ungroup()
  }

  bind_rows(occupations)
}


# Download every ESCO occupation, page by page, from the ESCO REST API.
#
# Takes no arguments. Requests the occupations concept scheme with the
# language set to "it", then keeps following the `next` link of each response
# until there is none, printing one progress line per page.
#
# Returns:
#   A tibble with the occupations of all pages bound together, each page
#   converted by get_occupations_from_response().
get_ESCO_occupations <- function() {

  occupations_scheme <- "http://data.europa.eu/esco/concept-scheme/occupations"
  language <- "it"

  api_url <- "https://ec.europa.eu/esco/api"
  occupation_endpoint <- "/resource/occupation"

  # Named page_url rather than `url` so that it does not shadow base::url().
  page_url <- paste0(api_url, occupation_endpoint,
                     "?",
                     "isInScheme=",
                     occupations_scheme,
                     "&language=",
                     language)

  # Pages are collected in a list and bound once at the end. The list is
  # seeded with an empty tibble so the result is a tibble in every case.
  pages <- list(tibble(.rows = NULL))
  queried <- 0L

  # The total is taken from the first response only.
  total_occupations <- NULL
  is_first_page <- TRUE

  while (!is.null(page_url)) {

    response <- fromJSON(txt = url(page_url))

    if (is_first_page) {
      total_occupations <- response$total
      is_first_page <- FALSE
    }

    page <- get_occupations_from_response(response)
    pages[[length(pages) + 1L]] <- page
    queried <- queried + nrow(page)

    print(paste0("Queried ",
                 queried,
                 " of ",
                 total_occupations,
                 " occupations"))

    # NULL on the last page, which ends the loop.
    page_url <- response$`_links`$`next`$href
  }

  bind_rows(pages)
}

# Run the full download when this script is executed and keep the result in
# the global `ESCO_occupations` object.
ESCO_occupations <- get_ESCO_occupations()

# Cache the result on disk as an RDS file.
# NOTE(review): presumably `temp_data/` already exists relative to the
# working directory when this runs; confirm, since nothing here creates it.
saveRDS(ESCO_occupations, file = "temp_data/ESCO_occupations.rds")
# ldbolanos/standards documentation built on Aug. 7, 2020, 8:13 p.m.