library(jsonlite)
library(tidyverse)
library(parallel)
library(httr)
library(textyr)
# Download the O*NET career catalogue ----
#
# Walks the paginated My Next Move career list, queries every career's
# occupation summary resources and stores one row per career in
# create_data/ONET_DB_raw.rds.

# Authorization header value sent with every request.
# NOTE(review): the fallback credential is hard-coded in this file. Set the
# ONET_AUTHORIZATION environment variable to override it, and consider
# removing the literal from version control.
onet_authorization <- Sys.getenv(
  "ONET_AUTHORIZATION",
  unset = "Basic dGV4dHlfYml6Ojg5NDlyems="
)

# Perform an authenticated GET and return the parsed JSON body.
#
# url      : address to query.
# required : when TRUE an HTTP error aborts the script; when FALSE it is only
#            reported as a warning and whatever body came back is parsed, so
#            one failing occupation does not discard the whole download.
onet_get_json <- function(url, required = FALSE) {
  r <- GET(
    url,
    add_headers(
      Authorization = onet_authorization,
      Accept = "application/json"
    )
  )
  if (required) {
    stop_for_status(r)
  } else {
    warn_for_status(r)
  }
  content(r, "parsed", type = "application/json")
}

# Collect `entry$name` from every entry of a parsed list (NULL when empty).
element_names <- function(entries) {
  unlist(lapply(entries, function(entry) entry$name), use.names = FALSE)
}

# Collect `entry$title$name` from every entry of a parsed list (NULL when
# empty).
category_titles <- function(entries) {
  unlist(lapply(entries, function(entry) entry$title$name), use.names = FALSE)
}

# Build the one-row tibble describing a single career.
#
# career : one parsed entry of the career list (fields read here: code,
#          title, href and tags).
# Returns an ungrouped one-row tibble with the columns occupation, soc_code,
# href, bright_outlook, green, apprenticeship and a nested `data` column
# holding the list-columns technology_skills, skills, abilities, knowledge
# and tools.
fetch_career_record <- function(career) {
  career_code <- career$code
  response_occupation <- onet_get_json(
    paste0(
      "https://services.onetcenter.org/ws/online/occupations/",
      career_code,
      "/"
    )
  )

  # Only these summary resources end up in the stored record. The original
  # script also downloaded "Tools & Technology" but never stored the result,
  # so that request is no longer made.
  wanted_titles <- c(
    "Technology Skills",
    "Skills",
    "Abilities",
    "Knowledge",
    "Tools Used"
  )

  technology_skills <- c()
  skills <- c()
  abilities <- c()
  knowledge <- c()
  tools <- c()
  for (resource in response_occupation$summary_resources$resource) {
    title <- resource$title
    # Avoid retrieving other resources
    if (is.null(title) || !(title %in% wanted_titles)) {
      next
    }
    detail <- onet_get_json(resource$href)
    # Appending (rather than assigning) keeps every entry should a title be
    # listed more than once.
    switch(title,
      "Technology Skills" = {
        technology_skills <- c(
          technology_skills,
          category_titles(detail$category)
        )
      },
      "Skills" = {
        skills <- c(skills, element_names(detail$element))
      },
      "Abilities" = {
        abilities <- c(abilities, element_names(detail$element))
      },
      "Knowledge" = {
        knowledge <- c(knowledge, element_names(detail$element))
      },
      "Tools Used" = {
        tools <- c(tools, category_titles(detail$category))
      }
    )
  }

  # Wrapping each vector in list() yields list-columns, i.e. exactly one row
  # per career regardless of how many entries each resource returned.
  tibble(
    technology_skills = list(technology_skills),
    skills = list(skills),
    abilities = list(abilities),
    knowledge = list(knowledge),
    tools = list(tools)
  ) %>%
    mutate(
      occupation = career$title,
      soc_code = career_code,
      href = career$href,
      bright_outlook = career$tags$bright_outlook,
      green = career$tags$green,
      apprenticeship = career$tags$apprenticeship
    ) %>%
    group_by(occupation, soc_code, href, bright_outlook, green, apprenticeship) %>%
    nest() %>%
    ungroup()
}

career_rows <- list()
n_queried <- 0L
next_url <- "https://services.onetcenter.org/ws/mnm/careers/"
# Placeholder until the first response reports the real total
tot_careers <- 1000000
while (n_queried < tot_careers) {
  response_careers <- onet_get_json(next_url, required = TRUE)

  # For each career on this page
  page_rows <- lapply(response_careers$career, fetch_career_record)
  career_rows <- c(career_rows, page_rows)
  n_queried <- n_queried + sum(vapply(page_rows, nrow, integer(1)))

  tot_careers <- response_careers$total
  print(paste0("Queried ",
               n_queried,
               " of ",
               tot_careers,
               " careers..."))

  # Follow the "next" link. Without one there is nothing left to fetch, so
  # stop instead of requesting the same page again (which would append
  # duplicate rows, or never terminate).
  following_url <- NULL
  for (link in response_careers$link) {
    if (identical(link$rel, "next")) {
      following_url <- link$href
    }
  }
  if (is.null(following_url)) {
    break
  }
  next_url <- following_url
}

# Bind once at the end instead of growing the tibble inside the loop
ONET_careers <- bind_rows(career_rows)

# NOTE(review): 974 is hard-coded; presumably the number of careers expected
# at the time of writing - confirm. head() keeps at most that many rows and,
# unlike [1:974, ], does not index past the end when fewer were downloaded.
ONET_careers <- head(ONET_careers, 974)
saveRDS(ONET_careers, file = "create_data/ONET_DB_raw.rds")
# Reformatting data like ESCO_DB ----
#
# Loads the raw download, exposes the nested list-columns and replaces
# missing skills / abilities / knowledge entries with the placeholder "None".
ONET_DB_raw <- readRDS("create_data/ONET_DB_raw.rds") %>%
  unnest(cols = c("data")) %>%
  replace_na(list(skills = "None", abilities = "None", knowledge = "None"))

# Columns from position 7 onwards are concatenated per row. Positions 7 and
# 11 feed `technology`, every other one feeds `skill`.
# NOTE(review): presumably 7 and 11 are technology_skills and tools, given
# the column order in which the raw tibble is built - confirm.
payload_cols <- setdiff(seq_len(ncol(ONET_DB_raw)), seq_len(6))
technology_cols <- c(7, 11)

# Calculate the number of cores: leave one free, but always request at least
# one worker (detectCores() may return 1 or NA)
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
# Initiate cluster
cl <- makeCluster(no_cores)

# Per-row results are collected in preallocated lists and bound once at the
# end instead of growing a tibble on every iteration.
skill_rows <- vector("list", nrow(ONET_DB_raw))
technology_rows <- vector("list", nrow(ONET_DB_raw))

tryCatch(
  {
    for (i in seq_len(nrow(ONET_DB_raw))) {
      # Concatenating all columns
      skill <- c()
      technology <- c()
      for (j in payload_cols) {
        if (j %in% technology_cols) {
          technology <- c(technology, ONET_DB_raw[[i, j]])
        } else {
          skill <- c(skill, ONET_DB_raw[[i, j]])
        }
      }
      skill_tibble <- tibble(unlist(skill)) %>%
        unique()
      technology_tibble <- tibble(unlist(technology)) %>%
        unique()
      is_digital <- rep(FALSE, nrow(skill_tibble))

      # Applying soft skills tagger
      # Tagging English skills
      # NOTE(review): X is a one-column tibble, so parLapply() iterates over
      # its columns rather than over individual skills - confirm that
      # text_tagger() is meant to receive the whole vector at once.
      is_soft <- parLapply(cl,
                           skill_tibble[, 1],
                           text_tagger, type = "soft_skills") %>%
        tibble() %>%
        unnest(cols = c(.))
      # A skill counts as soft when the tagger returned a non-empty entity
      is_soft <- purrr::map(is_soft$entity, is_empty)
      is_soft <- !(unlist(is_soft))

      skill_rows[[i]] <- skill_tibble %>%
        rename("skill" = 1) %>%
        mutate(is_digital = is_digital) %>%
        mutate(is_soft = is_soft) %>%
        mutate(row = i)
      technology_rows[[i]] <- technology_tibble %>%
        rename("technology" = 1) %>%
        mutate(row = i)
    }
  },
  # Always release the worker processes, even when a row fails
  finally = stopCluster(cl)
)

skills_concat_per_row <- bind_rows(skill_rows)
technology_concat_per_row <- bind_rows(technology_rows)
# Collapse the long per-skill and per-technology tables into one nested
# tibble per source row (list-column `data`, keyed by `row`)
skills_concat_per_row <- nest(group_by(skills_concat_per_row, row))
technology_concat_per_row <- nest(group_by(technology_concat_per_row, row))

# Attach the nested tables to the occupations by position, then drop the raw
# list-columns they were derived from
ONET_DB <- mutate(
  ONET_DB_raw,
  skill = skills_concat_per_row$data,
  technology = technology_concat_per_row$data
)
ONET_DB <- select(ONET_DB, -(technology_skills:tools))

saveRDS(ONET_DB, file = "create_data/ONET_DB.rds")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.