# This script uses ateco2esco table as a bridge between ATECO sectors and SOC codes.
# It will match ISCO codes with the equivalent SOC and then paste the corresponding ATECO sector
library(openxlsx)
library(tidyverse)
library(magrittr)
# Loading data
ateco2esco <- readRDS("create_data/ateco2esco_skills.rds") %>%
ungroup() %>%
unnest(cols = c(data)) %>%
ungroup() %>%
select(ateco_code, ateco_sector, ateco_sector_it, isco_code, confidence) %>%
mutate(isco_code = substr(isco_code, 1, 4)) %>%
group_by(ateco_sector,
ateco_sector_it,
ateco_code,
isco_code) %>%
mutate(confidence = max(confidence)) %>%
ungroup() %>%
unique()
ONET_DB <- readRDS("create_data/ONET_DB.rds") %>%
mutate(soc_code = substr(soc_code, 1, 7)) %>%
ungroup()
esco_onet_crosswalk <- read.xlsx("C://Users/Luis/Desktop/isco_soc_crosswalk.xlsx",
sheet = 2) %>%
select(1, 4) %>%
rename(soc_code = 1, isco_code = 2) %>%
.[-(1:5), ]
# Joining tables to enrich information
# Joining ONET with the ISCO codes
temp <- inner_join(ONET_DB, esco_onet_crosswalk, by = c("soc_code"))
# Joining previous table with the ATECO codes
soc_ateco_crosswalk <- inner_join(temp, ateco2esco, by = c("isco_code")) %>%
select(soc_code,
isco_code,
ateco_code,
ateco_sector,
ateco_sector_it,
confidence) %>%
# Removing duplicates because of how inner_join works here
unique()
# Helper function
soc2ateco <- function(soc_code) {
row <- match(soc_code, soc_ateco_crosswalk$soc_code)
if (!is.na(row)) {
ateco_code <- soc_ateco_crosswalk$ateco_code[[row]]
} else {
ateco_code <- NULL
}
return(ateco_code)
}
soc2ateco_confidence <- function(soc_code) {
row <- match(soc_code, soc_ateco_crosswalk$soc_code)
if (!is.na(row)) {
ateco_code_confidence <- soc_ateco_crosswalk$confidence[[row]]
} else {
ateco_code_confidence <- NULL
}
return(ateco_code_confidence)
}
ateco_code2name <- function(ateco_code) {
if (length(ateco_code) > 0) {
row <- match(ateco_code, ateco2esco$ateco_code)
if (!is.na(row)) {
ateco_sector <- ateco2esco$ateco_sector[[row]]
} else {
ateco_sector <- "Unknown"
}
return(ateco_sector)
} else {
return("Unknown")
}
}
ateco_code2name_it <- function(ateco_code) {
if (length(ateco_code) > 0) {
row <- match(ateco_code, ateco2esco$ateco_code)
if (!is.na(row)) {
ateco_sector <- ateco2esco$ateco_sector_it[[row]]
} else {
ateco_sector <- "Unknown"
}
return(ateco_sector)
} else {
return("Unknown")
}
}
# Mapping ATECO codes from SOC codes
ateco_code <- purrr::map(ONET_DB$soc_code, soc2ateco)
ateco_code_confidence <- purrr::map(ONET_DB$soc_code, soc2ateco_confidence)
ateco_sector <- vector("character", length(ateco_code))
ateco_sector_it <- vector("character", length(ateco_code))
for (i in 1:length(ateco_code)) {
ateco_sector[i] <- ateco_code2name(ateco_code[[i]])
ateco_sector_it[i] <- ateco_code2name_it(ateco_code[[i]])
}
# Formatting data
ateco2onet_skills <- readRDS("create_data/ONET_DB.rds") %>%
mutate(ateco_code = ateco_code,
ateco_sector = ateco_sector,
ateco_sector_it = ateco_sector,
confidence = ateco_code_confidence) %>%
filter(ateco_sector != "Unknown") %>%
# Removing technologies
select(-technology) %>%
# Unnesting skills
unnest(cols = c("skill")) %>%
ungroup()
# Setting correct type to be able to sort
ateco2onet_skills$ateco_code %<>% as.character
# Sorting and grouping
ateco2onet_skills <- ateco2onet_skills %>%
arrange(ateco_code) %>%
nest(-ateco_code)
ateco2onet_technologies <- readRDS("create_data/ONET_DB.rds") %>%
mutate(ateco_code = ateco_code,
ateco_sector = ateco_sector,
ateco_sector_it = ateco_sector,
confidence = ateco_code_confidence) %>%
filter(ateco_sector != "Unknown") %>%
# Removing technologies
select(-skill) %>%
# Unnesting skills
unnest(cols = c("technology")) %>%
ungroup()
ateco2onet_technologies$ateco_code %<>% as.character
ateco2onet_technologies <- ateco2onet_technologies %>%
arrange(ateco_code) %>%
nest(-ateco_code)
# Saving final ONET skills table with ATECO codes
saveRDS(ateco2onet_skills, file = "create_data/ateco2onet_skills.rds")
saveRDS(ateco2onet_technologies, file = "create_data/ateco2onet_technologies.rds")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.