# create_data/3.create_ESCO_DB.R

# Merging skills and occupations datasets ----

# Load the skills table and expose the English label under the name `skill`,
# which is the key used when joining with the occupations further down.
ESCO_skills <- readRDS("create_data/ESCO_skills.rds")
ESCO_skills <- rename(ESCO_skills, skill = "skill_en")
# Drop the third column (selected by position).
# NOTE(review): confirm which column sits at position 3 in the source RDS.
ESCO_skills <- select(ESCO_skills, -3)

# Applying technimetro ----
# A skill is flagged as digital when text_tagger() returns a non-empty
# `entity` for its English label or for its Italian label.

# Calculate the number of cores: leave one free, but never ask for fewer than
# one worker (detectCores() may return 1 or NA on some systems).
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
# Initiate cluster
cl <- makeCluster(no_cores)

# Run both tagging passes on the same cluster and always shut the workers
# down afterwards, even if tagging fails, so no R processes are left behind.
tryCatch(
  {
    # Tagging English skills
    is_digital_skill_en <- parLapply(cl,
                                     ESCO_skills$skill,
                                     text_tagger)
    # Tagging Italian skills
    is_digital_skill_it <- parLapply(cl,
                                     ESCO_skills$skill_it,
                                     text_tagger)
  },
  finally = stopCluster(cl)
)

# Bind the per-skill tagger results into one table, then record for each
# skill whether its `entity` is empty.
is_digital_skill_en <- is_digital_skill_en %>%
  tibble() %>%
  unnest(cols = c(.))
is_digital_skill_en <- purrr::map(is_digital_skill_en$entity, is_empty)

is_digital_skill_it <- is_digital_skill_it %>%
  tibble() %>%
  unnest(cols = c(.))
is_digital_skill_it <- purrr::map(is_digital_skill_it$entity, is_empty)

# Digital = not (empty in English AND empty in Italian).
is_digital_skill <- !(unlist(is_digital_skill_en) & unlist(is_digital_skill_it))

ESCO_skills <- ESCO_skills %>%
  mutate(is_digital = is_digital_skill)

# Applying soft skills tagger ----
# A skill is flagged as soft when text_tagger(type = "soft_skills") returns a
# non-empty `entity` for its English label.

# Calculate the number of cores: leave one free, but never ask for fewer than
# one worker (detectCores() may return 1 or NA on some systems).
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
# Initiate cluster
cl <- makeCluster(no_cores)

# Tagging English skills; the cluster is always stopped afterwards, even if
# tagging fails, so no worker processes are left running.
is_soft <- tryCatch(
  parLapply(cl,
            ESCO_skills$skill,
            text_tagger, type = "soft_skills"),
  finally = stopCluster(cl)
)

# Bind the per-skill tagger results into one table, then record for each
# skill whether its `entity` is empty.
is_soft <- is_soft %>%
  tibble() %>%
  unnest(cols = c(.))
is_soft <- purrr::map(is_soft$entity, is_empty)

# Soft = the tagger found something (entity not empty).
is_soft <- !(unlist(is_soft))

ESCO_skills <- ESCO_skills %>%
  mutate(is_soft = is_soft)

# Visual check of the skills tagged as soft. View() opens a data viewer, so
# only call it in an interactive session; in batch mode (e.g. Rscript) it is
# skipped so the script can carry on and save its output.
if (interactive()) {
  ESCO_skills %>%
    filter(is_soft) %>%
    View()
}

# Load the occupations table and expand its nested `data` column; the join
# below expects a `skill` column to be present afterwards.
ESCO_occupations <- unnest(
  readRDS("create_data/ESCO_occupations.rds"),
  cols = c("data")
)

# Attach the skill attributes to every occupation row. A left join (rather
# than the base merge() tried earlier) keeps occupation rows whose skill has
# no match in ESCO_skills. The result is then nested back to one row per
# occupation.
ESCO_DB <- ESCO_occupations %>%
  left_join(ESCO_skills, by = "skill") %>%
  group_by(occupation_en, occupation_it, occupation_uri, isco_code) %>%
  nest()

# Persist the combined database.
saveRDS(ESCO_DB, file = "create_data/ESCO_DB.rds")
# ldbolanos/standards documentation built on Aug. 7, 2020, 8:13 p.m.