# A macro to save the O*NET taxonomy as a data.tree within the package
# https://www.onetcenter.org/database.html#individual-files
# https://www.onetcenter.org/dictionary/26.2/excel/content_model_reference.html
library(janitor)
library(tidyverse)
library(readxl)
library(data.tree)
# Load the O*NET Content Model Reference workbook, tidy its column names
# with clean_names(), and keep only the element code and the element label
data <- read_excel(path = "scratch/ONET/Content Model Reference.xlsx") %>%
  clean_names() %>%
  select(element_id, element_name)
# Eyeball how often each label occurs; labels become node names once the
# edge list is built, so a repeated label would be ambiguous
ggplot(data = data, mapping = aes(x = element_name)) + geom_bar() + coord_flip()
# Count the occurrences of every label and, wherever a label is shared by
# more than one element, append the element ID to make it unique
# (all IDs are unique)
data <- data %>%
  add_count(element_name, name = "n") %>%
  mutate(
    element_name = ifelse(
      n > 1,
      paste0(element_name, " (", element_id, ")"),
      element_name
    )
  ) %>%
  # Drop the helper count again, leaving just the code and the label
  select(element_id, element_name)
# Split every element_id into its dot-separated components,
# e.g. "5.D.1" becomes "5" "D" "1"
id_parts <- strsplit(data$element_id, split = "\\.")
# Build the mother code of each element by removing the last component and
# sticking the rest back together, e.g. "5" "D" "1" becomes "5.D".
# vapply() is used instead of `for (i in 1:length(...))`: it copes with an
# empty input (1:0 would iterate over c(1, 0)) and guarantees exactly one
# character string per element, so the result is a plain character vector
# rather than a list.
mother_id <- vapply(
  id_parts,
  function(parts) paste(head(parts, -1), collapse = "."),
  character(1),
  USE.NAMES = FALSE
)
# A top-level element has nothing left once its only component is removed;
# such elements are attached to the ultimate mother, whose ID is "0"
mother_id[mother_id == ""] <- "0"
# Make a mother data frame to join the children to via mother_id, with an
# extra first row for the ultimate mother. This is built before mother_id
# is added to `data`, so it only carries the renamed ID and name columns.
mothers <- rbind(
  data.frame(mother_id = "0", mother_name = "O*NET"),
  data %>%
    rename(
      mother_id = element_id,
      mother_name = element_name
    )
)
# Add the mother_id column to the original data frame
data$mother_id <- mother_id
# Clean up objects we no longer need
remove(id_parts, mother_id)
# Build the edge list in the two-column shape passed to data.tree below:
# attach every daughter to its mother via mother_id, keep only the two
# names (the daughter's label is renamed to child_name on the way), and
# discard rows with a missing name, i.e. mothers that have no daughters
edges <- mothers %>%
  left_join(data, by = "mother_id") %>%
  select(mother_name, child_name = element_name) %>%
  drop_na()
# Turn the mother/child edge list into a data.tree
onet.skills <- FromDataFrameNetwork(edges)
# Show the finished tree on the console
print(onet.skills)
# Save the data.tree so it can be accessed via library(skillr)
save(onet.skills, file = "data/onet.skills.RData", version = 2)
# Clean up the remaining working objects in one go
rm(onet.skills, data, edges, mothers)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.