library(tidyverse)
library(zoo)
library(viridis)
library(collapsibleTree)
library(R6)
library(readxl)
library(janitor)
library(svglite)
library(ggforce) #includes geom_circle (https://ggforce.data-imaginist.com/reference/geom_circle.html)
library(ggfittext)
library(ggrepel)
# ------------------------------------------------------------------------------
# Coursera skills taxonomy linked to the WEF taxonomy
# Load the Coursera-to-Forum taxonomy map and report how many rows have no
# Forum skill recorded against them
cdata <- read.csv("coursera-data-feb/coursera_wef_taxonomy_map.csv")
print(paste(
  "Of", length(cdata$wef_skill), "Coursera skills,",
  sum(is.na(cdata$wef_skill)), "have no Forum counterpart"
))
# Keep the level-2 rows and only the three ID columns, then sort by every
# column in turn so the dendrogram branches come out well ordered
cdata <- cdata %>%
  filter(level == 2) %>%
  select(domain_id, competency_id, skill_id) %>%
  arrange_all()
# Build and show the interactive dendrogram.
# NOTE(review): the backticked name is presumably there so the tree's root node
# is labelled "Coursera Skills Taxonomy" -- confirm against collapsibleTree docs
`Coursera Skills Taxonomy` <- cdata
p <- collapsibleTree(
  `Coursera Skills Taxonomy`,
  c("domain_id", "competency_id", "skill_id")
)
p
# The following section is commented as the input data is incomplete -----------
# Make the diagram with names not IDs
#`Coursera Skills Taxonomy`$domain_name <- data$skill_name[match(`Coursera Skills Taxonomy`$domain_id , data$skill_id)]
#`Coursera Skills Taxonomy`$competency_name <- data$skill_name[match(`Coursera Skills Taxonomy`$competency_id, data$skill_id)]
#`Coursera Skills Taxonomy`$skill_name <- data$skill_name[match(`Coursera Skills Taxonomy`$skill_id , data$skill_id)]
# However quite a few competency names were not included in the "dictionary" (data)
# As a result they are NA here
# As a result the dendrogram doesn't quite work right
# Have asked Eric Karsten about this
# Print the interactive dendrogram
#p <- collapsibleTree(`Coursera Skills Taxonomy`, c("domain_name", "competency_name", "skill_name"))
#p
# ------------------------------------------------------------------------------
# Add mappings from each of domains, competencies and skills to Forum level-4 skills
# Propagate the domain mappings and the competency mappings down to skills mappings
# Note that a single Coursera skill is often mapped by two Forum level-4 skills,
# as they may be mapped twice as e.g. domains and competencies
#
# The lookup table has to be loaded explicitly: at this point in the script no
# object called `data` exists (it would resolve to the utils::data function and
# fail on `$`), and `cdata` has already been reduced to its three ID columns.
# NOTE(review): assumes the taxonomy map CSV is the intended "dictionary", i.e.
# its skill_id column also lists domain and competency IDs -- confirm.
coursera_map <- read.csv("coursera-data-feb/coursera_wef_taxonomy_map.csv")
# match() gives, for each ID, the row of the first matching skill_id (NA when
# the ID is absent), so unmapped IDs yield NA in the new columns
`Coursera Skills Taxonomy`$wef_skill1 <-
  coursera_map$wef_skill[match(`Coursera Skills Taxonomy`$domain_id, coursera_map$skill_id)]
`Coursera Skills Taxonomy`$wef_skill2 <-
  coursera_map$wef_skill[match(`Coursera Skills Taxonomy`$competency_id, coursera_map$skill_id)]
`Coursera Skills Taxonomy`$wef_skill3 <-
  coursera_map$wef_skill[match(`Coursera Skills Taxonomy`$skill_id, coursera_map$skill_id)]
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# WEF skills taxonomy linked to the WEF taxonomy
# Read the Forum (WEF) taxonomy from Excel and keep a copy as an .RData file
forum_skills <- read_excel(path = "data/taxonomy-WEF.xlsx")
save(forum_skills, file = "data/forum_skills.RData")
# Fill downwards as the Excel file's merged cells were made NA except at the top
# Don't do this for level 4 as some are intentionally blank
# (the pipeline starts from forum_skills: fdata does not exist before this point)
fdata <- forum_skills %>%
  select(L1, L2, L3, L4) %>%
  fill(L1, L2, L3)
# Make them factors now we have done necessary naming fixes to Excel lapses
fdata <- fdata %>%
  transmute(
    L1 = factor(L1),
    L2 = factor(L2),
    L3 = factor(L3),
    L4 = factor(L4)
  )
# Print the interactive dendrogram
`Forum Skills Taxonomy` <- fdata
p <- collapsibleTree(`Forum Skills Taxonomy`, c("L1", "L2", "L3", "L4"))
p
# ------------------------------------------------------------------------------
# Let's start by mapping Coursera competencies (L2) to Forum level-3 skills
# First we need a level-2-granularity data frame for Coursera:
# one row per distinct (domain, competency) pair
cdata2 <- cdata %>%
  rename(
    L1 = domain_id,
    L2 = competency_id,
    L3 = skill_id
  ) %>%
  select(L1, L2) %>%
  unique()
# Now make a level-3-granularity data frame for the Forum's taxonomy
fdata3 <- fdata %>%
  select(L1, L2, L3) %>%
  unique()
# As we are making a matrix to go with level-2 Coursera, add two empty rows
# (after transposing they become two leading empty columns, one per Coursera
# label column)
fdata3 <- bind_rows(
  data.frame(L1 = c(NA, NA), L2 = c(NA, NA), L3 = c(NA, NA)),
  fdata3
)
# Now transpose it, so we can build the redistribution matrix
fdata3T <- t(fdata3)
# Force the column names to be the same so we can bind the data frames to make the matrix structure
# NOTE(review): cdata2 has two columns while fdata3T has one column per Forum
# row, so this assignment only lines up if colnames(fdata3T) is NULL or has
# length two -- confirm it behaves as intended on the real data
colnames(cdata2) <- colnames(fdata3T)
# Bind the labelling parts of the matrix
library(data.table)
cfmatrix <- rbindlist(list(as.data.table(fdata3T), cdata2), fill = TRUE)
# Output a csv file to fill with 1s and 0s to redistribute enrollment numbers.
# write.csv() ignores col.names (with a warning) and always writes a header, so
# use write.table() with write.csv()'s separator and quoting to really omit it
write.table(
  cfmatrix,
  "empty-matrix.csv",
  sep = ",",
  qmethod = "double",
  col.names = FALSE,
  row.names = FALSE
)
# !!!!
# To do:
# - The enrollment data is mixed between level 1 and 2
# - There is some duplication, with categories and subcategories sometimes both reported
# - So you need to do a bespoke job; you can't just make a single level-X to level-Y aggregation matrix
# - Start with the data, and choose which to use when there is duplication
# - Manually sort what is available into level-3 categories
# - ...or level-4 where it seems more appropriate
# - ...while adding in sensible industry specialized categories where needed
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Experimentation with OO taxonomies
# Toy parent/child edge list: each daughter node paired with its mother node
daughters <- letters[2:8]
mothers <- rep(c("a", "b", "c"), times = c(2, 2, 3))
data <- data.frame(daughters, mothers)
# Taxonomy: minimal R6 class pairing a taxonomy's name with its data.
# Both values are stored privately and exposed through read-only getters.
Taxonomy <- R6Class(
  classname = "Taxonomy",
  private = list(
    name = NULL,
    data = NULL,
    # Declared but not used by any method in this class
    daughters = vector()
  ),
  public = list(
    # Store the taxonomy's name and its data
    initialize = function(name, data) {
      private$name <- name
      private$data <- data
    },
    # Return the name given at construction
    GetName = function() {
      private$name
    },
    # Return the data given at construction
    GetData = function() {
      private$data
    }
  )
)
# Wrap the toy mother/daughter table in a Taxonomy object
taxonomy <- Taxonomy$new("WEF", data)
# Skill: a named node in a skills taxonomy holding a list of daughter skills.
#
# Daughters are passed in by the caller. Creating them inside initialize()
# would make every new Skill create further Skills without end, so
# construction would never terminate.
Skill <- R6Class(
  "Skill",
  public = list(
    # Create a skill.
    #   name      - label for this skill
    #   daughters - list of Skill objects directly beneath this one
    #               (defaults to an empty list, i.e. a leaf)
    initialize = function(name, daughters = list()) {
      stopifnot(is.list(daughters))
      private$name <- name
      private$daughters <- daughters
    },
    # Return the name given at construction
    GetName = function() {
      private$name
    },
    # Print the structure of the daughters list, then return the list
    GetDaughters = function() {
      str(private$daughters)
      private$daughters
    }
  ),
  private = list(
    name = NULL,
    daughters = list()
  )
)
# Demo: one parent skill with two leaf daughters
tax1 <- Skill$new(
  "Alexia",
  daughters = list(Skill$new("Bob"), Skill$new("Janet"))
)
tax1$GetName()
z <- tax1$GetDaughters()
# (Web page residue, not part of the script; kept as comments so the file parses)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.