# (C) Philippe J.S. De Brouwer -- 2021 library(div) library(tidyverse) library(flexdashboard) library(knitr) library(plotly) library(gridExtra) library(kableExtra) library(grid) library(lattice) # The fake team: teamSize <- 400 pctFemales <- 0.35 maleSalaryBias <- 1.035 d <- div_fake_team(seed = 18901228, N = teamSize, genders = c("F", "M"), gender_prob = c(pctFemales, 1 - pctFemales), gender_salaryBias = c(1, maleSalaryBias) ) # Gender diversity tibble per grade: tbl_gender_div <- table(d$gender, d$grade) %>% apply(2, diversity, prior = c(50.2, 49.8)) %>% tibble(value = ., label = paste("Grade", names(.))) # Average gender diversity tibble: tbl_gender_div_all <- table(d$gender) %>% as.vector %>% diversity(prior = c(50.2, 49.8)) %>% tibble(value = ., label = "Gender Diversity") # Age diversity tibble: ageRef <- c(15.94, 27.11, 23.99, 20.68, 12.28) / 100 ad <- table(cut(d$age, breaks = c(20, 30, 40, 50, 60, 65))) ad <- ad / sum(ad) # not needed, diversity() does this, bu useful for the table tbl_age_div <- tibble(value = diversity(ad, prior = ageRef), label = "Age Diversity") # Gender PayGap tibble: pg_gender <- div_paygap(d) # Citizenship PayGap tibble pg_citizenship <- div_paygap(d, x = "citizenship" , x_ctrl = "Polish") # Age: pg_age <- div_paygap(d, x = "age") # tenure in the company: pg_tenure_firm <- div_paygap(d, x = "tenure_firm")
The data used for this report is randomly generated with the following characteristics:
r teamSize
r pctFemales
r maleSalaryBias
{width=100%}
f1 <- tibble(Nbr = 1, Area = "Gender", Finding = "Where we can calculate the paygap between females and non-females, we find that the females generally earn less in similar roles and similar grades.", Suggestion = "Check the gender-paygap table and identify the grade/role combinations where an the paygap has most stars. Check if the salary differences are justified.") f2 <- tibble(Nbr = 2, Area = "Age", Finding = "The team is predominantly younger than the surrounding population (Poland).", Suggestion = "Consider hiring older people to balance. Focus on retention.") f3 <- tibble(Nbr = 3, Area = "Gender", Finding = "The diversity is good in grade 1 and 2, but under par in grade 3", Suggestion = "Consider if females have barriers to apply to grade 3 jobs and remove the barriers.") f4 <- tibble(Nbr = 4, Area = "Gender", Finding = "Males in Grade 2 seem to have been promoted faster.", Suggestion = "Understand unconscious bias, coach everyone (and specially females), work on trust.") findings <- rbind(f1, f2, f3, f4) colnames(findings) <- c("Nbr", "Area", "Finding", "Suggestion") knitr::kable(findings) %>% #column_spec(3:4, width = "12em") kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
f_wrap <- function(x) { strwrap(x, width = 55, simplify = FALSE) %>% sapply(paste, collapse = "\n") } lay1 <- rbind(c(1,1,2), c(1,1,2) ) p1_1 <- div_gauge_plot(tbl_gender_div, ncol = 2, nbrSize = 4) p1_2 <- div_gauge_plot(tbl_gender_div_all, ncol = 1) p1 <- arrangeGrob(p1_1, p1_2, layout_matrix = lay1) p2 <- textGrob(f_wrap(f3$Finding)) p3 <- div_gauge_plot(tbl_age_div, ncol = 1) p4 <- textGrob(f_wrap(f2$Finding)) p5 <- div_plot_paygap_distribution(pg_gender$data$paygap, label = "Gender") p6 <- textGrob(f_wrap(f1$Finding)) p7 <- div_plot_paygap_distribution(pg_citizenship$data$paygap, label = "Citizenship") p8 <- textGrob(f_wrap("No Bias detetected for Citizenship (in salary) -- both team and unbiased distribution are virtually the same")) lay <- rbind(c(2,2,4,4), c(1,1,3,3), c(1,1,3,3), c(1,1,3,3), c(6,6,8,8), c(5,5,7,7), c(5,5,7,7), c(5,5,7,7) ) grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, layout_matrix = lay)
paygap
= the ratio of median salaries of one group divided by the median of the salaries of the other groupNA
} = numbers are too small, please look at individuals;.
} = maybe there is some bias, but the numbers are low, check individuals;*
} = you should check for bias;**
} = bias is probably there;***
} = most certainly there is biasSo, there will be more stars if the probability of a bias is higher: this can be due to a higher bias and/or due to a larger sample size.
\small
grade
= the salary grade as used in the companyjobID
= a unique identifier of the job category (can be abbreviated)sal_F
= the median salary of the females (F)sal_oth
= the median salary of the other groups (non F). The tool is open to use more than one gender.age_F
= the median age of the females (or age_Pol
could be the median age of the team members with Polish citizenship)age_oth
= the median age of the other groups take together (e.g. the median age of non females)paygap
= the ratio of median salary earned by the selected group (e.g. females) divided by the median of the other people. If this is lower than $1$, then median female earns less than the median non-female.conf.
= the confidence level that this paygap is significant.Info The diversity indexes show how diverse our workforce is. They are calculated similar to entropy: $I = -\frac{1}{\log(N)} \sum_i^N {p_i \log p_i}$, where there are $N$ possible and mutually exclusive states $i$. They range from $0$ to $1$.
```r"} t <- table(d$gender, d$grade) t1 <- apply(t, 2, diversity, prior = c(50.2, 49.8)) t2 <- tibble(value = t1, label = paste("Grade", names(t1))) div_gauge_plot(t2, ncol = 3)
### The distribution of paygap. All paygaps, however go in the same direction. This is not good and needs to be addressed. ```r div_plot_paygap_distribution(pg_gender$data$paygap, label = "Gender")
div_parse_paygap(pg_gender)
p1 <- ggplot(data = d, aes(x=gender, y=salary, fill=gender)) + geom_boxplot() + facet_grid(grade ~ jobID, scales="free_y") + ggtitle('The salary gap per salary grade (level in the company)') ggplotly(p1)
```r)."} ylim1 = boxplot.stats(1/d$tenure_job)$stats[c(1, 5)] p <- ggplot(data = d, aes(x=gender, y=1/tenure_job, fill=gender)) + geom_boxplot() + facet_wrap(~grade) + coord_cartesian(ylim = ylim1*1.05) p
### Promotions per Year per Gender ```r"} ylim1 = boxplot.stats(1/d$tenure_job)$stats[c(1, 5)] p <- ggplot(data = d, aes(x=gender, y=1/tenure_grade, fill=gender)) + geom_boxplot() + facet_wrap(~grade) + coord_cartesian(ylim = ylim1*1.05) p
findings %>% filter(Area == "Gender") %>% select(-c(Area)) %>% kable(format = "html") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```r"}
p1 <- div_gauge_plot(tbl_age_div) t <- rbind(ad, ageRef) rownames(t) <- c("team", "reference")
t2 <- t %>% t() %>% as_tibble %>% mutate(age_group = rownames(t(t))) %>% as_tibble %>% gather("population", "percentage", 1:2)
p2 <- ggplot(t2, aes(x = age_group, y = percentage, fill = population)) + geom_bar(stat="identity", position=position_dodge()) + geom_text(aes(label=round(percentage,2)), vjust=1.6, color="black", position = position_dodge(0.9), size=3.5) + theme(legend.position="bottom")
grid.arrange(p1, p2, ncol=2, widths = 1:2)
### The distribution of age paygap reveals no systhematic bias. ```r div_plot_paygap_distribution(pg_age$data$paygap, label = "Age")
pg_age %>% div_parse_paygap
findings %>% filter(Area == "Age") %>% select(-c(Area)) %>% kable(format = "html") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
#t <- table(d$citizenship, d$grade) t <- d %>% group_by(citizenship) %>% summarise(n = n()) %>% arrange(desc(n)) %>% ggplot( aes(x = citizenship, y = n)) + geom_col(aes(fill = n), width = 0.7) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5), axis.title.x=element_blank()) t
t <- d %>% group_by(citizenship, grade) %>% summarise(n = n()) %>% ggplot( aes(x = grade, y = n)) + geom_col(aes(fill = n), width = 0.7) + facet_wrap(~ citizenship, scales="free_y") + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5), axis.title.x=element_blank()) t
div_plot_paygap_distribution(pg_citizenship$data$paygap, label = "Citizenship")
div_parse_paygap(pg_citizenship)
# the default R-approach: # library(rworldmap) nbrPerCountry = read.table(text=" country value Poland 100 Ukraine 65 UK 2 USA 1 China 3 Germany 0 France 1 Italy 20 Greece 25 Spain 13 Portugal 7 Mexico 55 Belarus 5 Russia 7 Vietnam 1 India 25 Belgium 2 Chile 6 Congo 1 ", header=T) invisible(capture.output({ x <- joinCountryData2Map(nbrPerCountry, joinCode="NAME", nameJoinColumn="country", verbose=FALSE) })) mapCountryData(x, nameColumnToPlot="value", catMethod="fixedWidth", mapTitle="Where we are from")
#install.packages('maps') library(maps) #install.packages('sp') library(sp) #install.packages('leaflet') library(leaflet) map <- leaflet() %>% addProviderTiles(providers$OpenStreetMap) %>% setView(lng = 0, lat = 0, zoom = 2) map
div_plot_paygap_distribution(pg_tenure_firm$data$paygap, label = "Tenure in the Firm")
pg_tenure_firm %>% div_parse_paygap
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.