Diversity and Inclusion Report

# Libraries
library(div)
library(gridExtra)
library(kableExtra)
library(grid)
library(lattice)

# Options and defaults:
options(knitr.table.format = "latex")
knitr::opts_chunk$set(echo = FALSE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
knitr::opts_chunk$set(fig.width=12, fig.height=6) 

# The fake team:
teamSize       <- 400
pctFemales     <- 0.35
maleSalaryBias <- 1.035
d <- div_fake_team(seed = 18901228, 
                   N = teamSize, 
                   genders = c("F", "M"),
                   gender_prob = c(pctFemales, 1 - pctFemales),
                   gender_salaryBias = c(1, maleSalaryBias)
                   )


# Gender diversity tibble per grade:
tbl_gender_div <- table(d$gender, d$grade) %>%
 apply(2, diversity, prior = c(50.2, 49.8)) %>%
 tibble(value = ., label = paste("Grade", names(.)))

# Average gender diversity tibble:
tbl_gender_div_all <- table(d$gender) %>%
  as.vector %>%
  diversity(prior = c(50.2, 49.8)) %>%
 tibble(value = ., label = "Gender Diversity")

# Age diversity tibble:
ageRef <- c(15.94, 27.11, 23.99, 20.68, 12.28) / 100
ad <- table(cut(d$age, breaks = c(20, 30, 40, 50, 60, 65)))
ad <- ad / sum(ad)  # not needed, diversity() does this, bu useful for the table
tbl_age_div <- tibble(value = diversity(ad, prior = ageRef), label = "Age Diversity")

# Gender PayGap tibble:
pg_gender <- div_paygap(d)


# Citizenship PayGap tibble
pg_citizenship <- d                %>%
             div_paygap(x = "citizenship" , x_ctrl = "Polish")

Table of Contents

\tableofcontents[hideallsubsections]

The Particular Example Behind this Demo Report

The data used for this report is randomly generated with the following characteristics:

Equitable outcomes are not the same as equal outcomes!

Overview

Findings (in order of importance)

\small

f1 <- tibble(Nbr = 1, 
             Area = "Gender", 
             Finding = "Where we can calculate the paygap between females and non-females, we find that the females generally earn less in similar roles and similar grades.", 
             Suggestion = "Check the gender-paygap table and identify the grade/role combinations where an the paygap has most stars. Check if the salary differences are justified.")
f2 <- tibble(Nbr = 2, 
             Area = "Age", 
             Finding = "The team is predominantly younger than the surrounding population (Poland).", 
             Suggestion = "Consider hiring older people to balance. Focus on retention.")
f3 <- tibble(Nbr = 3, 
             Area = "Gender", 
             Finding = "The diversity is good in grade 1 and 2, but under par in grade 3", 
             Suggestion = "Consider if females have barriers to apply to grade 3 jobs and remove the barriers.")
f4 <- tibble(Nbr = 4, 
             Area = "Gender", 
             Finding = "Males in Grade 2 seem to have been promoted faster.", 
             Suggestion = "Understand unconscious bias, coach everyone (and specially females), work on trust.")
findings <- rbind(f1, f2, f3, f4)
colnames(findings) <- c("Nbr", "Area", "Finding", "Suggestion")

knitr::kable(findings) %>% column_spec(3:4, width = "12em")
write_csv(findings,'findings.csv')

Dashboard

f_wrap <- function(x) {
  strwrap(x, width = 55, simplify = FALSE) %>%
  sapply(paste, collapse = "\n")
}

lay1 <- rbind(c(1,1,2),
              c(1,1,2)
              )
p1_1 <- div_gauge_plot(tbl_gender_div, ncol = 2, nbrSize = 4)
p1_2 <- div_gauge_plot(tbl_gender_div_all, ncol = 1)
p1 <- arrangeGrob(p1_1, p1_2, layout_matrix = lay1)
p2 <- textGrob(f_wrap(f3$Finding))

p3 <- div_gauge_plot(tbl_age_div, ncol = 1)
p4 <- textGrob(f_wrap(f2$Finding))

p5 <- div_plot_paygap_distribution(pg_gender$data$paygap, label = "Gender")
p6 <- textGrob(f_wrap(f1$Finding))

p7 <- div_plot_paygap_distribution(pg_citizenship$data$paygap, label = "Citizenship")
p8 <- textGrob(f_wrap("No Bias detetected for Citizenship (in salary) -- both team and unbiased distribution are virtually the same"))

lay <- rbind(c(2,2,4,4),
             c(1,1,3,3),
             c(1,1,3,3),
             c(1,1,3,3),
             c(6,6,8,8),
             c(5,5,7,7),
             c(5,5,7,7),
             c(5,5,7,7)
             )

grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, layout_matrix = lay)

Diversity

Gender diversity per grade

```r"} t <- table(d$gender, d$grade) t1 <- apply(t, 2, diversity, prior = c(50.2, 49.8)) t2 <- tibble(value = t1, label = paste("Grade", names(t1))) div_gauge_plot(t2, ncol = 3)

## Age diversity

```r"}
# ages in Poland
p1 <- div_gauge_plot(tbl_age_div)
t <- rbind(ad, ageRef)
rownames(t) <- c("team", "reference")
#knitr::kable(round(t,2), 
#             caption = "The percentages of different age groups in this team compared to the population of Poland. An equal distribution would yield a diversity of 1.\\label{tab:div:age}")

t2 <- t %>% t() %>% 
  as_tibble %>%
  mutate(age_group = rownames(t(t))) %>%
  as_tibble %>% gather("population", "percentage", 1:2)

p2 <- ggplot(t2, aes(x = age_group, y = percentage, fill = population)) +
  geom_bar(stat="identity",  position=position_dodge()) +
  geom_text(aes(label=round(percentage,2)), vjust=1.6, color="black", position = position_dodge(0.9), size=3.5) + 
  theme(legend.position="bottom")

grid.arrange(p1, p2, ncol=2, widths = 1:2)

Diversity in nationalities (1/2)

\framesubtitle{Distribution for the team}

#t <- table(d$citizenship, d$grade)
t <- d                     %>% 
     group_by(citizenship) %>% 
     summarise(n = n())    %>%
     arrange(desc(n))      %>%
     ggplot( aes(x = citizenship, y = n))    +
        geom_col(aes(fill = n), width = 0.7) +
        theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),
              axis.title.x=element_blank())

t

Diversity in nationalities (2/2)

\framesubtitle{Breakdown per Grade}

t <- d                            %>% 
     group_by(citizenship, grade) %>% 
     summarise(n = n())           %>%
     ggplot( aes(x = grade, y = n))                +
        geom_col(aes(fill = n), width = 0.7)       +
        facet_wrap(~ citizenship, scales="free_y") +
        theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),
                axis.title.x=element_blank())

t

Inclusion

The Gender PayGap

div_parse_paygap(pg_gender)

The Citizenship PayGap

div_parse_paygap(pg_citizenship)

The Age Paygap

div_paygap(d, x = "age") %>% div_parse_paygap

Time in firm paygap

div_paygap(d, x = "tenure_firm") %>% div_parse_paygap

Job Changes per Year per Gender

```r)."} ylim1 = boxplot.stats(1/d$tenure_job)$stats[c(1, 5)] p <- ggplot(data = d, aes(x=gender, y=1/tenure_job, fill=gender)) + geom_boxplot() + facet_wrap(~grade) + coord_cartesian(ylim = ylim1*1.05) p

## Promotions per Year per Gender

```r"}
ylim1 = boxplot.stats(1/d$tenure_job)$stats[c(1, 5)]
p <- ggplot(data = d, aes(x=gender, y=1/tenure_grade, fill=gender)) + 
    geom_boxplot() +
    facet_wrap(~grade) + 
    coord_cartesian(ylim = ylim1*1.05)
p

Conclusions

conclusions <- rbind(tibble(Nbr = 0, 
                            Suggestion = 'Learn more by reading e.g. "The Essentials of Diversification & Inclusion", Dabrowska (2019)' ),
                     findings[,c(1,4)])
knitr::kable(conclusions) %>% column_spec(2, width = "22em")

nocite: | @diversityhub2019


Appendices

Legend Paygap

So, there will be more stars if the probability of a bias is higher: this can be due to a higher bias and/or due to a larger sample size.

Legend: Paygap Column Headers

\small

The Diversity Index (1/2) {.shrink}

We express diversity as a number between zero and one. Our calculation is based on @debrouwer2020 and more in particular section 36.3.1 ``The Business Case: a Diversity Dashboard''. Details can be found in the book. The method is:

The Diversity Index (2/2)

```r"} x <- seq(length.out = 500, from = 0, to = 1) t <- cbind(x, 1-x) tt <- tibble(diversity = apply(t, 1, FUN = diversity), x = x) p1 <- ggplot(data=tt, aes(x=x, y=diversity, color = diversity)) + geom_line(lwd = 3) + ggtitle("Diversity index for binary variables and prior probabilities of 50/50") + xlab("Proportion of one of the two classes")

tt <- tibble(diversity = apply(t, 1, FUN = diversity, prior = c(.7,.3)), x = x) p2 <- ggplot(data=tt, aes(x=x, y=diversity, color = diversity)) + geom_line(lwd = 3) + ggtitle("Diversity index for binary variables and prior probabilities of 70/30") + xlab("Proportion of one of the two classes") grid.arrange(p1, p2, ncol = 1)

## The confidence level and p-value
The p-value is the probability that we make a mistake by assuming that there is no paygap.

It is calculated by splitting the data on a variable in binary factors (e.g. Females and others) and then checking how likely it is that a random person from the first group earns less than a random person from the second group. This is done by a method known as Mann-Whitney U test: [see Wikipedia](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test)[^1]


[^1]: The MannWhitney U test (aka. MannWhitneyWilcoxon (MWW), Wilcoxon rank-sum test, or WilcoxonMannWhitney test) is a nonparametric test of the null hypothesis that, for randomly selected values X and Y from two populations, the probability of X being greater than Y is equal to the probability of Y being greater than X. If we assume that the distributions are symmetric, it boils down to a test that the medians are different.

## Another view on the PayGap

```r."}
# Transformation function for 2 decimals
scaleFUN <- function(x) sprintf("%.0f", x)

p <- ggplot(data = d, aes(x=gender, y=salary, fill=gender)) + 
    geom_boxplot() +
    facet_wrap(~grade + jobID, scale="free") + 
    scale_y_continuous(labels=scaleFUN)
p

Bibliography



Try the div package in your browser

Any scripts or data that you put into this service are public.

div documentation built on May 6, 2021, 9:06 a.m.