Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(GenderInfer)
library(dplyr)
library(ggplot2)
## ---- warning=FALSE, message=FALSE--------------------------------------------
head(authors)
## ---- warning=FALSE, message=FALSE--------------------------------------------
authors_df <- assign_gender(data_df = authors, first_name_col = "first_name")
head(authors_df)
## ---- warning=FALSE, message=FALSE--------------------------------------------
## Count how many female, male and unknown gender there are in the data
authors_df %>% count(gender)
## per gender and country
authors_df %>% count(gender, country_code)
## ---- warning=FALSE, message=FALSE-------------------------------------------
## calculates baseline for the year range 2016-2019
baseline_female <- baseline(data_df = authors_df %>%
filter(publication_years %in% seq(2016, 2019)),
gender_col = "gender")
baseline_female
## -----------------------------------------------------------------------------
## Create a data frame that containing only the data from 2020 and
## the count of the variable gender.
female_count_2020 <- authors_df %>%
filter(publication_years == 2020) %>%
count(gender)
## create a new data frame to be used for the binomial calculation.
df_gender <- reshape_for_binomials(data = female_count_2020,
gender_col = "gender",
level = 2020)
#df_gender <- test(female_count_2020, "gender", 2020)
df_gender
## ---- fig.width=6, fig.height=4-----------------------------------------------
## Calculate the binomial
## Create a new column with the baseline and calculate the binomial.
df_gender <- calculate_binom_baseline(data_df = df_gender,
baseline_female = baseline_female)
df_gender
## Reshape first the dataframe using `gender_total_df` and afterwards create a
## bar chart of showing the number of male, female and unknown gender with `gender_bar_chart`
gender_total <- total_gender_df(data_df = df_gender, level = "level")
bar_chart(data_df = gender_total, x_label = "Year",
y_label = "Total number")
## ---- fig.width=6-------------------------------------------------------------
## reshape the dataframe using the function `percent_df`.
## Add to `stacked_bar_chart` coord_flip() from ggplot2 to invert the xy axis.
# percent_df(data_df = df_gender)
percent_data <- percent_df(data_df = df_gender)
stacked_bar_chart(percent_data, baseline_female = baseline_female,
x_label = "Year", y_label = "Percentage of authors",
baseline_label = "Female baseline 2016-2019:") +
coord_flip()
## -----------------------------------------------------------------------------
## calculate binomials for us and uk.
## Reshape the dataframe and filter it country UK and US and year 2020 and count
## gender per countries.
# as.data.frame(t(with(authors_df, tapply(n, list(gender), c))))
UK_US_df <- reshape_for_binomials(data_df = authors_df %>%
filter(country_code %in% c("UK", "US"),
publication_years == 2020) %>%
count(gender, country_code),
gender_col = "gender", level = "country_code")
## To calculate the baseline for each country we can use the function `sapply`
baseline_uk_us <- sapply(UK_US_df$level, function(x) {
baseline(data_df = authors_df %>%
filter(country_code %in% x, publication_years %in% seq(2016, 2019)),
gender_col = "gender")
})
baseline_uk_us
UK_US_binom <- calculate_binom_baseline(data_df = UK_US_df,
baseline_female = baseline_uk_us)
UK_US_binom
## ---- fig.width=6, fig.height=4-----------------------------------------------
percent_uk_us <- percent_df(UK_US_binom)
bullet_chart <- bullet_chart(data_df = percent_uk_us,
baseline_female = baseline_uk_us,
x_label = "Countries", y_label = "% Authors",
baseline_label = "Female baseline for 2016-2019")
bullet_chart
## -----------------------------------------------------------------------------
## calculate binomials for US and UK
UK_df <- reshape_for_binomials(data_df = authors_df %>%
filter(country_code == "UK") %>%
count(gender, publication_years),
"gender", "publication_years")
UK_df
## create a baseline vector containing values for each year from 2016 to 2020.
## using as country to compare France.
baseline_fr <- sapply(seq(2016, 2020), function(x) {
baseline(data_df = authors_df %>%
filter(country_code == "FR", publication_years %in% x),
gender_col = "gender")
})
baseline_fr
UK_binom <- calculate_binom_baseline(UK_df, baseline_female = baseline_fr)
UK_binom
## ---- fig.width=7, fig.height=5-----------------------------------------------
## Calculate the total number of submission per country and per year
percent_uk <- percent_df(UK_binom)
## calculate the number of submission from UK
total_uk <- authors_df %>%
filter(country_code == "UK") %>%
count(publication_years) %>%
mutate(x_values = factor(publication_years,
levels = publication_years))
## conversion factor to create the second y-axis
c <- min(total_uk$n) / 100
bullet_line_chart(data_df = percent_uk, baseline_female = baseline_fr,
x_label = "year", y_bullet_chart_label = "Authors submission (%)",
baseline_label = "French Female baseline",
line_chart_df = total_uk,
line_chart_scaling = c, y_line_chart_label = "Total number",
line_label = "Total submission UK")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.