## inst/doc/GenderInfer.R

## ---- include = FALSE---------------------------------------------------------
# Global knitr chunk options: collapse source and output into a single block
# and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")


## ----setup--------------------------------------------------------------------
library(GenderInfer)
library(dplyr)
library(ggplot2)

## ---- warning=FALSE, message=FALSE--------------------------------------------
## Preview the first rows of the `authors` object.
authors %>% head()

## ---- warning=FALSE, message=FALSE--------------------------------------------
## Run `assign_gender()` on `authors`, using the "first_name" column as the
## first-name input, and keep the result as `authors_df`.
authors_df <- assign_gender(
  data_df = authors,
  first_name_col = "first_name"
)

## Preview the first rows of the result.
authors_df %>% head()

## ---- warning=FALSE, message=FALSE--------------------------------------------
## Number of rows for each value of `gender` (female, male, unknown)
count(authors_df, gender)

## The same tally broken down by gender and country code
count(authors_df, gender, country_code)


## ----  warning=FALSE, message=FALSE-------------------------------------------
## Baseline computed from the records published in the years 2016-2019.
baseline_female <- authors_df %>%
  filter(publication_years %in% seq(2016, 2019)) %>%
  baseline(data_df = ., gender_col = "gender")

baseline_female


## -----------------------------------------------------------------------------
## Keep only the 2020 records and count the rows for each value of `gender`.
female_count_2020 <- authors_df %>%
  filter(publication_years == 2020) %>%
  count(gender)

## Reshape the counts into the data frame used for the binomial calculation.
## The first argument is spelled out as `data_df`, matching the other
## `reshape_for_binomials()` calls in this script, instead of relying on
## partial matching of `data`.
df_gender <- reshape_for_binomials(data_df = female_count_2020,
                                   gender_col = "gender",
                                   level = 2020)

df_gender

## ---- fig.width=6, fig.height=4-----------------------------------------------
## Run the binomial calculation on the 2020 data against the 2016-2019
## baseline; the result replaces `df_gender`.
df_gender <- df_gender %>%
  calculate_binom_baseline(data_df = ., baseline_female = baseline_female)

df_gender

## Reshape the result with `total_gender_df()` first, then draw a bar chart of
## the number of male, female and unknown gender with `bar_chart()`.
gender_total <- df_gender %>%
  total_gender_df(data_df = ., level = "level")

gender_total %>%
  bar_chart(data_df = ., x_label = "Year", y_label = "Total number")


## ---- fig.width=6-------------------------------------------------------------
## Reshape `df_gender` with `percent_df()`, draw the stacked bar chart, and
## append ggplot2's `coord_flip()` to swap the x and y axes.
percent_data <- df_gender %>% percent_df(data_df = .)

stacked_bar_chart(
  percent_data,
  baseline_female = baseline_female,
  x_label = "Year",
  y_label = "Percentage of authors",
  baseline_label = "Female baseline 2016-2019:"
) +
  coord_flip()
 


## -----------------------------------------------------------------------------
## Calculate binomials for the UK and the US.
## Keep only the 2020 records from the UK and the US, count gender per
## country, and reshape the counts for the binomial calculation.
UK_US_df <- reshape_for_binomials(data_df = authors_df %>%
                                    filter(country_code %in% c("UK", "US"),
                                           publication_years == 2020) %>%
                                    count(gender, country_code),
                                  gender_col = "gender",
                                  level = "country_code")

## One baseline per country, computed from that country's 2016-2019 records.
## `vapply()` is used instead of `sapply()` so the result is guaranteed to be
## a numeric vector with exactly one value per entry of `UK_US_df$level`.
baseline_uk_us <- vapply(UK_US_df$level, function(x) {
  baseline(data_df = authors_df %>%
             filter(country_code %in% x,
                    publication_years %in% seq(2016, 2019)),
           gender_col = "gender")
}, FUN.VALUE = numeric(1))

baseline_uk_us

## Binomial calculation for each country against its own baseline.
UK_US_binom <- calculate_binom_baseline(data_df = UK_US_df,
                                        baseline_female = baseline_uk_us)

UK_US_binom

## ---- fig.width=6, fig.height=4-----------------------------------------------
## Reshape the UK/US binomial results with `percent_df()`.
percent_uk_us <- percent_df(UK_US_binom)

## Store the plot under its own name so that the variable does not shadow the
## `bullet_chart()` function it was created with.
uk_us_bullet_chart <- bullet_chart(
  data_df = percent_uk_us,
  baseline_female = baseline_uk_us,
  x_label = "Countries",
  y_label = "% Authors",
  baseline_label = "Female baseline for 2016-2019"
)
uk_us_bullet_chart

## -----------------------------------------------------------------------------
## Calculate binomials for the UK, one level per publication year.
UK_df <- reshape_for_binomials(data_df = authors_df %>%
                                 filter(country_code == "UK") %>%
                                 count(gender, publication_years),
                               gender_col = "gender",
                               level = "publication_years")

UK_df

## Create a baseline vector containing one value for each year from 2016 to
## 2020, using France as the country to compare against.
## `vapply()` is used instead of `sapply()` so the result is guaranteed to be
## a numeric vector with exactly one value per year.
baseline_fr <- vapply(seq(2016, 2020), function(x) {
  baseline(data_df = authors_df %>%
             filter(country_code == "FR", publication_years %in% x),
           gender_col = "gender")
}, FUN.VALUE = numeric(1))
baseline_fr

## Binomial calculation for the UK data against the French baselines.
UK_binom <- calculate_binom_baseline(data_df = UK_df,
                                     baseline_female = baseline_fr)
UK_binom

## ---- fig.width=7, fig.height=5-----------------------------------------------
## Reshape the UK binomial results with `percent_df()`.
percent_uk <- percent_df(UK_binom)

## Count the UK records per publication year. `x_values` is a factor of the
## years whose levels follow the row order of the counted table.
total_uk <- authors_df %>%
  filter(country_code == "UK") %>%
  count(publication_years) %>%
  mutate(x_values = factor(publication_years,
                           levels = publication_years))

## Conversion factor used to create the second y-axis. It is stored under a
## descriptive name rather than `c`, which would shadow `base::c()`.
line_chart_scale <- min(total_uk$n) / 100

bullet_line_chart(data_df = percent_uk,
                  baseline_female = baseline_fr,
                  x_label = "year",
                  y_bullet_chart_label = "Authors submission (%)",
                  baseline_label = "French Female baseline",
                  line_chart_df = total_uk,
                  line_chart_scaling = line_chart_scale,
                  y_line_chart_label = "Total number",
                  line_label = "Total submission UK")

## Try the GenderInfer package in your browser

## Any scripts or data that you put into this service are public.

## GenderInfer documentation built on Sept. 29, 2021, 9:07 a.m.