# Default knitr chunk options for every code chunk in this document.
knitr::opts_chunk$set(
  # Text output: merge source and output, prefix output lines with "#>".
  collapse = TRUE,
  comment = "#>",
  # Figures: written under man/figures/ with a "README-" file prefix.
  fig.path = "man/figures/README-",
  out.width = "100%"
)
This package standardizes the output of the R packages wru and predictrace, and includes a Python script and virtual environment setup for the Python library ethnicolr.
library(rRace); library(reticulate); library(tidyverse); library(ggthemes)
We test the package with the built-in table "name_table", which includes some random names.
# Use the package's built-in example table as input for all later steps.
tab_names <- name_table

# Preview the first rows.
head(tab_names)
# Run race_prr() on the example names
# (presumably the predictrace-based prediction -- confirm).
tab_prr <- race_prr(tab_names)

# Preview the first rows of the result.
head(tab_prr)
First, we have to download the census data.
# Download the census data into "_debug_data/census/", restricted to the
# states that occur in the example table.
download_census(
  .key = "YOUR KEY", # placeholder: replace with your own key
  .geo = "county",
  .use_age = TRUE,
  .use_gen = TRUE,
  .dir = "_debug_data/census/",
  .workers = 10,
  .retry = 10,
  .progress = TRUE,
  .states = unique(tab_names$state)
)
# Run race_wru() on the example names, reading census data from
# "_debug_data/census/" with the same geography/age/gender settings that are
# passed to download_census().
tab_wru <- race_wru(
  .tab = tab_names,
  .use_geo = TRUE,
  .use_age = TRUE,
  .use_gen = TRUE,
  .census_dir = "_debug_data/census/",
  .census_geo = "county"
)

# Preview the first rows of the result.
head(tab_wru)
TODO: Explain the setup.
# Point reticulate at the Python interpreter of the "env-ethnicolr" conda
# environment.
.conda_envs <- reticulate::conda_list()

# Select the interpreter path by column name rather than by position, so the
# lookup does not depend on the column order of conda_list()'s result.
.py_path <- .conda_envs$python[.conda_envs$name == "env-ethnicolr"]

# Fail early with a readable message instead of passing an empty value to
# Sys.setenv() when the environment has not been created yet.
if (length(.py_path) == 0L) {
  stop(
    "Conda environment \"env-ethnicolr\" was not found; ",
    "create it before running this step.",
    call. = FALSE
  )
}

# The same environment name may be listed more than once (e.g. several conda
# installations); RETICULATE_PYTHON takes a single path, so use the first.
.py_path <- .py_path[[1L]]

Sys.setenv(RETICULATE_PYTHON = .py_path)

# Show which Python configuration reticulate discovers.
reticulate::py_discover_config()
The ethnicolr function is stored as a system file; you can easily load it with the following command.
# Locate the Python script bundled with rRace. mustWork = TRUE makes a missing
# file an immediate, descriptive error; without it system.file() returns ""
# and the failure only surfaces inside source_python().
.path_py <- system.file(
  "extdata/script_ethnicolr.py",
  package = "rRace",
  mustWork = TRUE
)

# Source the script so the Python functions it defines become callable from R.
reticulate::source_python(.path_py)
You can safely ignore the error about TensorFlow (if present). It only tells you that you don't have a GPU set up on your machine.
For reference, here is the complete function (you do not need to execute it):
import os
import pandas as pd
import ethnicolr


def race_eth(tab, methods=("CEL", "FLF", "FLL")):
    """Run the selected ethnicolr models on ``tab`` and stack the results.

    Parameters
    ----------
    tab : pandas.DataFrame
        Input names. Must contain a "last_name" column; the "FLF" method
        additionally reads "first_name".
    methods : sequence of str
        Any of "CEL" (census model, last name), "FLF" (Florida model, full
        name) and "FLL" (Florida model, last name). Defaults to all three.

    Returns
    -------
    pandas.DataFrame
        One block of rows per selected method, in the order CEL, FLF, FLL.
        Columns are those of ``tab`` (without "__name") followed by "method",
        "pasian", "pblack", "phispa", "pwhite", "pother" and "race".

    Raises
    ------
    ValueError
        If ``methods`` selects none of the supported models.
    """
    frames = []

    # CEL: CENSUS DATA, prediction on LAST NAME
    if "CEL" in methods:
        df1 = ethnicolr.pred_census_ln(df=tab, namecol="last_name")
        cols = {'api': 'pasian', 'black': 'pblack',
                'hispanic': 'phispa', 'white': 'pwhite'}
        df1.rename(columns=cols, inplace=True)
        df1["method"] = "CEL-1-0-0-0-0-N"
        frames.append(df1)

    # FLF: FLORIDA DATA, prediction on FULL NAME
    if "FLF" in methods:
        df2 = ethnicolr.pred_fl_reg_name(
            df=tab, lname_col="last_name", fname_col="first_name"
        )
        cols = {'asian': 'pasian', 'nh_black': 'pblack',
                'hispanic': 'phispa', 'nh_white': 'pwhite'}
        df2.rename(columns=cols, inplace=True)
        df2["method"] = "FLF-1-1-0-0-0-N"
        frames.append(df2)

    # FLL: FLORIDA DATA, prediction on LAST NAME
    if "FLL" in methods:
        df3 = ethnicolr.pred_fl_reg_ln(df=tab, namecol="last_name")
        cols = {'asian': 'pasian', 'nh_black': 'pblack',
                'hispanic': 'phispa', 'nh_white': 'pwhite'}
        df3.rename(columns=cols, inplace=True)
        # NOTE(review): this label repeats the "1-1" pattern of FLF although
        # only the last name is used here (CEL uses "1-0") -- confirm the
        # intended value before changing it; downstream code may match on it.
        df3["method"] = "FLL-1-1-0-0-0-N"
        frames.append(df3)

    # Only the frames that were actually produced are concatenated, so no
    # empty placeholder DataFrames are passed to pd.concat.
    if not frames:
        raise ValueError(
            'methods must contain at least one of "CEL", "FLF", "FLL"'
        )

    # combine dataframes to single dataframe
    df_out = pd.concat(frames, ignore_index=True)
    df_out["pother"] = 0

    # select and reorder columns
    # "__name" is dropped when present (presumably a helper column added by
    # ethnicolr -- confirm); filtering avoids the ValueError that
    # list.remove() raises when the column is absent.
    cols_old = [col for col in tab.columns.values if col != "__name"]
    cols_new = ["method", "pasian", "pblack", "phispa", "pwhite", "pother",
                "race"]
    df_out = df_out[cols_old + cols_new]

    # recode race values
    # NOTE(review): the census column 'api' is renamed to 'pasian' above, but
    # no race label 'api' is recoded here -- verify whether the census model
    # emits 'api' as a race value and whether it should map to 'asian'.
    race_old = ["hispanic", "nh_white", "nh_black"]
    race_new = ["hispa", "white", "black"]
    df_out["race"] = df_out["race"].replace(race_old, race_new)

    # return output
    return df_out
Again, you can safely ignore the error about TensorFlow (if present).
# Call the sourced Python function race_eth() on the example names, then
# convert what it returns to a tibble.
tab_eth <- race_eth(tab_names)
tab_eth <- as_tibble(tab_eth)

# Preview the first rows of the result.
head(tab_eth)
# Pass the three prediction tables (race_prr, race_wru, race_eth results) to
# race_predict().
tab_race <- race_predict(tab_prr, tab_wru, tab_eth)

# Preview the first rows of the result.
head(tab_race)
# Bar chart: number of rows per race among the rows with rank_hp == 1.
tab_race %>%
  filter(rank_hp == 1) %>%
  count(race) %>%
  ggplot(aes(x = race, y = n, fill = race)) +
  geom_col() +
  scale_fill_brewer(palette = "PuBu") +
  ggthemes::geom_rangeframe(color = "black") +
  ggthemes::theme_tufte()
# Bar chart: number of rows per race among the rows with rank_gd == 1.
tab_race %>%
  filter(rank_gd == 1) %>%
  count(race) %>%
  ggplot(aes(x = race, y = n, fill = race)) +
  geom_col() +
  scale_fill_brewer(palette = "PuBu") +
  ggthemes::geom_rangeframe(color = "black") +
  ggthemes::theme_tufte()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.