Summary: In this Notebook, I make the CenSoc-DMF and CenSoc-Numident Demo files.
Steps to make CenSoc Demo File:
(1) Read in the IPUMS 1940 1% extract (2) Link the HISTID crosswalk (3) Link to CenSoc-DMF and CenSoc-Numident
Load the packages for analysis
## library packages library(tidyverse) library(data.table) library(ipumsr) library(styler) library(datasets) library(readxl)
#' Recode detailed education codes into years of schooling
#'
#' Adds (or overwrites) a numeric `educ_yrs` column derived from the coded
#' education column named by `educ_var`. The codes are presumably IPUMS
#' `EDUCD` values (the callers pass "educd") -- TODO confirm against the
#' IPUMS codebook.
#'
#' Unlike the earlier implementation, no scratch column is created, so a
#' pre-existing column called `educ_variable` in `df` is left untouched.
#'
#' @param df A data frame containing the column named by `educ_var`.
#' @param educ_var Name of the coded education column, as a single string.
#' @return `df` with an `educ_yrs` column. Codes that are not in the lookup
#'   below (including `NA`) yield `NA`.
recode_education <- function(df, educ_var) {
  stopifnot(is.character(educ_var), length(educ_var) == 1L)
  if (!educ_var %in% names(df)) {
    stop("Column `", educ_var, "` not found in `df`.", call. = FALSE)
  }

  # Parallel vectors: educ_codes[i] is recoded to educ_years[i].
  # Codes 2 and 12 both map to 0 years; codes 110-113 are all capped at 17.
  educ_codes <- c(
    2, 12, 14, 15, 16, 17, 22, 23, 25, 26, 30,
    40, 50, 60, 70, 80, 90, 100, 110, 111, 112, 113
  )
  educ_years <- c(
    0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 17, 17, 17
  )

  # match() returns NA for unlisted codes, which indexes to NA_real_.
  df$educ_yrs <- educ_years[match(df[[educ_var]], educ_codes)]
  df
}
# Read in the 1% IPUMS 1940 census extract and lower-case every column name
# so the join keys below ("serial", "pernum") match regardless of the case
# used in the extract. rename_with() replaces the superseded rename_all().
ipums1pct <- fread("/data/censoc/workspace/demo_input/ipums1940_1p_sample_2024.csv.gz") %>%
  rename_with(tolower)

# Read crosswalk (histid)
xwalk <- fread("/data/censoc/crosswalks/1940_histid_cc_1p_xwalk.txt")

# Join histid to ipums1pct and move it to the first column.
# This is an inner join: sample records with no crosswalk row are dropped.
# NOTE(review): assumes (serial, pernum) is unique in xwalk -- confirm,
# otherwise the join would duplicate sample records.
ipums1pct <- ipums1pct %>%
  inner_join(xwalk, by = c("serial", "pernum")) %>%
  relocate(histid)
## Read in linked Numident
# numidentdata <- fread("/data/censoc/censoc_data_releases/censoc_numident/censoc_numident_v3/censoc_numident_v3.csv")
numidentdata <- fread("/data/censoc/censoc_data_releases/censoc_linked_to_census/v3/censoc_numident_v3_linked.csv")

# Join numident to demo by histid.
# The sample's sex/bpl are renamed first; presumably the Numident file has
# its own `sex` and `bpl` columns (both are selected below), so this avoids
# a name clash in the join -- TODO confirm.
numidentdemo <- ipums1pct %>%
  rename(
    sex.demo = "sex",
    bpl.demo = "bpl"
  ) %>%
  inner_join(numidentdata, by = c("histid" = "HISTID")) # 70k remaining observations

# Keep only records whose age and sex agree between the IPUMS 1% sample and
# the linked Numident data.
# (There are seemingly some slight differences between the sample (unknown
# year -- maybe 2021) and the full-count data (2022) used to construct the
# CenSoc-Numident.)
# NOTE(review): the original comment also mentioned matching on race, but no
# race condition is applied here.
# Plain assignment is used instead of `%<>%`: that operator is exported by
# magrittr, which library(tidyverse) does not attach.
numidentdemo <- numidentdemo %>%
  filter(
    AGE == age,
    SEX == sex.demo
  ) # 64,686

# recode education
numidentdemo <- recode_education(numidentdemo, "educd")

## Select numident variables
vars_to_keep_num <- c(
  "histid", "byear", "bmonth", "dyear", "dmonth", "death_age", "sex",
  "race_first", "race_first_cyear", "race_first_cmonth",
  "race_last", "race_last_cyear", "race_last_cmonth",
  "bpl", "bpl_string", "zip_residence", "socstate", "socstate_string",
  "age_first_application", "weight", "pernum", "perwt", "age",
  "mbpl", "fbpl", "educd", "educ_yrs", "empstatd", "hispan",
  "incwage", "incnonwg", "marst", "nativity", "occ", "occscore",
  "ownershp", "race", "rent", "serial", "statefip", "urban"
)

## Keep only the selected variables (all_of() errors if any is missing)
demo_file_numident <- numidentdemo %>%
  dplyr::select(all_of(vars_to_keep_num))
## Read in linked DMF file
# dmfdata <- fread("/data/censoc/censoc_data_releases/censoc_dmf/censoc_dmf_v3/censoc_dmf_v3.csv")
dmfdata <- fread("/data/censoc/censoc_data_releases/censoc_linked_to_census/v3/censoc_dmf_v3_linked.csv")

# Join dmf to demo by histid
dmfdata_demo <- ipums1pct %>%
  inner_join(dmfdata, by = c("histid" = "HISTID")) # 45,907

# recode education
dmfdata_demo <- recode_education(dmfdata_demo, "educd")

# Keep only records whose age and sex agree between the IPUMS 1% sample
# (`age`, `sex`) and the linked DMF data (`AGE`, `SEX`).
# Plain assignment is used instead of `%<>%`: that operator is exported by
# magrittr, which library(tidyverse) does not attach.
dmfdata_demo <- dmfdata_demo %>%
  filter(
    AGE == age,
    SEX == sex
  ) # 42,421

## Select DMF variables
vars_to_keep_dmf <- c(
  "histid", "byear", "bmonth", "dyear", "dmonth", "death_age", "weight",
  "pernum", "perwt", "age", "sex", "bpld", "mbpl", "fbpl",
  "educd", "educ_yrs", "empstatd", "hispan", "incwage", "incnonwg",
  "marst", "nativity", "occ", "occscore", "ownershp", "race", "rent",
  "serial", "statefip", "urban"
)

## Keep only the selected variables (all_of() errors if any is missing)
demo_file_dmf <- dmfdata_demo %>%
  dplyr::select(all_of(vars_to_keep_dmf))
Write Files
# Rename the identifier column to upper case (HISTID) in both demo files
# before writing them out.
# Plain assignment is used instead of `%<>%`: that operator is exported by
# magrittr, which library(tidyverse) does not attach.
demo_file_dmf <- demo_file_dmf %>%
  rename(HISTID = histid)
demo_file_numident <- demo_file_numident %>%
  rename(HISTID = histid)

## Write out csv files
write_csv(
  demo_file_dmf,
  "/data/censoc/censoc_data_releases/censoc_dmf_demo/censoc_dmf_demo_v3/censoc_dmf_demo_v3.csv"
)
write_csv(
  demo_file_numident,
  "/data/censoc/censoc_data_releases/censoc_numident_demo/censoc_numident_demo_v3/censoc_numident_demo_v3.csv"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.