Summary: In this Notebook, I make the CenSoc-DMF and CenSoc-Numident Demo files.
Steps to make CenSoc Demo File:
(1) Read in the IPUMS 1940 1% extract, (2) link the HISTID crosswalk, (3) link to CenSoc-Numident and CenSoc-DMF.
Load the packages used in the analysis
## Load packages
## (one call per line so the leading comment no longer swallows the calls)
library(tidyverse)
library(data.table)
library(ipumsr)
library(styler)
library(datasets)
library(readxl)
library(censocdev)
# Read in the 1% IPUMS 1940 Census extract and lower-case its column names
ipums1pct <- fread("/data/censoc/workspace/sampleIPUMS.csv") %>%
  rename_with(tolower)

# Read the crosswalk that supplies histid
xwalk <- fread("/data/censoc/crosswalks/1940_histid_cc_1p_xwalk.txt")

# Attach histid to each person record (matched on serial + pernum) and move
# it to the first column
ipums1pct <- ipums1pct %>%
  inner_join(xwalk, by = c("serial", "pernum")) %>%
  relocate(histid)
## Read in linked Numident
numidentdata <- fread("/data/censoc/censoc_data_releases/censoc_linked_to_census/v2.1/censoc_numident_v2.1_linked.csv")

# Join numident to demo by histid.
# The 1% extract's `sex` and `bpl` are renamed first so they do not collide
# with the same-named columns in the linked Numident file.
numidentdemo <- ipums1pct %>%
  rename(
    sex.demo = "sex",
    bpl.demo = "bpl"
  ) %>%
  inner_join(numidentdata, by = c("histid" = "HISTID")) # 93917 remaining observations

# Keep only records whose age, race and sex agree between the linked file
# (upper-case columns) and the 1% extract (lower-case columns)
numident2.1 <- numidentdemo %>%
  filter(
    AGE == age,
    RACE == race,
    SEX == sex.demo
  ) # 85865 remaining observations

## Select numident variables for v2.1
vars_to_keep_num <- c(
  "histid", "byear", "bmonth", "dyear", "dmonth", "death_age",
  "race_first", "race_first_cyear", "race_last", "bpl_string",
  "zip_residence", "socstate", "socstate_string", "age_first_application",
  "link_abe_exact_conservative", "weight", "weight_conservative",
  "PERWT", "AGE", "SEX", "bpl", "MBPL", "FBPL", "EDUCD", "EMPSTATD",
  "HISPAN", "INCNONWG", "INCWAGE", "MARST", "NATIVITY", "OCC", "OCCSCORE",
  "OWNERSHP", "PERNUM", "RACE", "RENT", "SERIAL", "STATEFIP", "URBAN"
)

## Keep variables and rename to lower case.
## all_of() makes it explicit that vars_to_keep_num is an external character
## vector; passing it bare to select() is deprecated in tidyselect.
demo_file_numident <- numident2.1 %>%
  select(all_of(vars_to_keep_num)) %>%
  rename_with(tolower) %>%
  censocdev::recode_education(educ_var = educd)
## Read in linked DMF file
dmfdata <- fread("/data/censoc/censoc_data_releases/censoc_linked_to_census/v2.1/censoc_dmf_v2.1_linked.csv")

# Join dmf to demo by histid
dmfdata_demo <- ipums1pct %>%
  inner_join(dmfdata, by = c("histid" = "HISTID")) # 76,496

# Keep only records whose age, sex and race agree between the linked file
# (upper-case columns) and the 1% extract (lower-case columns)
dmf2.1 <- dmfdata_demo %>%
  filter(
    AGE == age,
    SEX == sex,
    RACE == race
  ) # 70,211 remaining obs

## Select DMF variables for v2.1
vars_to_keep_dmf <- c(
  "histid", "byear", "bmonth", "dyear", "dmonth", "death_age",
  "link_abe_exact_conservative", "weight", "weight_conservative",
  "PERWT", "AGE", "SEX", "BPLD", "MBPL", "FBPL", "EDUCD", "EMPSTATD",
  "HISPAN", "INCNONWG", "INCWAGE", "MARST", "NATIVITY", "OCC", "OCCSCORE",
  "OWNERSHP", "PERNUM", "RACE", "RENT", "SERIAL", "STATEFIP", "URBAN"
)

## Build the DMF demo file: keep variables, expose BPLD as `bpl`, and rename
## to lower case.
## all_of() makes it explicit that vars_to_keep_dmf is an external character
## vector; passing it bare to select() is deprecated in tidyselect.
demo_file_dmf <- dmf2.1 %>%
  select(all_of(vars_to_keep_dmf)) %>%
  rename(bpl = "BPLD") %>%
  rename_with(tolower) %>%
  censocdev::recode_education(educ_var = educd)
Function to add `statefip_string` (the `bpl_string` join is currently commented out)
# bpl_string_code <- read_csv("/data/josh/CenSoc/censoc_data/bpl_string_code.csv") %>%
#   mutate(Code = as.factor(Code))

# Exploratory check: tabulate statefip in the Numident demo file, with
# zero-count rows appended for FIPS codes 2 and 15 (Alaska and Hawaii)
demo_file_numident %>%
  mutate(statefip = as.numeric(statefip)) %>%
  count(statefip) %>%
  add_row(statefip = c(2, 15), n = c(0, 0))

#' Add a `statefip_string` column holding the state name for each `statefip`
#'
#' The lookup is a fixed table of the 51 state-level FIPS codes (50 states
#' plus the District of Columbia). It is deliberately NOT derived from the
#' values present in `numident_dmf`: pairing the observed codes positionally
#' with alphabetised state names is only correct when the data contains
#' exactly the 48 contiguous states plus DC, and otherwise errors or
#' mislabels states.
#'
#' @param numident_dmf A data frame with numeric `statefip` and a `bpl`
#'   column.
#' @return `numident_dmf` restricted to rows whose `statefip` is in the
#'   lookup, with `statefip_string` appended and `statefip` and `bpl`
#'   converted to factors.
statefip_string_fun <- function(numident_dmf) {
  # FIPS state codes run 1-56 in alphabetical order of state name, with
  # 3, 7, 14, 43 and 52 unassigned at the state level.
  fips_codes <- as.numeric(setdiff(1:56, c(3, 7, 14, 43, 52)))

  # `state.name` (datasets) is alphabetical but omits DC, which falls
  # immediately after Delaware (the 8th entry).
  fips_names <- append(state.name, "District of Columbia", after = 8L)

  state_lookup <- tibble(
    statefip = fips_codes,
    statefip_string = fips_names
  )

  numident_dmf %>%
    inner_join(state_lookup, by = "statefip") %>%
    # inner_join(bpl_string_code, by = c("bpl" = "Code")) %>%
    mutate(
      statefip = as.factor(statefip),
      bpl = as.factor(bpl)
    )
}

# Update numident and dmf demo files
demo_file_numident <- statefip_string_fun(demo_file_numident)
demo_file_dmf <- statefip_string_fun(demo_file_dmf)

## Write out csv files
write_csv(demo_file_dmf, "/data/censoc/censoc_data_releases/data_release_demo_v2.1/censoc_dmf_demo_v2.1.csv")
write_csv(demo_file_numident, "/data/censoc/censoc_data_releases/data_release_demo_v2.1/censoc_numident_demo_v2.1.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.