# inst/doc/value-labels.R

## ----setup, include = FALSE---------------------------------------------------
# Set the knitr chunk options `collapse` (TRUE) and `comment` ("#>").
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----message=FALSE------------------------------------------------------------
# Attach ipumsr. The unqualified helpers used throughout this script
# (read_ipums_*(), ipums_val_labels(), lbl_*(), as_factor(), ...) are
# presumably exported or re-exported by it -- TODO confirm.
library(ipumsr)

# Read the DDI for the example extract "cps_00160.xml"; ipums_example()
# presumably resolves the path of the bundled example file.
ddi <- read_ipums_ddi(ipums_example("cps_00160.xml"))
# Read the microdata described by `ddi`, passing verbose = FALSE to the reader.
cps <- read_ipums_micro(ddi, verbose = FALSE)

# Bare top-level expression: displays the loaded data when the chunk is run.
cps

## -----------------------------------------------------------------------------
# Check whether the STATEFIP column is a labelled vector
is.labelled(cps$STATEFIP)

## -----------------------------------------------------------------------------
# Labels print when accessing the column
head(cps$MONTH)

# Get labels alone
ipums_val_labels(cps$MONTH)

## -----------------------------------------------------------------------------
# First values of AGE, shown before AGE is converted to a factor in the next
# chunk
head(cps$AGE)

## -----------------------------------------------------------------------------
# Keep a factor version of AGE alongside the original column
cps$AGE_FACTOR <- as_factor(cps$AGE)

# Factor entries for the rows whose original AGE value is 0
age0_factor <- cps$AGE_FACTOR[cps$AGE == 0]

# The levels look the same
unique(age0_factor)

# But the values have changed
unique(as.numeric(age0_factor))

## -----------------------------------------------------------------------------
# Same check for the rows whose original AGE value is 85
age85_factor <- cps$AGE_FACTOR[cps$AGE == 85]

unique(as.numeric(age85_factor))

## -----------------------------------------------------------------------------
# Mean of the original AGE values ...
mean(cps$AGE)

# ... compared with the mean of the numeric codes behind the factor
mean(as.numeric(cps$AGE_FACTOR))

## -----------------------------------------------------------------------------
# Labels attached to HEALTH before any recoding
ipums_val_labels(cps$HEALTH)

# Recode with base ifelse(): values above 3 become 3, others are kept.
# ifelse() strips attributes, so the second call checks which labels (if any)
# remain on the recoded vector.
HEALTH2 <- ifelse(cps$HEALTH > 3, 3, cps$HEALTH)
ipums_val_labels(HEALTH2)

## -----------------------------------------------------------------------------
# Show MONTH's value labels, then overwrite the column with its as_factor()
# conversion
ipums_val_labels(cps$MONTH)

cps$MONTH <- as_factor(cps$MONTH)

## ----eval=FALSE---------------------------------------------------------------
#  # Not evaluated (eval=FALSE): applies as_factor() to the whole data frame
#  cps <- as_factor(cps)
#  
#  # ... further preparation of variables as factors

## -----------------------------------------------------------------------------
# Apply zap_labels() to INCTOT and keep the result in a separate vector;
# cps$INCTOT itself is not reassigned here
inctot_num <- zap_labels(cps$INCTOT)

# Storage type of the result
typeof(inctot_num)

# Query the value labels of the zapped vector
ipums_val_labels(inctot_num)

## -----------------------------------------------------------------------------
# Value labels on the original INCTOT column
ipums_val_labels(cps$INCTOT)

## -----------------------------------------------------------------------------
# Same query again, in its own chunk
ipums_val_labels(cps$INCTOT)

## -----------------------------------------------------------------------------
# Convert to NA using function that returns TRUE for all labelled values equal
# to 999999999
inctot_na <- lbl_na_if(
  cps$INCTOT,
  function(.val, .lbl) .val == 999999999
)

# All 999999999 values have been converted to NA
any(inctot_na == 999999999, na.rm = TRUE)

# And the label has been removed:
ipums_val_labels(inctot_na)

## -----------------------------------------------------------------------------
# Convert to NA for labels that contain "N.I.U."
# fixed = TRUE makes grepl() match the text literally. As a regular expression,
# each "." would match any character, so the pattern could also catch labels
# that merely resemble "N.I.U." (e.g. "NAIBUC").
inctot_na2 <- lbl_na_if(
  cps$INCTOT,
  function(.val, .lbl) grepl("N.I.U.", .lbl, fixed = TRUE)
)

# Same result (compared on the non-missing positions only, because of
# na.rm = TRUE)
all(inctot_na2 == inctot_na, na.rm = TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  lbl_na_if(cps$INCTOT, ~ .val == 999999999)

## -----------------------------------------------------------------------------
# Formula form of the predicate, here with the condition `.val >= 0`
x <- lbl_na_if(cps$INCTOT, ~ .val >= 0)

# Unlabelled values greater than the cutoff are still present:
sum(x > 0, na.rm = TRUE)

## -----------------------------------------------------------------------------
# Value labels on MIGRATE1 before relabelling
ipums_val_labels(cps$MIGRATE1)

# Each formula has a target label on the left, built with lbl(value, text),
# and a condition on the current values (.val) on the right; presumably values
# meeting the condition are assigned that label -- confirm against the
# lbl_relabel() documentation.
cps$MIGRATE1 <- lbl_relabel(
  cps$MIGRATE1,
  lbl(0, "NIU / Missing / Unknown") ~ .val %in% c(0, 2, 9),
  lbl(1, "Stayed in state") ~ .val %in% c(1, 3, 4)
)

# Value labels after relabelling, for comparison with the output above
ipums_val_labels(cps$MIGRATE1)

## -----------------------------------------------------------------------------
# First 15 entries of EDUC's value labels
head(ipums_val_labels(cps$EDUC), 15)

## -----------------------------------------------------------------------------
# %/% refers to integer division, which divides but discards the remainder
# (both examples below evaluate to 1)
10 %/% 10
11 %/% 10

# Convert to groups by tens digit; the result is stored in a new column, EDUC2
cps$EDUC2 <- lbl_collapse(cps$EDUC, ~ .val %/% 10)

# Value labels of the collapsed variable
ipums_val_labels(cps$EDUC2)

## -----------------------------------------------------------------------------
# Value labels on STATEFIP as loaded
ipums_val_labels(cps$STATEFIP)

# Value labels after lbl_clean(); the cleaned vector is only inspected here and
# is not stored back into cps
ipums_val_labels(lbl_clean(cps$STATEFIP))

## -----------------------------------------------------------------------------
# Small labelled vector in which only 990 and 999 carry labels
x <- haven::labelled(
  x = c(100, 200, 105, 990, 999, 230),
  labels = c(Unknown = 990, NIU = 999)
)

# Supply one explicit lbl(value, text) per remaining value
lbl_add(
  x,
  lbl(100, "$100"), lbl(105, "$105"),
  lbl(200, "$200"), lbl(230, "$230")
)

## -----------------------------------------------------------------------------
# `.` refers to each label value
lbl_add_vals(x, ~ paste0("$", .))

## -----------------------------------------------------------------------------
# Plain numeric vector with no labels attached
age <- c(10, 12, 16, 18, 20, 22, 25, 27)

# Group age values into two label groups.
# Values not captured by the right hand side functions remain unlabelled
# (in this vector, 25 and 27 satisfy neither condition).
lbl_define(
  age,
  lbl(1, "Pre-college age") ~ .val < 18,
  lbl(2, "College age") ~ .val >= 18 & .val <= 22
)

# Try the ipumsr package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ipumsr documentation built on Oct. 20, 2023, 5:10 p.m.