inst/doc/crosswalk.R

## ----knitrsetup, include = FALSE----------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, message=FALSE-----------------------------------------------------
library(retroharmonize)
library(dplyr)

## ----surveyfiles--------------------------------------------------------------
examples_dir <- system.file("examples", package = "retroharmonize")
survey_files <- dir(examples_dir)[grepl("\\.rds", dir(examples_dir))]
survey_files

## ----surveypaths--------------------------------------------------------------
survey_paths <- file.path(examples_dir, survey_files)

## ----examplemetadata----------------------------------------------------------
example_metadata <- metadata_create(survey_paths = survey_paths)

## ----subsetmetadata-----------------------------------------------------------
subset_example_metadata <- example_metadata %>%
  filter(grepl("^unique_identifier_in|trust|country_code", var_label_orig)) %>%
  filter(grepl(
    "^unique_identifier_in|european_parliament|country_code",
    var_label_orig
  )) %>%
  filter(var_name_orig != "uniqid")

subset_example_metadata

## ----ct1----------------------------------------------------------------------
ct <- crosswalk_table_create(subset_example_metadata)
ct

## ----varnames-----------------------------------------------------------------
ct %>%
  mutate(var_name_target = case_when(
    var_name_orig == "rowid" ~ .data$var_name_orig,
    var_name_orig == "isocntry" ~ "geo",
    TRUE ~ "trust_ep"
  )) %>%
  distinct(across(all_of(c("filename", "var_name_orig", "var_name_target"))))

## ----vallabels----------------------------------------------------------------
ct %>%
  mutate(val_numeric_target = case_when(
    val_numeric_orig == 1 ~ .data$val_numeric_orig,
    val_numeric_orig == 2 ~ 0,
    TRUE ~ 99999
  )) %>%
  mutate(val_label_target = case_when(
    val_numeric_orig == 1 ~ "trust",
    val_numeric_orig == 2 ~ "distrust",
    TRUE ~ "declined"
  )) %>%
  distinct(across(all_of(c(
    "filename",
    "val_numeric_orig", "val_numeric_target",
    "val_label_orig", "val_label_target"
  ))))

## ----nalabels-----------------------------------------------------------------
ct %>%
  mutate(na_numeric_target = case_when(
    na_numeric_orig == 3 ~ 99999,
    TRUE ~ NA_real_
  )) %>%
  mutate(na_label_target = case_when(
    na_numeric_target == 99999 ~ "declined",
    TRUE ~ NA_character_
  )) %>%
  distinct(across(all_of(c(
    "filename", "val_numeric_orig",
    "na_numeric_orig", "na_numeric_target",
    "na_label_orig", "na_label_target"
  ))))

## ----crosswalktarget----------------------------------------------------------
example_crosswalk_table <- ct %>%
  mutate(var_name_target = case_when(
    var_name_orig == "rowid" ~ .data$var_name_orig,
    var_name_orig == "isocntry" ~ "geo",
    TRUE ~ "trust_ep"
  )) %>%
  mutate(val_numeric_target = case_when(
    val_numeric_orig == 1 ~ .data$val_numeric_orig,
    val_numeric_orig == 2 ~ 0,
    TRUE ~ 99999
  )) %>%
  mutate(val_label_target = case_when(
    val_numeric_orig == 1 ~ "trust",
    val_numeric_orig == 2 ~ "distrust",
    TRUE ~ "declined"
  )) %>%
  mutate(na_numeric_target = case_when(
    na_numeric_orig == 3 ~ 99999,
    TRUE ~ NA_real_
  )) %>%
  mutate(na_label_target = case_when(
    na_numeric_target == 99999 ~ "declined",
    TRUE ~ NA_character_
  ))

example_crosswalk_table

## ----readtomemory-------------------------------------------------------------
example_surveys <- read_surveys(survey_paths, .f = "read_rds")

## ----subsetsave1--------------------------------------------------------------
subset_survey_list_1 <- subset_surveys(
  survey_list = example_surveys,
  subset_vars = c("rowid", "isocntry", "qa10_1", "qa14_1"),
  subset_name = "subset_example"
)

## ----subsetsave11-------------------------------------------------------------
vapply(subset_survey_list_1, function(x) attr(x, "id"), character(1))

## ----subsetsave13-------------------------------------------------------------
head(subset_survey_list_1[[1]])

## ----subsetsave14-------------------------------------------------------------
lapply(subset_survey_list_1, names)

## ----subsetsave21-------------------------------------------------------------
subset_survey_list_2 <- subset_surveys(
  crosswalk_table = example_crosswalk_table,
  survey_list = example_surveys,
  subset_name = "trust_ep"
)

## ----subsetsave22-------------------------------------------------------------
vapply(subset_survey_list_2, function(x) attr(x, "id"), character(1))

## ----subsetsave23-------------------------------------------------------------
head(subset_survey_list_2[[1]])

## ----subsetsave24-------------------------------------------------------------
lapply(subset_survey_list_2, names)

## ----subsetsave---------------------------------------------------------------
subset_surveys(
  survey_list = example_surveys,
  crosswalk_table = example_crosswalk_table,
  subset_name = "trust_ep",
  import_path = examples_dir,
  export_path = tempdir()
)

## ----reread-------------------------------------------------------------------
readRDS(file.path(tempdir(), "ZA5913_trust_ep.rds")) %>%
  head()

Try the retroharmonize package in your browser

Any scripts or data that you put into this service are public.

retroharmonize documentation built on Jan. 14, 2026, 9:08 a.m.