# Set the console output width to 100 characters (presumably so the example
# data frames print without wrapping).
options(width = 100)
This package offers a pair of functions, `renamefrom()` and
`encodefrom()`, for renaming and encoding data frames using external
crosswalk files. It is especially useful when constructing master
data sets from multiple smaller data sets that do not name or encode
variables consistently across files. Based on the `renamefrom` and
`encodefrom` Stata commands written by Sally Hudson and team.
Install the latest release version from CRAN with
# Install the released version of crosswalkr from CRAN.
install.packages('crosswalkr')
Install the latest development version from Github with
# Install the development version from the btskinner/crosswalkr GitHub
# repository (requires the devtools package).
devtools::install_github('btskinner/crosswalkr')
# Attach the packages used by the examples; one call per line, because R
# needs a newline or `;` between statements.
library(crosswalkr)
library(dplyr)
library(haven)
## starting data frame
df <- data.frame(state = c('Kentucky','Tennessee','Virginia'),
                 fips = c(21,47,51),
                 region = c('South','South','South'))
df

## crosswalk with which to convert old names to new names with labels
cw <- data.frame(old_name = c('state','fips'),
                 new_name = c('stname','stfips'),
                 label = c('Full state name', 'FIPS code'))
cw
Convert old variable names to new names and add labels from crosswalk.
# Rename the columns of df using the crosswalk (old_name -> new_name) and take
# variable labels from the crosswalk's `label` column, then print the result.
df1 <- renamefrom(df, cw_file = cw, raw = old_name, clean = new_name,
                  label = label)
df1
Convert old variable names to new names using old names as labels (ignoring labels in crosswalk).
# Rename the columns of df using the crosswalk, with name_label = TRUE so the
# old names serve as labels instead of the crosswalk's label column, then print.
df2 <- renamefrom(df, cw_file = cw, raw = old_name, clean = new_name,
                  name_label = TRUE)
df2
Convert old variable names to new names, but keep unmatched old names in the data frame.
# Rename the columns of df using the crosswalk, with drop_extra = FALSE so
# columns that have no match in the crosswalk are kept, then print.
df3 <- renamefrom(df, cw_file = cw, raw = old_name, clean = new_name,
                  drop_extra = FALSE)
df3
## starting data frame
df <- data.frame(state = c('Kentucky','Tennessee','Virginia'),
                 stfips = c(21,47,51),
                 cenregnm = c('South','South','South'))
df

## use state crosswalk data file from package
## (data() loads the data set and returns its name; get() fetches that object)
cw <- get(data(stcrosswalk))
cw
Create a new column with factor-encoded values
# Encode df$state via the crosswalk (stname -> stfips, labelled with stabbr)
# and store the result in a new column, then print the data frame.
df$state2 <- encodefrom(df, var = state, cw_file = cw, raw = stname,
                        clean = stfips, label = stabbr)
df
Create a new column with labelled values.
## convert to tbl_df
df <- tibble::as_tibble(df)

## encode df$state via the crosswalk (stname -> stfips, labelled with stabbr)
df$state3 <- encodefrom(df, var = state, cw_file = cw, raw = stname,
                        clean = stfips, label = stabbr)
Create a new column with factor-encoded values (ignoring the fact that df
is a tibble).
# Same encoding as before, but with ignore_tibble = TRUE.
df$state4 <- encodefrom(
  df,
  var = state,
  cw_file = cw,
  raw = stname,
  clean = stfips,
  label = stabbr,
  ignore_tibble = TRUE
)
Show factors with labels:
# Print df with haven's as_factor() applied to it.
haven::as_factor(df)
Show factors without labels:
# Print df with haven's zap_labels() applied to it.
haven::zap_labels(df)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.