# Chunk defaults used when rendering this document: merge source and output
# into a single block, prefix output lines with "#>", and write generated
# figures under man/figures/.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/"
)

# Printing defaults for the session: 4 significant digits, 120-column width.
options(digits = 4, width = 120)
Tools for creating and applying dictionaries of value-replacement pairs, to clean non-valid values of numeric, categorical, or date-type variables within a dataset.
Install from GitHub with:
# Uncomment the next line if the remotes package is not installed yet.
# install.packages("remotes")
remotes::install_github("epicentre-msf/dbc")
library(dbc)

data(ll1)          # example messy dataset
data(dict_categ1)  # example dictionary of categorical vars and allowed values

# Print the messy dataset so its non-valid values are visible.
ll1
# Build a cleaning dictionary of the non-valid numeric values found in ll1.
dict_clean_numeric <- check_numeric(
  ll1,
  vars = c("age", "contacts"),  # cols that should be numeric
  fn = as.integer               # values not coercible by `fn` are non-valid
)

# Print the resulting dictionary.
dict_clean_numeric
Normally one would do this step in a spreadsheet, but we'll do it in R here for simplicity.
# Fill in the replacement column, one value per dictionary row in row order.
# NOTE(review): assumes the dictionary has exactly four rows, and that ".na"
# is the dbc keyword for "convert to missing" -- confirm against the dbc docs.
dict_clean_numeric[["replacement"]] <- c(
  ".na",
  "39",
  "10",
  ".na"
)
# Apply the numeric cleaning dictionary to ll1, using the same columns and
# the same coercion function that were used when checking.
clean_numeric(
  ll1,
  vars = c("age", "contacts"),
  dict_clean = dict_clean_numeric,
  fn = as.integer
)
Check for new non-valid numeric values, after incorporating previous cleaning
# Re-check the updated dataset for non-valid numeric values.
dict_clean_numeric_update <- check_numeric(
  # same as ll1 but with 3 additional entries
  ll2,
  vars = c("age", "contacts"),
  # incorporate previous cleaning before checking
  dict_clean = dict_clean_numeric,
  fn = as.integer,
  # return original cleaning dict + new entries
  return_all = TRUE
)

# Print the updated dictionary.
dict_clean_numeric_update
Manually specify replacement for new non-valid entry
# Set the replacement for the entry in row 5 of the updated dictionary.
# NOTE(review): the row index is hard-coded; presumably row 5 is the single
# newly detected entry -- verify against the printed dictionary.
dict_clean_numeric_update[["replacement"]][5] <- "6"
Apply updated cleaning dictionary to updated dataset
# Apply the updated numeric cleaning dictionary to ll2, using the same
# columns and the same coercion function that were used when checking.
clean_numeric(
  ll2,
  vars = c("age", "contacts"),
  dict_clean = dict_clean_numeric_update,
  fn = as.integer
)
# Build a cleaning dictionary of the non-valid categorical values in ll1.
dict_clean_categ <- check_categorical(
  ll1,
  # dictionary of categorical vars and their allowed values
  dict_allowed = dict_categ1
)

# Print the resulting dictionary.
dict_clean_categ
Again, we would normally do this step in a spreadsheet, but we do it in R here for simplicity.
# Fill in the replacement column, one value per dictionary row in row order.
# NOTE(review): assumes the dictionary has exactly seven rows, and that ".na"
# is the dbc keyword for "convert to missing" -- confirm against the dbc docs.
dict_clean_categ[["replacement"]] <- c(
  "Years",
  "Years",
  "Cured",
  ".na",
  "M",
  ".na",
  "Suspected"
)
# Apply the categorical cleaning dictionary to ll1, passing the same
# dictionary of allowed values that was used when checking.
clean_categorical(
  ll1,
  dict_allowed = dict_categ1,
  dict_clean = dict_clean_categ
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.