# Chunk options applied to every code chunk in this vignette.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

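validR is a set of functions for testing and validating data. The main functions, as used in the examples below, are the `setup_test_*()` constructors (`setup_test_range()`, `setup_test_values()`, `setup_test_unique()`, `setup_test_na()`, `setup_test_orphan_rec()`), `test()`, `test_summary()`, and `wrong_rows()`.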

To use the testing functions, first create the test setup. How each test class should be set up is described in the Details section of `?setup_test_range`.

Column Tests

library(validR)

# Example data for the column tests below.
# Note that `w` starts with an NA and `z` contains the value 1 twice.
df <- data.frame(
  w = c(NA, 2:10),
  x = 1:10,
  y = c(rep("X", 4), rep("Y", 4), rep("Z", 2)),
  z = c(1, 1, 3:10),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

# Build one setup object per column check.

# Range checks on x, z and w.
setup1 <- setup_test_range(
  df_name = "df",
  col_name = "x",
  int = TRUE,
  lower_inclu = TRUE,
  upper_inclu = TRUE,
  lower = 1,
  upper = 10
)
setup2 <- setup_test_range(
  df_name = "df",
  col_name = "z",
  int = TRUE,
  lower_inclu = TRUE,
  upper_inclu = TRUE,
  lower = 3,
  upper = 10
)
# Only a lower bound is given here; the upper arguments are NULL.
setup3 <- setup_test_range(
  df_name = "df",
  col_name = "w",
  int = TRUE,
  lower_inclu = FALSE,
  upper_inclu = NULL,
  lower = 2,
  upper = NULL
)

# Allowed-values check on y.
setup4 <- setup_test_values(df_name = "df", col_name = "y", values = "X")

# Uniqueness checks on x and z.
setup5 <- setup_test_unique(df_name = "df", col_name = "x")
setup6 <- setup_test_unique(df_name = "df", col_name = "z")

# Missing-value checks on y and w.
setup7 <- setup_test_na(df_name = "df", col_name = "y")
setup8 <- setup_test_na(df_name = "df", col_name = "w")

setup <- list(
  setup1, setup2, setup3, setup4,
  setup5, setup6, setup7, setup8
)

# Every setup targets the same data frame, so a single lapply() call can
# run all of the tests against it.
tests_all <- lapply(setup, test, df = df)

# Stack the per-test summaries into a single object.
col_tests <- do.call(rbind, lapply(tests_all, test_summary))

# Stack the rows flagged by each test into a single object.
issues <- do.call(rbind, lapply(tests_all, wrong_rows))

Merge Tests

library(validR)

# First example: both tables use the same key column names (id1, id2).
primary_df <- data.frame(
  id1 = c(1, 2, 2, 3),
  id2 = c(1, 1, 2, 1),
  y = c(1, 1, 2, 5)
)
related_df <- data.frame(
  id1 = c(1, 2, 3),
  id2 = c(1, 1, 1),
  x = c(1, 2, 3)
)

# NOTE(review): the names passed here ("df1", "df2") differ from the
# variables primary_df / related_df used below; presumably they are only
# labels. Confirm against the package documentation.
setup1 <- setup_test_orphan_rec(
  primary_df = "df1",
  related_df = "df2",
  primary_key = c("id1", "id2"),
  foreign_key = c("id1", "id2")
)

# Run the test on the two tables, then pull out its summary and the
# rows it flagged.
test1 <- test(setup1, primary_df, related_df)
merge_test1 <- test_summary(test1)
issues1 <- wrong_rows(test1)


# Second example: the first key column is named differently in the two
# tables (`id` in primary_df, `id1` in related_df).
primary_df <- data.frame(
  id = c(1, 2, 2),
  id2 = c(1, 1, 2),
  y = c(1, 1, 2)
)
related_df <- data.frame(
  id1 = c(1, 2, 15),
  id2 = c(1, 1, 1),
  x = c(1, 2, 3)
)

setup2 <- setup_test_orphan_rec(
  primary_df = "df1",
  related_df = "df2",
  primary_key = c("id", "id2"),
  foreign_key = c("id1", "id2")
)

# Run the test, then pull out its summary and the rows it flagged.
test2 <- test(setup2, primary_df, related_df)
merge_test2 <- test_summary(test2)
issues2 <- wrong_rows(test2)

# Combine the summaries and the flagged rows from both merge examples.
merge_tests <- rbind(merge_test1, merge_test2)
merge_problems <- rbind(issues1, issues2)


holmesjoli/validR documentation built on Dec. 1, 2019, 12:38 p.m.