# Slide-deck setup: session-wide options and the shared theme script.
#library(knitr)
#opts_chunk$set(dev.args=list(bg="transparent"), fig.width=15, fig.height=7)

# NOTE(review): `cache` is set here as a global R option, not as a knitr chunk
# option (the knitr lines above are commented out) -- confirm that something
# actually reads getOption("cache").
options(htmltools.dir.version = FALSE)
options(cache = TRUE)

# Evaluate the theme script from the working directory (contents not shown here).
source("kutheme.R")

library(dataMaid)
library(assertr)
library(dplyr)
library(validate)
# Load the example data set (presumably shipped with dataMaid, attached above)
# and bind it to the short alias used by the later examples.
data("bigPresidentData")
bpD <- bigPresidentData

Row-wise or column-wise checks?

# Embed the figure pics/colrow1.png in the rendered output.
knitr::include_graphics("pics/colrow1.png")

Row-wise or column-wise checks?

# Embed the figure pics/colrow2.png in the rendered output.
knitr::include_graphics("pics/colrow2.png")

Row-wise or column-wise checks?

# Embed the figure pics/colrow3.png in the rendered output.
knitr::include_graphics("pics/colrow3.png")

Row-wise and column-wise constraints!


Row-wise and column-wise constraints!

An R package that performs row-wise checks: validate

library(validate)

Note: Different use of the term "validation" - no longer about format, type and range, but used as a synonym for "check".


validate - overview:


Make validator object

.footnotesize[

# Rule set for the presidents data. Every named argument is one rule that is
# evaluated per row; the `:=` line defines a helper variable that the rules
# after it can refer to (it is not a check in itself).
val1 <- 
 validator(
   # Helper: age at death in whole years. Presumably dateOfDeath and birthday
   # are Date columns, so the difference is in days -- TODO confirm.
   ageAtDeath := floor((dateOfDeath - birthday)/365.25),  
   `Adult president` = ageAtInauguration >= 18,
   # Uses the helper variable defined above.
   `Alive at inauguration` = ageAtDeath >= ageAtInauguration,
   # NOTE(review): if firstName is a character column this arithmetic raises
   # an error at confrontation time -- presumably intentional, to have
   # something for errors() to report; confirm before "fixing".
   `Positive first name` = firstName*2 > firstName,
   # Conditional rule: validate reads `if (A) B` as "A implies B", so rows
   # where the condition is FALSE pass.
   `Death by assassination` = 
     if (dateOfDeath == presidencyEndDate) 
       assassinationAttempt == 1
)

]


Confront data with validator object:

.footnotesize[

# Evaluate every rule in val1 against the presidents data.
con1 <- confront(dat = bpD, x = val1)
# Print the per-rule summary, keeping only its first six columns.
summary(con1)[, seq_len(6)]

]


Understand confrontation results

Lots of functions are available for inspecting confrontations:

* summary(): Overview of confrontation results
* aggregate(): Compute percentage pass/fail/na
* sort(): Sort results by problem prevalence
* values(): For each observation and each check: TRUE/FALSE/NA
* barplot(): Visual overview of check results
* errors(): What errors were caught?
* warnings(): What warnings were caught?


Warning: modify-by-reference


Warning: modify-by-reference

# Demonstrates the modify-by-reference pitfall: plain assignment of a
# validator does not create an independent copy.
v1 <- validator(check1 = sex == "Male")
v1

v2 <- v1  # v2 refers to the same underlying validator as v1
names(v2) <- "All males"  # rename the rule through v2 ...
v1  # ... then print v1 to see that its rule was renamed as well

Warning: modify-by-reference

Make a copy using [TRUE]:

# Safe variant: subsetting with [TRUE] selects every rule and yields a new
# validator object, so changes to v2 no longer reach v1.
v1 <- validator(check1 = sex == "Male")
v2 <- v1[TRUE]  # copy of v1 containing all of its rules
names(v2) <- "All males"  # only the copy is renamed
v1  # print v1 to confirm its rule is still called check1


ekstroem/dataMaid documentation built on Jan. 31, 2022, 9:10 a.m.