cookbook.R
In validate: Data Validation Infrastructure

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## ----echo=TRUE, eval=FALSE----------------------------------------------------
# install.packages("validate")

## ----echo=FALSE, include=!knitr::is_latex_output()----------------------------
#knitr::asis_output("
#[![Creative Commons License](https://i.creativecommons.org/l/by-nc/4.0/88x31.png)](https://creativecommons.org/licenses/by/4.0/)
#")

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## ----include=FALSE, eval=knitr::is_latex_output()-----------------------------
# knitr::opts_chunk$set(comment=NA)

## -----------------------------------------------------------------------------
data(cars)
head(cars, 3)

## -----------------------------------------------------------------------------
library(validate)
rules <- validator(speed >= 0
                 , dist >= 0
                 , speed/dist <= 1.5
                 , cor(speed, dist)>=0.2)

## -----------------------------------------------------------------------------
out   <- confront(cars, rules)

## -----------------------------------------------------------------------------
summary(out)

## ----fig.height=7, fig.width=7, echo=!knitr::is_latex_output(), eval=!knitr::is_latex_output()----
plot(out)

## ----label="validateplot", fig.height=5, fig.width=5, out.width="0.7\\textwidth", fig.align="center", echo=knitr::is_latex_output(), eval=knitr::is_latex_output(), fig.env="figure",fig.pos="!t", fig.cap="Plot of validation output."----
# plot(out)

## -----------------------------------------------------------------------------
violating(cars, out[1:3])

## -----------------------------------------------------------------------------
df_out <- as.data.frame(out)
head(df_out, 3)

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## -----------------------------------------------------------------------------
library(validate)
data(SBS2000)
head(SBS2000, 3)

## -----------------------------------------------------------------------------
is.character("hihi")
is.character(3)

## -----------------------------------------------------------------------------
rules <- validator(
    is.character(size)
  , is.numeric(turnover)
)
out <- confront(SBS2000, rules)
summary(out)

## -----------------------------------------------------------------------------
rule <- validator(
   !is.na(turnover)
 , !is.na(other.rev)
 , !is.na(profit)
)
out <- confront(SBS2000, rule)
summary(out)

## -----------------------------------------------------------------------------
rules <- validator( 
    !any(is.na(incl.prob))
    , all(is.na(vat)) )
out <- confront(SBS2000, rules)
summary(out) 

## -----------------------------------------------------------------------------
rules <- validator(
   nchar(as.character(size)) >= 2
 , field_length(id, n=5)
 , field_length(size, min=2, max=3)
)
out <- confront(SBS2000, rules)
summary(out)

## -----------------------------------------------------------------------------
dat <- data.frame(x = c("2.54","2.66","8.142","23.53"))

## -----------------------------------------------------------------------------
rule <- validator( number_format(x, format="d.dd"))
values(confront(dat, rule))

## -----------------------------------------------------------------------------
x <- c("12.123","123.12345")
number_format(x, min_dig=4)
number_format(x, max_dig=3)
number_format(x, min_dig=2, max_dig=4)
number_format(x, min_dig=2, max_dig=10)
# specify the decimal separator.
number_format("12,123", min_dig=2, dec=",")

## -----------------------------------------------------------------------------
rule <- validator(field_format(id, "RET*")
                , field_format(size, "sc?" ))
out  <- confront(SBS2000, rule)
summary(out)

## -----------------------------------------------------------------------------
rule <- validator(
          grepl("^sc[0-9]$", size)
        , field_format(id, "^RET\\d{2}$" , type="regex") )
summary(confront(SBS2000, rule))

## -----------------------------------------------------------------------------
rules <- validator(TO = turnover >= 0
                 , TC = total.costs >= 0)

## -----------------------------------------------------------------------------
rules <- rules + 
  validator(PR = in_range(incl.prob, min=0, max=1))

## -----------------------------------------------------------------------------
out <- confront(SBS2000, rules, lin.ineq.eps=0)

## -----------------------------------------------------------------------------
summary(out)

## -----------------------------------------------------------------------------
period = sprintf("2018Q%d", 1:4)
period

## -----------------------------------------------------------------------------
in_range(period, min="2017Q2", max = "2018Q2")

## -----------------------------------------------------------------------------
rule <- validator(size %in% c("sc0","sc1","sc2","sc3"))
out  <- confront(SBS2000, rule)
summary(out)

## -----------------------------------------------------------------------------
c(1, 3, NA) %in% c(1,2)
c(1, 3, NA) %vin% c(1,2)

## -----------------------------------------------------------------------------
rule <- validator(
  x %in% read.csv("codelist.csv")$code
)
## Or, equivalently
rule <- validator(
  valid_codes := read.csv("codelist.csv")$code
  , x %in% valid_codes
)


## -----------------------------------------------------------------------------
codelist <- c("sc0","sc1","sc2","sc3")
rule <- validator(size %in% valid_codes)
# pass the codelist
out <- confront(SBS2000, rule
              , ref=list(valid_codes=codelist))
summary(out)

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## -----------------------------------------------------------------------------
library(validate)
data(samplonomy)
head(samplonomy, 3)

## -----------------------------------------------------------------------------
head(samplonomy,3)

## -----------------------------------------------------------------------------
rule <- validator(is_unique(region, period, measure))
out <- confront(samplonomy, rule)
# showing 7 columns of output for readability
summary(out)[1:7]

## -----------------------------------------------------------------------------
violating(samplonomy, out)

## -----------------------------------------------------------------------------
df <- data.frame(x = c(1,1), y = c("A",NA))
df

## -----------------------------------------------------------------------------
df <- data.frame(x=rep(1,3), y = c("A", NA, NA))
is_unique(df$x, df$y)

## -----------------------------------------------------------------------------
# y is unique, given x. But not by itself
df <- data.frame(x=rep(letters[1:2],each=3), y=rep(1:3,2))

# the split-apply-combine approach
unsplit(tapply(df$y, df$x, is_unique), df$x)

# the combined approach
is_unique(df$x, df$y)

## -----------------------------------------------------------------------------
rule <- validator(
  contains_at_least( 
      keys = data.frame(period = as.character(2014:2019))
    , by=list(region, measure) )
)
out <- confront(samplonomy, rule)
# showing 7 columns of output for readability
summary(out)[1:7]

## -----------------------------------------------------------------------------
head(violating(samplonomy, out))

## -----------------------------------------------------------------------------
years <- as.character(2014:2019)
quarters <- paste0("Q",1:4)

keyset <- expand.grid(
  region = c("Agria", "Induston")
  , period = sapply(years, paste0, quarters))

head(keyset)

## -----------------------------------------------------------------------------
rule <- validator(
          contains_at_least(keys=minimal_keys, by=measure) 
        )
out <- confront(samplonomy, rule
              , ref=list(minimal_keys=keyset))
# showing 7 columns of output for readability
summary(out)[1:7]

## -----------------------------------------------------------------------------
years <- as.character(2014:2019)
quarters <- paste0("Q",1:4)

keyset <- expand.grid(
  region  = c(
    "Agria" 
   ,"Crowdon"
   ,"Greenham"
   ,"Induston"
   ,"Mudwater"
   ,"Newbay"
   ,"Oakdale"
   ,"Samplonia"
   ,"Smokely"
   ,"Wheaton"
  )
 ,period = c(years, sapply(years, paste0, quarters))
)
head(keyset)

## -----------------------------------------------------------------------------
rule <- validator(contains_exactly(all_keys, by=measure))
out  <- confront(samplonomy, rule
               , ref=list(all_keys=keyset))
# showing 7 columns of output for readability
summary(out)[1:7]

## -----------------------------------------------------------------------------
erroneous_records <- violating(samplonomy, out)
unique(erroneous_records$measure)

## -----------------------------------------------------------------------------
is_linear_sequence(c(1,2,3,4))
is_linear_sequence(c(8,6,4,2))
is_linear_sequence(c(2,4,8,16))

## -----------------------------------------------------------------------------
is_linear_sequence(c("2020Q1","2020Q2","2020Q3","2020Q4"))

## -----------------------------------------------------------------------------
is_linear_sequence(c("2020Q4","2020Q2","2020Q3","2020Q1"))

## -----------------------------------------------------------------------------
is_linear_sequence(c("2020Q4","2020Q2","2020Q3","2020Q1")
                 , begin = "2020Q2")

## -----------------------------------------------------------------------------
series <- c(1,2,3,4,1,2,3,3)
blocks <- rep(c("a","b"), each = 4)
is_linear_sequence(series, by = blocks)

## -----------------------------------------------------------------------------
in_linear_sequence(series, by = blocks)

## -----------------------------------------------------------------------------
is_linear_sequence(5)

## -----------------------------------------------------------------------------
blocks[8] <- "c"
data.frame(series = series, blocks = blocks)
in_linear_sequence(series, blocks)

## -----------------------------------------------------------------------------
in_linear_sequence(series, blocks, begin = 1, end = 4)

## -----------------------------------------------------------------------------
rule <- validator(
          in_linear_sequence(period
            , by = list(region, freq, measure))
        )
out  <- confront(samplonomy, rule)
summary(out)[1:7]

## ----results='hide'-----------------------------------------------------------
violating(samplonomy, out)

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## -----------------------------------------------------------------------------
library(validate)
data(SBS2000)
head(SBS2000, 3)


## -----------------------------------------------------------------------------
rules <- validator(
          is_complete(id)
        , is_complete(id, turnover)
        , is_complete(id, turnover, profit )
        , all_complete(id)
)
out <- confront(SBS2000, rules)
# suppress last column for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
rules <- validator(
    total.rev - profit == total.costs
  , turnover + other.rev == total.rev
  , profit <= 0.6*total.rev
)

out <- confront(SBS2000, rules)
summary(out)

## -----------------------------------------------------------------------------
out <- confront(SBS2000, rules, lin.ineq.eps=0, lin.eq.eps=0.01)
summary(out)

## -----------------------------------------------------------------------------
rule <- validator(if (staff >= 1) staff.costs >= 1)
out  <- confront(SBS2000, rule)
summary(out)

## -----------------------------------------------------------------------------
transactions <- data.frame(
   sender   = c("S21", "X34", "S45","Z22")
 , receiver = c("FG0", "FG2", "DF1","KK2")
 , value    = sample(70:100,4)
)

## -----------------------------------------------------------------------------
forbidden <- data.frame(sender="S*",receiver = "FG*")

## -----------------------------------------------------------------------------
rule <- validator(does_not_contain(glob(forbidden_keys)))
out <- confront(transactions, rule, ref=list(forbidden_keys=forbidden))
## Suppress columns for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
violating(transactions, out)

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## -----------------------------------------------------------------------------
library(validate)
data(SBS2000)
head(SBS2000, 3)

## -----------------------------------------------------------------------------
data(samplonomy)
head(samplonomy, 3)

## -----------------------------------------------------------------------------
rule <- validator(
    mean(profit, na.rm=TRUE) >= 1
  , cor(turnover, staff, use="pairwise.complete.obs") > 0
)
out <- confront(SBS2000, rule)
# suppress some columns for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
rule <- validator(
  turnover <= 10*do_by(turnover, by=size, fun=median, na.rm=TRUE)
)
out <- confront(SBS2000, rule)
# suppress some columns for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
medians <- with(SBS2000, do_by(turnover, by=size, fun=median, na.rm=TRUE))
head(data.frame(size = SBS2000$size, median=medians))

## -----------------------------------------------------------------------------
d <- data.frame(
   hhid   = c(1,  1,  2,  1,  2,  2,  3 )
 , person = c(1,  2,  3,  4,  5,  6,  7 )
 , hhrole = c("h","h","m","m","h","m","m")
)
d

## -----------------------------------------------------------------------------
rule <- validator(exists_one(hhrole == "h", by=hhid))
out <- confront(d, rule)
# suppress some columns for brevity
summary(out)

## -----------------------------------------------------------------------------
violating(d, out)

## -----------------------------------------------------------------------------
violating(d, validator(exists_any(hhrole=="h",by=hhid) ))

## -----------------------------------------------------------------------------
rule <- validator(exists_one(region=="Samplonia", by=list(period, measure)))

## -----------------------------------------------------------------------------
out <- confront(samplonomy, rule)
# suppress some columns for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
violating(samplonomy, out)

## -----------------------------------------------------------------------------
data(nace_rev2)
head(nace_rev2[1:4])

## -----------------------------------------------------------------------------
dat <- data.frame(
        nace   = c("01","01.1","01.11","01.12", "01.2")
      , volume = c(100 ,70    , 30    ,40     , 25    )
     )
dat

## -----------------------------------------------------------------------------
dat$check <- hierarchy(dat$volume, dat$nace, nace_rev2[3:4])
dat

## -----------------------------------------------------------------------------
samplonia <- data.frame(
    region   = c("Agria", "Induston"
               , "Wheaton", "Greenham"
               , "Smokely", "Mudwater", "Newbay", "Crowdon")
  , parent = c(rep("Samplonia",2), rep("Agria",2), rep("Induston",4))
) 
samplonia

## -----------------------------------------------------------------------------
data(samplonomy)
head(samplonomy)

## -----------------------------------------------------------------------------
rule <- validator(
  hierarchy(value, region, hierarchy=ref$codelist, by=list(period, measure))
)
out <- confront(samplonomy, rule, ref=list(codelist=samplonia))
summary(out)

## -----------------------------------------------------------------------------
warnings(out)

## -----------------------------------------------------------------------------
subset(samplonomy, region  == "Induston" & 
                   period  == "2018Q2"   & 
                   measure == "export")

## -----------------------------------------------------------------------------
i <- !duplicated(samplonomy[c("region","period","measure")])
samplonomy2 <- samplonomy[i, ]

out <- confront(samplonomy2, rule, ref=list(codelist=samplonia))
# suppress some columns for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
rules <- validator(
   level0 = hierarchy(value, region, ref$level0, by=list(period, measure))
 , level1 = hierarchy(value, region, ref$level1, by=list(period, measure))
)
out <- confront(samplonomy2, rules
        , ref=list(level0=samplonia[1:2,], level1=samplonia[3:8,])
       )
summary(out)

## -----------------------------------------------------------------------------
violating(samplonomy2, out["level0"]) 

## -----------------------------------------------------------------------------
rules <- validator(
  part_whole_relation(value
    , labels=region
    , whole="Samplonia"
    , part =c("Agria","Induston")
    , by=list(measure, period)
  )
)

## -----------------------------------------------------------------------------
out <- confront(samplonomy, rules)
# suppress some columns for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
violating(samplonomy, out)

## -----------------------------------------------------------------------------
subset(samplonomy, region=="Agria" & period == "2015" & measure == "gdp")

## -----------------------------------------------------------------------------
subset(samplonomy, region=="Induston" & freq == "A" & measure=="export")

## -----------------------------------------------------------------------------
rules <- validator(part_whole_relation(value
  , labels = period
  , whole  = rx("^\\d{4}$")
  , by = list(region, substr(period,1,4), measure) 
  ))
out <- confront(samplonomy, rules)

## -----------------------------------------------------------------------------
errors(out)
# suppress some columns for brevity
summary(out)[1:7]

## -----------------------------------------------------------------------------
lacking(samplonomy, out)

## -----------------------------------------------------------------------------
violating(samplonomy, out)

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## -----------------------------------------------------------------------------
library(validate)
ii <- indicator(
    BMI = (weight/2.2046)/(height*0.0254)^2 
  , mh  = mean(height)
  , mw  = mean(weight))
out <- confront(women, ii)

## -----------------------------------------------------------------------------
out

## -----------------------------------------------------------------------------
summary(out)

## -----------------------------------------------------------------------------
head(add_indicators(women, out), 3)

## -----------------------------------------------------------------------------
women$id <- letters[1:15]

## -----------------------------------------------------------------------------
out <- confront(women, ii,key="id")
tail( as.data.frame(out) )

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## ----echo=FALSE---------------------------------------------------------------
library(validate)

## -----------------------------------------------------------------------------
v <- validator(speed >= 0, dist>=0, speed/dist <= 1.5)
v

## -----------------------------------------------------------------------------
w <- v[c(1,3)]

## -----------------------------------------------------------------------------
w <- v[c("V1","V3")]

## -----------------------------------------------------------------------------
rules1 <- validator(speed>=0)
rules2 <- validator(dist >= 0)
all_rules <- rules1 + rules2

## -----------------------------------------------------------------------------
v[[3]]

## -----------------------------------------------------------------------------
rules <- validator(positive_speed = speed >= 0, ratio = speed/dist <= 1.5)
rules

## -----------------------------------------------------------------------------
names(rules)
names(rules)[1] <- "nonnegative_speed"

## -----------------------------------------------------------------------------
# add 'foo' to the first rule:
meta(rules[1],"foo") <- 1
# Add 'bar' to all rules
meta(rules,"bar") <- "baz"

## -----------------------------------------------------------------------------
v[[1]]

## -----------------------------------------------------------------------------
meta(v)

## -----------------------------------------------------------------------------
summary(v)

## -----------------------------------------------------------------------------
length(v)

## -----------------------------------------------------------------------------
variables(v)
variables(v,as="matrix")

## -----------------------------------------------------------------------------
rules <- validator(speed >= 0, dist >= 0, speed/dist <= 1.5)
df <- as.data.frame(rules)

## -----------------------------------------------------------------------------
rules <- validator(.data=df)

## ----eval=FALSE---------------------------------------------------------------
# ?syntax

## -----------------------------------------------------------------------------
sum_by(1:10, by = rep(c("a","b"), each=5) )

## -----------------------------------------------------------------------------
v  <- validator(height>0, weight>0,height/weight < 0.5)
cf <- confront(women, rules)
aggregate(cf) 

## -----------------------------------------------------------------------------
head(aggregate(cf,by='record'))

## -----------------------------------------------------------------------------
# rules with most violations sorting first:
sort(cf)

## -----------------------------------------------------------------------------
v <- validator(hite > 0, weight>0)
summary(confront(women, v))

## ----eval=TRUE, error=TRUE----------------------------------------------------
try({
# this gives an error
confront(women, v, raise='all')
})

## -----------------------------------------------------------------------------
women1 <- women
rules <- validator(height == women_reference$height)
cf <- confront(women, rules, ref = list(women_reference = women1))
summary(cf)

## -----------------------------------------------------------------------------
rules <- validator( fruit %in% codelist )
fruits <-  c("apple", "banana", "orange")
dat <- data.frame(fruit = c("apple","broccoli","orange","banana"))
cf <- confront(dat, rules, ref = list(codelist = fruits))
summary(cf)

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")
library(validate)

## ----eval=FALSE---------------------------------------------------------------
# # basic range checks
# speed >= 0
# dist  >= 0
# 
# # ratio check
# speed / dist <= 1.5

## -----------------------------------------------------------------------------
rules <- validator(.file="myrules.R")

## -----------------------------------------------------------------------------
rules <- validator(.file="myrules.yaml")
rules

## -----------------------------------------------------------------------------
rules1 <- rules[c(1,3)]
export_yaml(rules1, file="myrules2.yaml")

## ----eval=FALSE---------------------------------------------------------------
# v <- validator(height>0, weight> 0)
# export_yaml(v,file="my_rules.yaml")

## ----eval=FALSE---------------------------------------------------------------
# df <- as.data.frame(v)

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")
library(validate)

## ----eval=FALSE---------------------------------------------------------------
# sdmx_endpoint()

## ----eval=TRUE----------------------------------------------------------------
sdmx_endpoint(registry="global")

## ----eval=FALSE---------------------------------------------------------------
# codelist <- sdmx_codelist(
#   endpoint = sdmx_endpoint("global")
#   , agency_id = "ESTAT"
#   , resource_id = "CL_ACTIVITY")
# 
# head(codelist)
# [1] "_T"  "_X"  "_Z"  "A"   "A_B" "A01"

## ----eval=FALSE---------------------------------------------------------------
# Activity %in% global_codelist(agency_id="ESTAT", resource_id="CL_ACTIVITY")

## ----eval=FALSE---------------------------------------------------------------
# rules <- validator_from_dsd(endpoint = sdmx_endpoint("ESTAT")
#    , agency_id = "ESTAT", resource_id = "STSALL", version="latest")
# 
# length(rules)
# [1] 13
# rules[1]
# Object of class 'validator' with 1 elements:
#  CL_FREQ: FREQ %in% sdmx_codelist(endpoint = "https://ec.europa.eu/tools/cspa_services_global/sdmxregistry/rest", agency_id = "SDMX", resource_id = "CL_FREQ", version = "2.0")
# Rules are evaluated using locally defined options

## ----eval=FALSE---------------------------------------------------------------
# rule[[1]]

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")

## ----echo=FALSE---------------------------------------------------------------
library(validate)

## -----------------------------------------------------------------------------
library(validate)
data(SBS2000)
original <- SBS2000
version2 <- original
version2$other.rev <- abs(version2$other.rev)
version3 <- version2
version3$turnover[is.na(version3$turnover)] <- version3$vat[is.na(version3$turnover)]

## -----------------------------------------------------------------------------
cells(input = original, cleaned = version2, imputed = version3)

## -----------------------------------------------------------------------------
cells(input = original, cleaned = version2, imputed = version3
    , compare="sequential")

## -----------------------------------------------------------------------------
version4 <- version3
version4$turnover[is.na(version4$turnover)] <- median(version4$turnover, na.rm=TRUE)

# from kEUR to EUR
version5 <- version4
version5$staff.costs <- version5$staff.costs * 1000

## -----------------------------------------------------------------------------
out <- cells(input = original
           , cleaned = version2
           , vat_imp = version3
           , med_imp = version4
           , units   = version5)
par(mfrow=c(2,1))
barplot(out)
plot(out)

## -----------------------------------------------------------------------------
rules <- validator(other.rev >= 0
                 , turnover >= 0
                 , turnover + other.rev == total.rev
)

comparison <- compare(rules
                    , input = original
                    , cleaned = version2
                    , vat_imp = version3
                    , med_imp = version4
                    , units   = version5)
comparison

## -----------------------------------------------------------------------------
par(mfrow=c(2,1))
barplot(comparison)
plot(comparison)

## ----eval=FALSE---------------------------------------------------------------
# ## Contents of clean_supermarkets.R
# library(validate)
# 
# # 1. simulate reading data
# data(SBS2000)
# spm <- SBS2000[c("id","staff","turnover","other.rev","total.rev")]
# 
# # 2. add a logger from 'validate'
# start_log(spm, logger=lbj_cells())
# 
# # 3. assume empty values should be filled with 0
# spm <- transform(spm, other.rev = ifelse(is.na(other.rev),0,other.rev))
# 
# # 4. assume that negative amounts have only a sign error
# spm <- transform(spm, other.rev = abs(other.rev))
# 
# # 5a. ratio estimator for staff conditional on turnover
# Rhat <- with(spm, mean(staff,na.rm=TRUE)/mean(turnover,na.rm=TRUE))
# 
# # 5b. impute 'staff' variable where possible using ratio estimator
# spm <- transform(spm, staff = ifelse(is.na(staff), Rhat * turnover, staff))
# 
# # 6. write output
# write.csv(spm, "supermarkets_treated.csv", row.names = FALSE)

## -----------------------------------------------------------------------------
library(lumberjack)
run_file('clean_supermarkets.R')

## -----------------------------------------------------------------------------
logfile <- read.csv("spm_lbj_cells.csv")

## -----------------------------------------------------------------------------
logfile[3:4,]

## ----eval=FALSE---------------------------------------------------------------
# ## Contents of clean_supermarkets2.R
# library(validate)
# 
# #1.a simulate reading data
# data(SBS2000, package="validate")
# spm <- SBS2000[c("id","staff","other.rev","turnover","total.rev")]
# 
# # 1.b Create rule set
# rules <- validator(staff >= 0, other.rev>=0, turnover>=0
#                  , other.rev + turnover == total.rev)
# 
# 
# # 2. add two loggers
# start_log(spm, logger=lbj_cells())
# start_log(spm, logger=lbj_rules(rules))
# 
# ## The rest is the same as above ...

## -----------------------------------------------------------------------------
run_file("clean_supermarkets2.R")

## -----------------------------------------------------------------------------
read.csv("spm_lbj_rules.csv")[3:4,]

## ----eval=FALSE---------------------------------------------------------------
# stop_log(spm, logger="lbj_rules",file="my_output.csv")

## ----include=FALSE------------------------------------------------------------
source("chunk_opts.R")