library(xaringanthemer)
library(sdcLog)
library(DiagrammeR)
# Apply the xaringanthemer mono-accent theme with fonts specified through
# google_font(): Fira Sans for headers and body text, Fira Mono for code.
style_mono_accent(
  # base_color is left commented out, so no explicit base colour is passed.
  # base_color = "#C85D79",
  header_font_google = google_font("Fira Sans"),
  # "300" / "300i" presumably request the light and light-italic weights -- confirm.
  text_font_google   = google_font("Fira Sans", "300", "300i"),
  code_font_google   = google_font("Fira Mono")
)
# Session-wide options for slide rendering and data.table printing,
# all switched off in a single call.
options(
  htmltools.dir.version = FALSE,
  datatable.print.keys  = FALSE,
  datatable.print.class = FALSE
)

# Default chunk options for every code chunk in the deck: collapse source and
# output, and use "#>" as the output comment prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

What do I do?

I work at the Bundesbank's Research Data Center.


What do I do?

I work at the Bundesbank's Research Data Center.

We provide


What do I do?

I work at the Bundesbank's Research Data Center.

We provide

Before researchers receive their research results, we need to check if the results contain any confidential information.


What do I do?

I work at the Bundesbank's Research Data Center.

We provide

Before researchers receive their research results, we need to check if the results contain any confidential information.

It's the researchers' duty to show that their results are not confidential.

This is where sdcLog comes into play.


A tiny bit of theory

Two simple rules:

1. Each result must be based on at least 5 different entities.


A tiny bit of theory

Two simple rules:

1. Each result must be based on at least 5 different entities.

2. The largest two entities must account for less than 85% of a result (dominance).


An example

# Load the example data set (presumably shipped with sdcLog -- confirm) and
# refer to it as DT for the rest of the example.
data("sdc_descriptives_DT")
DT <- sdc_descriptives_DT
# Show the first rows.
head(DT)

An example

# Load the example data set (presumably shipped with sdcLog -- confirm) and
# refer to it as DT for the rest of the example.
data("sdc_descriptives_DT")
DT <- sdc_descriptives_DT
# Show the first rows.
head(DT)
# results wanted: mean of val_1 per sector, ignoring missing values
DT[, .(mean = mean(val_1, na.rm = TRUE)), by = "sector"]

An example

# Load the example data set (presumably shipped with sdcLog -- confirm) and
# refer to it as DT for the rest of the example.
data("sdc_descriptives_DT")
DT <- sdc_descriptives_DT
# Show the first rows.
head(DT)
# results wanted: mean of val_1 per sector, ignoring missing values
DT[, .(mean = mean(val_1, na.rm = TRUE)), by = "sector"]
# show that this result is fine: run sdc_descriptives() on the same value
# variable (val_1) and grouping (sector), with "id" passed as id_var
sdc_descriptives(DT, id_var = "id", val_var = "val_1", by = "sector")

Another example

# Same call as before, but grouped by both sector and year.
sdc_descriptives(
  DT,
  id_var = "id",
  val_var = "val_1",
  by = c("sector", "year")
)

Other functionality

sdc_model() for disclosure control of models

sdc_min_max() for non-confidential min/max values


Why is it called sdcLog?

# Draw the workflow as a Graphviz digraph with the chain
# script -> sdc_log() -> log file -> checks (see the edge statements below).
# The '#' lines inside the string are part of the text passed to grViz(),
# not R comments.
grViz("
digraph boxes_and_circles {

  # a 'graph' statement
  graph [overlap = true, fontsize = 30]

  # several 'node' statements
  node [shape = box, fontname = 'Fira Sans', width = 5.5]
  script [label = 'R Script\ncontains sdc_descriptives() / sdc_min_max() / sdc_model()'];

  node [width = 1] 
  'log file'

  node [shape = box, width = 3]
  checks [label = 'manual checks by RDC staff']

  node [shape = oval, width = 1]
  'sdc_log()'

  # several 'edge' statements
  'script' -> 'sdc_log()'
  'sdc_log()' -> 'log file'
  'log file' -> checks
}
")


Why is it called sdcLog?

# Draw the workflow as a Graphviz digraph with the chain
# script -> sdc_log() -> log file -> checks (see the edge statements below).
# The '#' lines inside the string are part of the text passed to grViz(),
# not R comments.
grViz("
digraph boxes_and_circles {

  # a 'graph' statement
  graph [overlap = true, fontsize = 30]

  # several 'node' statements
  node [shape = box, fontname = 'Fira Sans', width = 5.5]
  script [label = 'R Script\ncontains sdc_descriptives() / sdc_min_max() / sdc_model()'];

  node [width = 1] 
  'log file'

  node [shape = box, width = 3]
  checks [label = 'manual checks by RDC staff']

  node [shape = oval, width = 1]
  'sdc_log()'

  # several 'edge' statements
  'script' -> 'sdc_log()'
  'sdc_log()' -> 'log file'
  'log file' -> checks
}
")


Why is it called sdcLog?

# Draw the workflow as a Graphviz digraph with the chain
# script -> sdc_log() -> log file -> checks (see the edge statements below).
# The '#' lines inside the string are part of the text passed to grViz(),
# not R comments.
grViz("
digraph boxes_and_circles {

  # a 'graph' statement
  graph [overlap = true, fontsize = 30]

  # several 'node' statements
  node [shape = box, fontname = 'Fira Sans', width = 5.5]
  script [label = 'R Script\ncontains sdc_descriptives() / sdc_min_max() / sdc_model()'];

  node [width = 1] 
  'log file'

  node [shape = box, width = 3]
  checks [label = 'manual checks by RDC staff']

  node [shape = oval, width = 1]
  'sdc_log()'

  # several 'edge' statements
  'script' -> 'sdc_log()'
  'sdc_log()' -> 'log file'
  'log file' -> checks
}
")


class: middle

Get in touch!

# Install sdcLog from CRAN
install.packages("sdcLog")

# Or install it from its GitHub repository (uses the remotes package)
remotes::install_github("https://github.com/matthiasgomolka/sdcLog")


matthiasgomolka/sdcLog documentation built on July 17, 2025, 3:21 a.m.