# inst/doc/discovering-join-paths.R

## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the whole vignette: collapsed output, prefixed with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(data.table)
library(DBmaps)

## ----setup_metadata-----------------------------------------------------------
# Build one metadata entry per table.
customers_meta <- table_info(
  "customers",
  "c.csv",
  "customer_id",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "region"
        )
      )
    )
  )
)

products_meta <- table_info(
  "products",
  "p.csv",
  "product_id",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "category"
        )
      )
    )
  )
)

transactions_meta <- table_info(
  "transactions",
  "t.csv",
  "trans_id",
  list(
    list(
      OutcomeName = "rev",
      ValueExpression = 1,
      AggregationMethods = list(
        # Grouped by customer_id, i.e. the primary key of 'customers'
        list(
          AggregatedName = "a",
          AggregationFunction = "sum",
          GroupingVariables = "customer_id"
        ),
        # Grouped by product_id, i.e. the primary key of 'products'
        list(
          AggregatedName = "b",
          AggregationFunction = "sum",
          GroupingVariables = "product_id"
        )
      )
    )
  )
)

# Stack the per-table metadata into a single master object
master_meta <- rbindlist(
  list(customers_meta, products_meta, transactions_meta)
)

## ----metadata_only_run--------------------------------------------------------
# Discover join paths from the metadata alone (no data is supplied)
metadata_paths <- map_join_paths(master_meta)
print(metadata_paths)

## ----multi_key_setup----------------------------------------------------------
# A table keyed on two columns (product_id and region)
daily_promos_meta <- table_info(
  "daily_promos",
  "d.csv",
  c("product_id", "region"),
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "region"
        )
      )
    )
  )
)

# Give transactions an aggregation grouped by that same composite key
transactions_multi_meta <- table_info(
  "transactions",
  "t.csv",
  "trans_id",
  list(
    list(
      OutcomeName = "rev",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "promo_rev",
          AggregationFunction = "sum",
          GroupingVariables = c("product_id", "region")
        )
      )
    )
  )
)

multi_key_meta <- rbindlist(
  list(daily_promos_meta, transactions_multi_meta)
)

## ----multi_key_run------------------------------------------------------------
multi_key_paths <- map_join_paths(multi_key_meta)
print(multi_key_paths)

## ----inferred_setup-----------------------------------------------------------
# Toy data: the values in orders$product_code also appear in inventory$sku
inventory_data <- data.table(
  sku = c("s1", "s2", "s3"),
  stock = c(10, 20, 5)
)
orders_data <- data.table(
  order_id = 1:2,
  customer_ref = "c1",
  product_code = c("s1", "s2")
)

data_list <- list(inventory = inventory_data, orders = orders_data)

# Metadata for both tables; the column names differ (sku vs. product_code)
inventory_meta <- table_info(
  "inventory",
  "i.csv",
  "sku",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "stock"
        )
      )
    )
  )
)
orders_meta <- table_info(
  "orders",
  "o.csv",
  "order_id",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "product_code"
        )
      )
    )
  )
)

inferred_meta <- rbindlist(list(inventory_meta, orders_meta))

## ----inferred_run-------------------------------------------------------------
# This time the data is passed alongside the metadata
inferred_paths <- map_join_paths(inferred_meta, data_list = data_list)
print(inferred_paths)

# Try the DBmaps package in your browser
#
# Any scripts or data that you put into this service are public.
#
# DBmaps documentation built on Sept. 9, 2025, 5:44 p.m.