# inst/doc/defining-table-metadata.R

## ----setup, include = FALSE---------------------------------------------------
# Vignette-wide knitr chunk defaults: `collapse` and the "#>" output prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(data.table)
library(DBmaps)


## ----customers-example, echo = TRUE-------------------------------------------
# Example table: one row per customer, each tagged with a region.
customers <- data.table(
  customer_id = c("C001", "C002", "C003", "C004", "C005"),
  region = c("Asia", "Europe", "Asia", "Americas", "Europe")
)

# Only aggregations that group by a variable are declared. A table-wide total
# is intentionally left out because it does not prepare the table for a join
# on a key.
customers_by_region <- list(
  AggregatedName = "CustomersByRegion",
  AggregationFunction = "sum",
  GroupingVariables = "region"
)

# Each row stands for one customer, so the per-row value is the constant 1.
customer_count_spec <- list(
  OutcomeName = "CustomerCount",
  ValueExpression = 1,
  AggregationMethods = list(customers_by_region)
)

# Record the table's metadata with table_info().
customers_info_dt <- table_info(
  table_name = "customers",
  source_identifier = "customers.csv",
  identifier_columns = "customer_id",
  key_outcome_specs = list(customer_count_spec)
)

print(customers_info_dt)

## ----products-example, echo = TRUE--------------------------------------------
# Example table: one row per product, each assigned to a category.
products <- data.table(
  product_id = c("P001", "P002", "P003", "P004", "P005", "P006"),
  category   = c("A", "B", "A", "C", "B", "C")
)

# The single aggregation declared for this table is grouped by category.
products_per_category <- list(
  AggregatedName = "ProductsPerCategory",
  AggregationFunction = "sum",
  GroupingVariables = "category"
)

# Each row stands for one product, so the per-row value is the constant 1.
product_count_spec <- list(
  OutcomeName = "ProductCount",
  ValueExpression = 1,
  AggregationMethods = list(products_per_category)
)

# Record the table's metadata with table_info().
products_info_dt <- table_info(
  table_name = "products",
  source_identifier = "products.csv",
  identifier_columns = "product_id",
  key_outcome_specs = list(product_count_spec)
)

print(products_info_dt)

## ----transactions-example, echo = TRUE----------------------------------------
# Small in-memory stand-in for "transactions.csv": one row per transaction,
# referencing a customer and a product.
transactions <- data.table(
  transaction_id = c("T001", "T002", "T003", "T004", "T005"),
  customer_id = c("C001", "C002", "C001", "C003", "C004"),
  product_id = c("P001", "P002", "P001", "P003", "P002"),
  price = c(10, 20, 22, 11, 21),
  quantity = c(1, 2, 1, 3, 2)
)

# Two aggregations are declared for the same outcome: one grouped by customer
# and one grouped by product.
revenue_by_customer <- list(
  AggregatedName = "RevenueByCustomer",
  AggregationFunction = "sum",
  GroupingVariables = "customer_id"
)
revenue_by_product <- list(
  AggregatedName = "RevenueByProduct",
  AggregationFunction = "sum",
  GroupingVariables = "product_id"
)

# The per-row value is supplied as an unevaluated expression via quote().
revenue_spec <- list(
  OutcomeName = "Revenue",
  ValueExpression = quote(price * quantity),
  AggregationMethods = list(revenue_by_customer, revenue_by_product)
)

# Record the table's metadata with table_info().
transactions_info_dt <- table_info(
  table_name = "transactions",
  source_identifier = "transactions.csv",
  identifier_columns = "transaction_id",
  key_outcome_specs = list(revenue_spec)
)

print(transactions_info_dt)

## ----all-example, echo = TRUE-------------------------------------------------
# Stack the three per-table metadata tables into a single master table.
metadata_tables <- list(
  customers_info_dt,
  products_info_dt,
  transactions_info_dt
)
master_metadata_dt <- rbindlist(metadata_tables)

# Display the combined metadata.
print(master_metadata_dt)

# Inspect the structure of the combined data.table.
cat("\nStructure of the master metadata data.table:\n")
str(master_metadata_dt)

# Try the DBmaps package in your browser

# Any scripts or data that you put into this service are public.

# DBmaps documentation built on Sept. 9, 2025, 5:44 p.m.