inst/doc/tech-dm-keyed.R

## ----setup, include = FALSE----------------------------------------------
source("setup/setup.R")

## ----zoom----------------------------------------------------------------
#  library(dm)
#  library(dplyr)
#  
#  flights_dm <- dm_nycflights13(cycle = TRUE)
#  flights_dm
#  flights_keyed <-
#    flights_dm %>%
#    dm_get_tables(keyed = TRUE)
#  
#  # The print output for a `dm_keyed_tbl` looks very much like that from a normal
#  # `tibble`, with additional details about keys.
#  flights_keyed$flights
#  flights_tbl_mutate <-
#    flights_keyed$flights %>%
#    mutate(am_pm_dep = if_else(dep_time < 1200, "am", "pm"), .after = dep_time)
#  
#  flights_tbl_mutate

## ----zoom2---------------------------------------------------------------
#  updated_flights_dm <- dm(
#    flights = flights_tbl_mutate,
#    !!!flights_keyed[c("airlines", "airports", "planes", "weather")]
#  )
#  
#  # The only difference in the `dm` print output is the increased number of
#  # columns
#  updated_flights_dm
#  # The schematic view of the data model remains unchanged
#  dm_draw(updated_flights_dm)

## ------------------------------------------------------------------------
#  library(tidyr)
#  
#  flights_keyed$weather
#  
#  # Maybe there is some hidden candidate for a primary key that we overlooked?
#  enum_pk_candidates(flights_keyed$weather)
#  # Seems we have to construct a column with unique values
#  # This can be done by combining column `origin` with `time_hour`, if the latter
#  # is converted to a single time zone first; all within the `dm`:
#  weather_tbl_mutate <-
#    flights_keyed$weather %>%
#    # first convert all times to the same time zone:
#    mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>%
#    # paste together as character the airport code and the time
#    unite("origin_slot_id", origin, time_hour_fmt) %>%
#    select(origin_slot_id, everything())
#  
#  # check if we the result is as expected:
#  weather_tbl_mutate %>%
#    enum_pk_candidates() %>%
#    filter(candidate)
#  # We apply the same transformation to create
#  # the foreign key in the flights table:
#  flights_tbl_mutate <-
#    flights_keyed$flights %>%
#    mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>%
#    unite("origin_slot_id", origin, time_hour_fmt) %>%
#    select(origin_slot_id, everything())
#  
#  surrogate_flights_dm <-
#    dm(
#      weather = weather_tbl_mutate,
#      flights = flights_tbl_mutate,
#      !!!flights_keyed[c("airlines", "airports", "planes")]
#    ) %>%
#    dm_add_pk(weather, origin_slot_id) %>%
#    dm_add_fk(flights, origin_slot_id, weather)
#  
#  surrogate_flights_dm %>%
#    dm_draw()

## ------------------------------------------------------------------------
#  disentangled_flights_dm <-
#    dm(
#      destination = flights_keyed$airports,
#      origin = flights_keyed$airports,
#      !!!flights_keyed[c("flights", "airlines", "planes", "weather")]
#    ) %>%
#    # Key relations are also duplicated, so the wrong ones need to be removed
#    dm_rm_fk(flights, dest, origin) %>%
#    dm_rm_fk(flights, origin, destination)
#  
#  disentangled_flights_dm %>%
#    dm_draw()

## ------------------------------------------------------------------------
#  flights_derived <-
#    flights_dm %>%
#    pull_tbl(flights, keyed = TRUE) %>%
#    dplyr::count(origin, carrier)
#  
#  derived_flights_dm <- dm(flights_derived, !!!flights_keyed)
#  
#  derived_flights_dm %>%
#    dm_draw()

## ------------------------------------------------------------------------
#  planes_for_join <-
#    flights_keyed$planes %>%
#    select(tailnum, plane_type = type)
#  
#  joined_flights_tbl <-
#    flights_keyed$flights %>%
#    # let's first reduce the number of columns of flights
#    select(-dep_delay:-arr_delay, -air_time:-minute, -starts_with("sched_")) %>%
#    # in the {dm}-method for the joins you can specify which columns you want to
#    # add to the subsetted table
#    left_join(planes_for_join)
#  
#  joined_flights_dm <- dm(
#    flights_plane_type = joined_flights_tbl,
#    !!!flights_keyed[c("airlines", "airports", "weather")]
#  )
#  
#  # this is how the table looks now
#  joined_flights_dm$flights_plane_type
#  # also here, the FK-relations are transferred to the new table
#  joined_flights_dm %>%
#    dm_draw()

## ------------------------------------------------------------------------
#  dm <- dm_nycflights13()
#  dm_deconstruct(dm)

Try the dm package in your browser

Any scripts or data that you put into this service are public.

dm documentation built on Nov. 2, 2023, 6:07 p.m.