inst/doc/tech-dm-zoom.R

## ----setup, include = FALSE----------------------------------------------
source("setup/setup.R")

## ----zoom----------------------------------------------------------------
#  library(dm)
#  library(dplyr)
#  flights_dm <- dm_nycflights13()
#  flights_dm
#  flights_zoomed <-
#    flights_dm %>%
#    dm_zoom_to(flights)
#  # The print output for a `dm_zoomed` looks very much like that from a normal `tibble`.
#  flights_zoomed
#  
#  flights_zoomed_mutate <-
#    flights_zoomed %>%
#    mutate(am_pm_dep = if_else(dep_time < 1200, "am", "pm")) %>%
#    # in order to see our changes in the output we use `select()` for reordering the columns
#    select(year:dep_time, am_pm_dep, everything())
#  
#  flights_zoomed_mutate
#  
#  # To update the original `dm` with a new `flights` table we use `dm_update_zoomed()`:
#  updated_flights_dm <-
#    flights_zoomed_mutate %>%
#    dm_update_zoomed()
#  # The only difference in the `dm` print output is the increased number of columns
#  updated_flights_dm
#  # The schematic view of the data model remains unchanged
#  dm_draw(updated_flights_dm)

## ------------------------------------------------------------------------
#  library(tidyr)
#  
#  weather_zoomed <-
#    flights_dm %>%
#    dm_zoom_to(weather)
#  weather_zoomed
#  # Maybe there is some hidden candidate for a primary key that we overlooked
#  enum_pk_candidates(weather_zoomed)
#  # Seems we have to construct a column with unique values
#  # This can be done by combining column `origin` with `time_hour`, if the latter
#  # is converted to a single time zone first; all within the `dm`:
#  weather_zoomed_mutate <-
#    weather_zoomed %>%
#    # first convert all times to the same time zone:
#    mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>%
#    # paste together as character the airport code and the time
#    unite("origin_slot_id", origin, time_hour_fmt) %>%
#    select(origin_slot_id, everything())
#  # check if we the result is as expected:
#  enum_pk_candidates(weather_zoomed_mutate) %>% filter(candidate)
#  flights_upd_weather_dm <-
#    weather_zoomed_mutate %>%
#    dm_update_zoomed() %>%
#    dm_add_pk(weather, origin_slot_id)
#  flights_upd_weather_dm
#  # creating the coveted FK relation between `flights` and `weather`
#  extended_flights_dm <-
#    flights_upd_weather_dm %>%
#    dm_zoom_to(flights) %>%
#    mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>%
#    # need to keep `origin` as FK to airports, so `remove = FALSE`
#    unite("origin_slot_id", origin, time_hour_fmt, remove = FALSE) %>%
#    dm_update_zoomed() %>%
#    dm_add_fk(flights, origin_slot_id, weather)
#  extended_flights_dm %>% dm_draw()

## ------------------------------------------------------------------------
#  dm_draw(dm_nycflights13(cycle = TRUE))

## ------------------------------------------------------------------------
#  disentangled_flights_dm <-
#    dm_nycflights13(cycle = TRUE) %>%
#    # zooming and immediately inserting essentially creates a copy of the original table
#    dm_zoom_to(airports) %>%
#    # reinserting the `airports` table under the name `destination`
#    dm_insert_zoomed("destination") %>%
#    # renaming the originally zoomed table
#    dm_rename_tbl(origin = airports) %>%
#    # Key relations are also duplicated, so the wrong ones need to be removed
#    dm_rm_fk(flights, dest, origin) %>%
#    dm_rm_fk(flights, origin, destination)
#  dm_draw(disentangled_flights_dm)

## ------------------------------------------------------------------------
#  dm_with_summary <-
#    flights_dm %>%
#    dm_zoom_to(flights) %>%
#    dplyr::count(origin, carrier) %>%
#    dm_insert_zoomed("dep_carrier_count")
#  dm_draw(dm_with_summary)

## ------------------------------------------------------------------------
#  joined_flights_dm <-
#    flights_dm %>%
#    dm_zoom_to(flights) %>%
#    # let's first reduce the number of columns of flights
#    select(-dep_delay:-arr_delay, -air_time:-time_hour) %>%
#    # in the {dm}-method for the joins you can specify which columns you want to add to the zoomed table
#    left_join(planes, select = c(tailnum, plane_type = type)) %>%
#    dm_insert_zoomed("flights_plane_type")
#  # this is how the table looks now
#  joined_flights_dm$flights_plane_type
#  # also here, the FK-relations are transferred to the new table
#  dm_draw(joined_flights_dm)

## ------------------------------------------------------------------------
#  flights_dm %>%
#    dm_zoom_to(flights) %>%
#    select(-dep_delay:-arr_delay, -air_time:-time_hour) %>%
#    left_join(planes, select = c(tailnum, plane_type = type)) %>%
#    pull_tbl()

Try the dm package in your browser

Any scripts or data that you put into this service are public.

dm documentation built on May 29, 2024, 11:41 a.m.