# Document-wide chunk options: merge each chunk's source and output into a
# single block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(mandrake)
library(drake)
library(knitr)
library(magrittr)

# Load helper functions from the example project.
# NOTE(review): presumably this defines simulate(), reg1() and reg2(), which
# the plan below calls but which are not defined in this file -- confirm.
source("mtcars/R/functions.R")

Introduction

This tutorial assumes that you have basic familiarity with {drake}, as most of the patterns shown here build off of {drake} workflow practices.

The plan

We start by building our plan. Imagine that we have the mtcars dataset, & we want to run some models on it.

Take note of the desc argument provided to the mtcars target. This allows us to write a markdown description for a target.

# This is where you set up your workflow plan,
# a data frame with the steps of your analysis.
# Each named argument to drake_plan() below is one target paired with the
# command that builds it; `desc` attaches a markdown description to a target.

my_plan <- drake::drake_plan(
  # Knit the report source (tracked input file) to markdown (tracked output).
  report = knit(knitr_in("mtcars/report.Rmd"), file_out("mtcars/report.md"), quiet = TRUE),
  # Two datasets built by simulate() with different size arguments.
  small = simulate(48),
  large = simulate(64),
  # reg1() is mapped over both datasets. The resulting targets are tagged
  # as `reg` (consumed by summ/coef below) and carry a `cluster_id` tag,
  # which is the grouping column used later for the plan and the graph.
  regression1 = target(
    reg1(data),
    transform = map(data = c(small, large), .tag_out = reg, .tag_in = cluster_id)
  ),
  # Same mapping over `data`, this time with reg2().
  regression2 = target(
    reg2(data),
    transform = map(data, .tag_out = reg, .tag_in = cluster_id)
  ),
  # One residual summary per `reg` target.
  summ = target(
    suppressWarnings(summary(reg$residuals)),
    transform = map(reg, .tag_in = cluster_id)
  ),
  # One coefficient table per `reg` target.
  coef = target(
    suppressWarnings(summary(reg))$coefficients,
    transform = map(reg, .tag_in = cluster_id)
  ),

  # The raw dataset as a target, with a markdown description.
  mtcars = target(
    mtcars,
    desc = "
**This dataframe describes a bunch of cars that go vroom!**

I can't drive but one day I will learn
"
  ),

  # Per-cylinder-count means of disp, hp, drat, wt and qsec.
  mtcars_summary = target({
      out <- mtcars %>%
        dplyr::group_by(cyl) %>%
        dplyr::summarise(dplyr::across(c(disp, hp, drat, wt, qsec), mean))
      out    
    },
   desc = "
**Average of attributes of cars according to number of cylinders** 
This dataframe relates `cyl` (number of cylinders) to a few properties of mtcars
"
  ),
  # Keep the transformation trace columns in the plan; presumably this is what
  # makes the `cluster_id` column available for grouping -- confirm.
  trace = TRUE
)

Let's examine our plan as a dataframe:

# Render the plan as a styled HTML table: commands are converted to short
# text labels and each description is wrapped in a <code> element, so HTML
# escaping is switched off when building the table.
html_plan <- dplyr::mutate(
  my_plan,
  dplyr::across("command", rlang::as_label),
  desc = glue::glue("<code>{desc}</code>", .sep = "\n")
) %>%
  knitr::kable("html", escape = FALSE) %>%
  kableExtra::kable_styling(c("striped", "responsive", "condensed"))

Now, let's build it.

# Configure and build the plan first, and only then look up the cache.
# drake::get_cache() returns NULL when no cache exists yet, so fetching it
# before the first build would leave `cache` as NULL on a fresh run; `cache`
# is passed to mandrake::decorate_plan() further down.
my_config <- drake_config(my_plan)
drake::make(config = my_config)
cache <- drake::get_cache()

Taking it further

Usually, this is where {drake}'s job is done, but {mandrake} extends this a little, & lets us access documentation for this dataset directly from the workflow graph.

We load the column specification for mandrake.

# Load the column specification shipped with the "mandrake" package; the
# result is handed to mandrake::decorate_plan() as `lookup_cache` below.
lookup_cache <- mandrake::load_package_colspec("mandrake")

By default, this is found at {pkgname}/inst/{pkgname}.yaml, and is generated using the #' @col & #' @inheritCol roxygen2 tags.

Examining this yields:

# Print an abridged view of the installed column-spec YAML: only the `am`
# and `y` entries are shown, with a placeholder standing in for the rest.
system.file("mandrake", "mandrake.yaml", package = "mandrake") %>%
  yaml::read_yaml() %>%
  (function(spec) {
    list(
      am = spec$am,
      "..." = "...more column specs...",
      y = spec$y
    )
  }) %>%
  yaml::as.yaml() %>%
  writeLines()

Attach the column documentation to our plan.

# Decorate the plan with the column documentation, grouping targets by the
# `cluster_id` column.
plan_extracted <- mandrake::decorate_plan(
  my_plan,
  cache,
  group = "cluster_id",
  lookup_cache = lookup_cache
)

# Display the decorated plan as a styled HTML table (commands shown as
# short text labels).
knitr::kable(
  dplyr::mutate(plan_extracted, dplyr::across("command", rlang::as_label)),
  "html",
  escape = TRUE
) %>%
  kableExtra::kable_styling(c("striped", "responsive", "condensed"))

# Rebuild the config from the decorated plan for the graph step.
my_config <- drake_config(plan_extracted)

Build the graph

# Collect graph data from the decorated plan's config. Targets are grouped
# by `cluster_id`, the "summ" and "coef" groups are collapsed into clusters,
# build times are omitted, and the `desc` column is the on-select payload.
graph_info <- drake_graph_info(
  my_config,
  group = "cluster_id",
  clusters = c("summ", "coef"),
  build_times = "none",
  on_select_col = "desc"
)

# Render the network and attach mandrake's dependencies. `FALSE` is spelled
# out because `F` is an ordinary variable that can be reassigned.
graph <- render_drake_graph(
  graph_info,
  on_select = "embedHandler",
  ncol_legend = 4
) %>%
  mandrake::attach_dependencies(standalone = FALSE)

# Lay the graph out left-to-right with horizontally curved edges.
graph <- graph %>%
  visNetwork::visHierarchicalLayout(
    direction = "LR",
    levelSeparation = 250
  ) %>%
  visNetwork::visEdges(
    smooth = list(type = "cubicBezier", forceDirection = "horizontal")
  )

Graph

# Evaluating the object at top level prints (renders) the graph widget.
graph


mstr3336/mandrake documentation built on April 27, 2021, 1:53 p.m.