knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(zddr)

Base Case - A Tibble

As a base case, consider a trivial example of a dataframe containing the the cross multiplication of several long lists of variables.

library(magrittr)
start_time <- Sys.time()
tb<-tibble::tibble(
  a = list(c( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20)),
  #b = list(c(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)),
  c = list(c(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60)),
  d = list(c(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80)),
  e = list(c(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100))
) %>%
  tidyr::unnest(a) %>%
  #tidyr::unnest(b) %>%
  tidyr::unnest(c) %>%
  tidyr::unnest(d) %>%
  tidyr::unnest(e) %>%
  dplyr::rowwise() %>%
  dplyr::transmute(X = list(c(a,c,d,e)))
Sys.time() - start_time
tb
pryr::object_size(tb) 

The tibble is an inefficient way to deal with sets, especially if there is a need to remove nonminimal sets. For example, say the above family add a single set of 1, making all existing sets containing 1 nonminimal.

start_time <- Sys.time()
tb %>% dplyr::filter(!1 %in% X) 
Sys.time() - start_time

Introduce the ZDD

Perform this same operation using a ZDD from this package and note the space efficiency achieved.

library(zddr)
reset_zdd_store()
start_time <- Sys.time()
zdd_or(   1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
  #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
  zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
  zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
  zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)
Sys.time() - start_time           

The importance of variable ordering

Now if the same number of elements are used, but in random order, note the difference in memory and time. This is a minor illustration of the importance of variable ordering.

reset_zdd_store()
start_time <- Sys.time()
zdd_or(  85,44,24,41, 5,20,74,71,56,52,92,76,42,89, 65,16, 1,38,10,97) *
  #zdd_or(21, 2,25,36,63,22,61,46,70,62,90,29,91,60, 79,83,51,39,81,58) *
  zdd_or( 7,40,31,26,72,45,34,57,49,67,80,87,86,19,  4,59,17,14,75,53) *
  zdd_or( 6,68,32,78,33,13,73,69,18,11,98,77,82,93, 95,23,99,96,50,88) *
  zdd_or(43,15,94,84,66,64,54, 8,47,28,55,35,48,37,100, 9, 3,30,12,27)
Sys.time() - start_time           

Minimizing the family of sets

Check out how the size of the ZDD and the calculation time change when a single non-minimal ZDD is unioned with the first one above.

reset_zdd_store()
start_time <- Sys.time()
zdd_or(   1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
  #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
  zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
  zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
  zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) |
  zdd(1) # 19*20^3+1 = 152,001
Sys.time() - start_time            

Using the same example, add one additional union of a non-minimal ZDD and watch the change in ZDD complexity (memory) and calculation time.

reset_zdd_store()
start_time <- Sys.time()
zdd_or(   1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
  #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
  zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
  zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
  zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) |
  zdd(1) | zdd(2) # 18*20^3+2 = 144,002
Sys.time() - start_time            

Minimize based on a higher ranked variable

There is a slight reduction in time and space needs when minimizing a large family of sets against a higher ranked variable. (Compare to the original minimization example above.)

reset_zdd_store()
start_time <- Sys.time()
(zdd_or(  1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
    #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
    zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
    zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
    zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)) | 
  zdd(90) # 19*20^3+1 = 152,001
Sys.time() - start_time

Minimize against a more complicated set

reset_zdd_store()
start_time <- Sys.time()
(zdd_or(   1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
    #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
    zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
    zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
    zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)) |
  zdd_and(1,90) 
Sys.time() - start_time


jordagaman/zddr documentation built on June 29, 2021, 4:23 a.m.