knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(zddr)
As a base case, consider a trivial example of a dataframe containing the the cross multiplication of several long lists of variables.
library(magrittr) start_time <- Sys.time() tb<-tibble::tibble( a = list(c( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20)), #b = list(c(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)), c = list(c(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60)), d = list(c(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80)), e = list(c(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)) ) %>% tidyr::unnest(a) %>% #tidyr::unnest(b) %>% tidyr::unnest(c) %>% tidyr::unnest(d) %>% tidyr::unnest(e) %>% dplyr::rowwise() %>% dplyr::transmute(X = list(c(a,c,d,e))) Sys.time() - start_time tb pryr::object_size(tb)
The tibble is an inefficient way to deal with sets, especially if there is a need to remove nonminimal sets. For example, say the above family add a single set of 1
, making all existing sets containing 1
nonminimal.
start_time <- Sys.time() tb %>% dplyr::filter(!1 %in% X) Sys.time() - start_time
Perform this same operation using a ZDD from this package and note the space efficiency achieved.
library(zddr) reset_zdd_store() start_time <- Sys.time() zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) * #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) * zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) * zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) * zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) Sys.time() - start_time
Now if the same number of elements are used, but in random order, note the difference in memory and time. This is a minor illustration of the importance of variable ordering.
reset_zdd_store() start_time <- Sys.time() zdd_or( 85,44,24,41, 5,20,74,71,56,52,92,76,42,89, 65,16, 1,38,10,97) * #zdd_or(21, 2,25,36,63,22,61,46,70,62,90,29,91,60, 79,83,51,39,81,58) * zdd_or( 7,40,31,26,72,45,34,57,49,67,80,87,86,19, 4,59,17,14,75,53) * zdd_or( 6,68,32,78,33,13,73,69,18,11,98,77,82,93, 95,23,99,96,50,88) * zdd_or(43,15,94,84,66,64,54, 8,47,28,55,35,48,37,100, 9, 3,30,12,27) Sys.time() - start_time
Check out how the size of the ZDD and the calculation time change when a single non-minimal ZDD is unioned with the first one above.
reset_zdd_store() start_time <- Sys.time() zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) * #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) * zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) * zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) * zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) | zdd(1) # 19*20^3+1 = 152,001 Sys.time() - start_time
Using the same example, add one additional union of a non-minimal ZDD and watch the change in ZDD complexity (memory) and calculation time.
reset_zdd_store() start_time <- Sys.time() zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) * #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) * zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) * zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) * zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) | zdd(1) | zdd(2) # 18*20^3+2 = 144,002 Sys.time() - start_time
There is a slight reduction in time and space needs when minimizing a large family of sets against a higher ranked variable. (Compare to the original minimization example above.)
reset_zdd_store() start_time <- Sys.time() (zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) * #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) * zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) * zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) * zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)) | zdd(90) # 19*20^3+1 = 152,001 Sys.time() - start_time
reset_zdd_store() start_time <- Sys.time() (zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) * #zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) * zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) * zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) * zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)) | zdd_and(1,90) Sys.time() - start_time
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.