# inst/doc/parallelDist.R

### R code from vignette source 'parallelDist.Rnw'

###################################################
### code chunk number 1: parallelDist.Rnw:31-33
###################################################
# Installed version string of the parallelDist package. It is not referenced
# again in this script; presumably the vignette text consumes it (TODO confirm).
prettyVersion <- packageDescription("parallelDist")$Version
# library() stops with a clear error when ggplot2 is not installed, whereas
# require() merely returns FALSE and lets the plotting code below fail later
# with a less helpful "could not find function" message.
library(ggplot2)


###################################################
### code chunk number 2: performanceDtw
###################################################
# Benchmark table behind the dtw timing figure: one row per combination of
# series count (`expr`) and method. `neval` is 1 for every row and the six
# summary columns (min, lq, mean, median, uq, max) all hold the same values,
# so the timings are written once and shared between those columns.
comparison <- local({
  # Timings (unit attribute: seconds) for 10, 100, 1000 and 10000 series.
  elapsed <- c(
    0.02173888, 2.508674745, 247.536645172, 24893.826760134,  # dtw
    0.001671714, 0.003850385, 0.36582544, 37.370421954,       # parDist threads=1
    0.000135292, 0.001352922, 0.123839325, 11.108382113       # parDist threads=8
  )
  structure(
    list(
      expr = rep(c(10, 100, 1000, 10000), times = 3),
      min = elapsed,
      lq = elapsed,
      mean = elapsed,
      median = elapsed,
      uq = elapsed,
      max = elapsed,
      neval = rep(1, times = 12),
      method = rep(c("dtw", "parDist threads=1", "parDist threads=8"), each = 4)
    ),
    unit = "seconds",
    row.names = c(2L, 3L, 4L, 5L, 9L, 10L, 11L, 12L, 16L, 17L, 18L, 19L),
    class = "data.frame"
  )
})
# Log-log plot of computation time (column `min`) against the number of
# series (column `expr`), drawn as one coloured line with points per method.
fig2 <- ggplot(data = comparison,
               aes(x = expr, y = min, group = method, colour = method)) +
  geom_line() +
  geom_point() +
  scale_y_log10(breaks = c(0.0001, .001, .01, .1, 1, 10, 100, 1000, 10000),
                labels = c(0.0001, .001, .01, .1, 1, 10, 100, 1000, 10000)) +
  # Zero cannot be placed on a log10 axis, so only positive breaks are listed.
  scale_x_log10(breaks = c(10, 100, 1000, 10000),
                labels = c(10, 100, 1000, 10000)) +
  # The legend is generated by the colour aesthetic (nothing is mapped to
  # fill), so the title must be set on the colour guide to take effect.
  guides(colour = guide_legend(title = "Method")) +
  xlab("Number of series (length 10)") +
  ylab("Computation time in s") +
  theme_light() +
  theme(legend.position = "bottom") +
  ggtitle("Distance matrix computation time (dtw, parDist)")
print(fig2)


###################################################
### code chunk number 3: benchmarkOverall
###################################################
# Benchmark table behind the per-method bar chart: 72 rows made of 36
# consecutive (dist, parDist) pairs, one pair per distance method. `neval` is
# 1 for every row and the six summary columns (min, lq, mean, median, uq, max)
# all hold the same values, so the timings are written once and shared.
comparison.overall <- local({
  # Timings (unit attribute: seconds); each line is one (dist, parDist) pair.
  elapsed <- c(
    27.023019786, 0.42355673,    # bhjattacharyya
    23.337486167, 0.162221163,   # bray
    1.132874622, 0.258428158,    # canberra
    57.787881691, 0.192326593,   # chord
    38.823629012, 0.218193139,   # divergence
    0.943546255, 0.125315296,    # euclidean
    1.19984082, 0.560230453,     # fJaccard
    49.638932656, 0.287721827,   # geodesic
    38.601467265, 0.575263573,   # hellinger
    40.453796831, 1.274451564,   # kullback
    0.933230054, 0.122381139,    # manhattan
    0.933518058, 0.124280224,    # maximum
    894.344310554, 1.209659702,  # podani
    24.010877591, 0.504667156,   # soergel
    22.613629988, 0.625519014,   # wave
    28.623759888, 0.311995644,   # whittaker
    0.748200286, 0.150044756,    # binary
    25.131624635, 0.151615403,   # braun-blanquet
    21.827435577, 0.151512145,   # dice
    32.544226019, 0.166376382,   # fager
    17.890447476, 0.156928859,   # faith
    20.680027643, 0.142911527,   # hamman
    16.802475029, 0.152215982,   # kulczynski1
    27.751515131, 0.140722573,   # kulczynski2
    45.313170363, 0.163320617,   # michael
    27.947986367, 0.146164789,   # mountford
    26.016592883, 0.164806356,   # mozley
    24.045695896, 0.159119679,   # ochiai
    46.747133491, 0.150115357,   # phi
    12.639720779, 0.134051207,   # russel
    16.417508337, 0.152236198,   # simple matching
    23.787851211, 0.137237601,   # simpson
    664.737856716, 0.422527878,  # stiles
    26.300990224, 0.149172968,   # tanimoto
    28.118753098, 0.150392786,   # yule
    35.712203217, 0.164383059    # yule2
  )

  # Factor levels of `method`, in the stored level order.
  method_levels <- c(
    "yule2", "yule", "whittaker", "wave", "tanimoto", "stiles", "soergel",
    "simpson", "simple matching", "russel", "podani", "phi", "ochiai",
    "mozley", "mountford", "michael", "maximum", "manhattan", "kullback",
    "kulczynski2", "kulczynski1", "hellinger", "hamman", "geodesic",
    "fJaccard", "faith", "fager", "euclidean", "divergence", "dice", "chord",
    "canberra", "bray", "braun-blanquet", "binary", "bhjattacharyya"
  )

  # Level code of each pair, in row order; every code covers two rows.
  pair_codes <- c(
    36L, 33L, 32L, 31L, 29L, 28L, 25L, 24L, 22L, 19L, 18L, 17L,
    11L, 7L, 4L, 3L, 35L, 34L, 30L, 27L, 26L, 23L, 21L, 20L,
    16L, 15L, 14L, 13L, 12L, 10L, 9L, 8L, 6L, 5L, 2L, 1L
  )

  structure(
    list(
      expr = rep(c("dist", "parDist"), times = 36),
      min = elapsed,
      lq = elapsed,
      mean = elapsed,
      median = elapsed,
      uq = elapsed,
      max = elapsed,
      neval = rep(1, times = 72),
      method = structure(
        rep(pair_codes, each = 2),
        .Label = method_levels,
        class = c("ordered", "factor")
      )
    ),
    .Names = c("expr", "min", "lq", "mean", "median", "uq", "max", "neval", "method"),
    row.names = c(NA, -72L),
    unit = "seconds",
    class = "data.frame"
  )
})
# Horizontal dodged bar chart of computation time (column `min`) per distance
# method, with one bar for each value of `expr` (dist and parDist).
plot.distances <- ggplot(
  data = comparison.overall,
  aes(x = method, y = min, fill = expr)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  theme_light() +
  # Disabled alternative ordering of the method axis, kept for reference:
  #scale_x_discrete(name="", limits = rev(levels(comparison.overall$method))) +
  coord_flip() +
  guides(fill = guide_legend(title = "Method")) +
  labs(
    x = "Distance method",
    y = "Computation time in s",
    title = "Distance matrix computation time (5000 series of length 10)",
    caption = "Excluded distances for better comparison: dtw, mahalanobis, minkowski"
  )
print(plot.distances)


###################################################
### code chunk number 4: parallelDist.Rnw:258-260 (eval = FALSE)
###################################################
## # matrix where each row corresponds to one series
## sample.matrix <- matrix(c(1:100), ncol = 10)


###################################################
### code chunk number 5: parallelDist.Rnw:265-267 (eval = FALSE)
###################################################
## # euclidean distance
## dist.euclidean <- parDist(sample.matrix, method = "euclidean")


###################################################
### code chunk number 6: parallelDist.Rnw:272-277 (eval = FALSE)
###################################################
## # convert to matrix
## as.matrix(dist.euclidean)
## 
## # create hierarchical agglomerative clustering model
## hclust.model <- hclust(dist.euclidean, method="ward")


###################################################
### code chunk number 7: parallelDist.Rnw:282-287 (eval = FALSE)
###################################################
## # minkowski distance with parameter p=2
## parDist(x = sample.matrix, method = "minkowski", p=2)
## 
## # dynamic time warping distance normalized with warping path length
## parDist(x = sample.matrix, method = "dtw", norm.method="path.length")


###################################################
### code chunk number 8: parallelDist.Rnw:292-293 (eval = FALSE)
###################################################
## ?parDist


###################################################
### code chunk number 9: parallelDist.Rnw:298-300 (eval = FALSE)
###################################################
## # use 2 threads
## dist.euclidean <- parDist(sample.matrix, method = "euclidean", threads = 2)


###################################################
### code chunk number 10: parallelDist.Rnw:309-313 (eval = FALSE)
###################################################
## # define a list of matrices, where each list entry is one
## # two-dimensional series (one row per dimension)
## tmp.mat <- matrix(c(1:40), ncol = 10)
## sample.matrix.list <- list(tmp.mat[1:2,], tmp.mat[3:4,])


###################################################
### code chunk number 11: parallelDist.Rnw:318-320 (eval = FALSE)
###################################################
## # multi-dimensional dynamic time warping
## parDist(x = sample.matrix.list, method = "dtw")


###################################################
### code chunk number 12: parallelDist.Rnw:337-350 (eval = FALSE)
###################################################
## # RcppArmadillo is used as dependency
## library(RcppArmadillo)
## # Use RcppXPtrUtils for simple usage of C++ external pointers
## library(RcppXPtrUtils)
## 
## # compile user-defined function and return pointer (RcppArmadillo is used as dependency)
## euclideanFuncPtr <- cppXPtr("double customDist(const arma::mat &A, const arma::mat &B) {
##                             return sqrt(arma::accu(arma::square(A - B))); }",
##                             depends = c("RcppArmadillo"))
## 
## # distance matrix for user-defined euclidean distance function
## # (note that method is set to "custom")
## parDist(matrix(1:16, ncol=2), method="custom", func = euclideanFuncPtr)


###################################################
### code chunk number 13: parallelDist.Rnw:381-387 (eval = FALSE)
###################################################
## # load dtw package
## library(dtw)
## # print the step pattern
## print(symmetric2)
## # use the symmetric2 object as input parameter for the parDist function
## parDist(x = sample.matrix, method = "dtw", step.pattern = symmetric2)

# Try the parallelDist package in your browser

# Any scripts or data that you put into this service are public.

# parallelDist documentation built on Feb. 4, 2022, 1:06 a.m.