Let us check timings of simple sums:
library(fromo)
library(microbenchmark)
print(Sys.info())
## sysname release
## "Linux" "4.15.0-42-generic"
## version nodename
## "#45~16.04.1-Ubuntu SMP Mon Nov 19 13:02:27 UTC 2018" "c4667c3c0807"
## machine login
## "x86_64" "unknown"
## user effective_user
## "docker" "docker"
print(sessionInfo())
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux buster/sid
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.8.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.8.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] utils methods base
##
## other attached packages:
## [1] microbenchmark_1.4-6 moments_0.14 dplyr_0.7.8 ggplot2_3.1.0 knitr_1.21 fromo_0.2.0.002
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.0 bindr_0.1.1 magrittr_1.5 grDevices_3.5.2 tidyselect_0.2.5 munsell_0.5.0 colorspace_1.3-2 R6_2.3.0 rlang_0.3.0.1
## [10] stringr_1.3.1 plyr_1.8.4 tools_3.5.2 grid_3.5.2 gtable_0.2.0 xfun_0.4 withr_2.1.2 stats_3.5.2 lazyeval_0.2.1
## [19] assertthat_0.2.0 tibble_1.4.2 crayon_1.3.4 bindrcpp_0.2.2 formatR_1.5 purrr_0.2.5 graphics_3.5.2 glue_1.3.0 evaluate_0.12
## [28] stringi_1.2.4 compiler_3.5.2 pillar_1.3.1 scales_1.0.0 pkgconfig_2.0.2
# Reproducible test data: 100,000 standard-normal draws and the window length
# used for every running computation below.
set.seed(12345)
x <- rnorm(n = 1e5)
wins <- 250
# Reference running sum over a trailing window, built from cumulative sums.
#
# Element i of the result is the cumulative sum at i minus the cumulative sum
# `wins` positions earlier (taken as 0 where no such position exists), i.e.
# the sum of the last `wins` elements ending at i.
#
# x:    numeric vector.
# wins: non-negative window length. A window of 0 yields all zeros; a window
#       at least as long as x yields cumsum(x).
#
# Returns a numeric vector with the same length as x.
ref_running_sum <- function(x, wins) {
  cx <- cumsum(x)
  n <- length(cx)
  # Clamp the lag so that windows longer than the input do not produce a
  # descending 1:k index (which would recycle or fail on mixed subscripts).
  lag <- min(wins, n)
  cx - c(rep(0, lag), cx[seq_len(n - lag)])
}
# Alternative reference running sum over a trailing window.
#
# Same result as differencing the cumulative sum against a lagged copy of
# itself, but assembled in two pieces: the first `wins` cumulative sums are
# kept as they are, and the remainder are differenced against the cumulative
# sum `wins` positions earlier.
#
# x:    numeric vector.
# wins: non-negative window length. A window of 0 yields all zeros; a window
#       at least as long as x yields cumsum(x).
#
# Returns a vector with the same length as x.
ref_running_sum2 <- function(x, wins) {
  cx <- cumsum(x)
  n <- length(cx)
  # Clamp the lag: 1:wins and (wins + 1):n are only valid for 0 < wins < n;
  # outside that range they count downwards and select the wrong elements.
  lag <- min(wins, n)
  tail_idx <- seq_len(n - lag)
  c(cx[seq_len(lag)], cx[tail_idx + lag] - cx[tail_idx])
}
# Sanity check before timing: element-wise difference between running_sum
# (not defined here; presumably from fromo) and the cumsum-based reference.
blah <- running_sum(x, wins) - ref_running_sum(x, wins)
# Summarize the differences, leaving out the first three elements
# (NOTE(review): the reason for skipping them is not stated here).
print(summary(blah[-seq_len(3)]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.84e-14 -3.60e-15 0.00e+00 -4.00e-16 1.80e-15 3.02e-14
# Timings: whole-vector baselines (sum, mean, cumsum), gc() as a yardstick,
# the running_sum / running_mean calls under test, and both cumsum-based
# reference implementations.
checkit <- microbenchmark(
  sum(x),
  mean(x),
  gc(),
  running_sum(x, wins),
  running_sum(x, wins, na_rm = FALSE, restart_period = 50000L),
  running_mean(x, wins),
  ref_running_sum(x, wins),
  ref_running_sum2(x, wins),
  cumsum(x)
)
print(checkit)
## Unit: microseconds
## expr min lq mean median uq max neval cld
## sum(x) 80 85 92 88 95 174 100 a
## mean(x) 163 182 195 188 200 296 100 a
## gc() 42571 43554 45673 44268 46801 61135 100 d
## running_sum(x, wins) 723 867 899 903 937 1122 100 b
## running_sum(x, wins, na_rm = FALSE, restart_period = 50000L) 722 873 900 897 935 1200 100 b
## running_mean(x, wins) 723 855 897 900 930 1346 100 b
## ref_running_sum(x, wins) 567 724 1213 1208 1251 15651 100 bc
## ref_running_sum2(x, wins) 857 1434 1744 1759 1860 6197 100 c
## cumsum(x) 86 110 234 272 290 583 100 a
# Mean raw time per benchmarked expression, keeping only expressions whose
# label contains "running_sum", plus each mean as a multiple of the fastest.
checkit %>%
  group_by(expr) %>%
  dplyr::summarize(meant = mean(time, na.rm = TRUE)) %>%
  ungroup() %>%
  dplyr::filter(grepl("running_sum", expr)) %>%
  mutate(timeover = meant / min(meant, na.rm = TRUE)) %>%
  kable()
|expr                                                         |   meant| timeover|
|:------------------------------------------------------------|-------:|--------:|
|running_sum(x, wins)                                         |  899403|      1.0|
|running_sum(x, wins, na_rm = FALSE, restart_period = 50000L) |  900364|      1.0|
|ref_running_sum(x, wins)                                     | 1213388|      1.4|
|ref_running_sum2(x, wins)                                    | 1743633|      1.9|
Welford standard deviation is easy to compute quickly:
library(fromo)
library(microbenchmark)
print(Sys.info())
## sysname release
## "Linux" "4.15.0-42-generic"
## version nodename
## "#45~16.04.1-Ubuntu SMP Mon Nov 19 13:02:27 UTC 2018" "c4667c3c0807"
## machine login
## "x86_64" "unknown"
## user effective_user
## "docker" "docker"
print(sessionInfo())
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux buster/sid
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.8.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.8.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats utils methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 microbenchmark_1.4-6 moments_0.14 dplyr_0.7.8 ggplot2_3.1.0 knitr_1.21 fromo_0.2.0.002
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.0 highr_0.7 pillar_1.3.1 compiler_3.5.2 formatR_1.5 plyr_1.8.4 bindr_0.1.1 tools_3.5.2 grDevices_3.5.2
## [10] evaluate_0.12 tibble_1.4.2 gtable_0.2.0 lattice_0.20-38 pkgconfig_2.0.2 rlang_0.3.0.1 Matrix_1.2-15 mvtnorm_1.0-8 xfun_0.4
## [19] withr_2.1.2 stringr_1.3.1 graphics_3.5.2 grid_3.5.2 tidyselect_0.2.5 glue_1.3.0 R6_2.3.0 survival_2.43-3 multcomp_1.4-8
## [28] TH.data_1.0-9 purrr_0.2.5 magrittr_1.5 codetools_0.2-15 MASS_7.3-51.1 splines_3.5.2 scales_1.0.0 assertthat_0.2.0 colorspace_1.3-2
## [37] sandwich_2.5-0 stringi_1.2.4 lazyeval_0.2.1 munsell_0.5.0 crayon_1.3.4 zoo_1.8-4
# Reproducible test data: 100,000 standard-normal draws and the window length
# used for every running computation below.
set.seed(12345)
x <- rnorm(n = 1e5)
wins <- 250
# Sanity check before timing: element-wise difference between running_sd and
# ref_running_sd (neither is defined in this listing).
blah <- running_sd(x, wins) - ref_running_sd(x, wins)
# Summarize the differences, leaving out the first three elements
# (NOTE(review): the reason for skipping them is not stated here).
print(summary(blah[-seq_len(3)]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.4e-15 -7.0e-16 1.6e-15 1.8e-15 4.8e-15 8.5e-15
# Timings: whole-vector standard deviations, the running_sd calls under test,
# gc() as a yardstick, and the ref_running_sd_* variants (defined elsewhere).
checkit <- microbenchmark(
  sd(x),
  sd3(x),
  ref_sd(x),
  ref_sd_objecty(x),
  running_sd(x, wins),
  running_sd(x, wins, na_rm = FALSE, restart_period = 50000L),
  gc(),
  ref_running_sd(x, wins),
  ref_running_sd_narm(x, wins),
  ref_running_sd_intnel(x, wins),
  ref_running_sd_objecty(x, wins),
  ref_running_sd_onecheck(x, wins),
  ref_running_sd_fooz(x, wins),
  ref_running_sd_barz(x, wins)
)
print(checkit)
## Unit: microseconds
## expr min lq mean median uq max neval cld
## sd(x) 331 388 438 406 475 1224 100 a
## sd3(x) 721 747 806 776 848 1078 100 a
## ref_sd(x) 716 748 813 782 863 1035 100 a
## ref_sd_objecty(x) 720 744 791 769 830 970 100 a
## running_sd(x, wins) 2978 3200 3407 3309 3561 4964 100 b
## running_sd(x, wins, na_rm = FALSE, restart_period = 50000L) 1181 1276 1442 1387 1446 5489 100 ab
## gc() 101077 103125 113679 105176 123059 182245 100 c
## ref_running_sd(x, wins) 1182 1267 1374 1380 1422 1721 100 ab
## ref_running_sd_narm(x, wins) 1179 1243 1444 1394 1537 3021 100 ab
## ref_running_sd_intnel(x, wins) 1181 1274 1391 1371 1468 1853 100 ab
## ref_running_sd_objecty(x, wins) 1185 1272 1403 1402 1460 1740 100 ab
## ref_running_sd_onecheck(x, wins) 1184 1299 1399 1382 1458 1858 100 ab
## ref_running_sd_fooz(x, wins) 1200 1318 1451 1410 1519 2038 100 ab
## ref_running_sd_barz(x, wins) 1815 1930 2039 1997 2094 2706 100 ab
# Mean raw time per benchmarked expression, keeping only expressions whose
# label contains "running_sd", plus each mean as a multiple of the fastest.
checkit %>%
  group_by(expr) %>%
  dplyr::summarize(meant = mean(time, na.rm = TRUE)) %>%
  ungroup() %>%
  dplyr::filter(grepl("running_sd", expr)) %>%
  mutate(timeover = meant / min(meant, na.rm = TRUE)) %>%
  kable()
|expr                                                        |   meant| timeover|
|:-----------------------------------------------------------|-------:|--------:|
|running_sd(x, wins)                                         | 3407199|      2.5|
|running_sd(x, wins, na_rm = FALSE, restart_period = 50000L) | 1442064|      1.1|
|ref_running_sd(x, wins)                                     | 1373753|      1.0|
|ref_running_sd_narm(x, wins)                                | 1444354|      1.1|
|ref_running_sd_intnel(x, wins)                              | 1390967|      1.0|
|ref_running_sd_objecty(x, wins)                             | 1402811|      1.0|
|ref_running_sd_onecheck(x, wins)                            | 1399323|      1.0|
|ref_running_sd_fooz(x, wins)                                | 1451174|      1.1|
|ref_running_sd_barz(x, wins)                                | 2038890|      1.5|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.