inst/doc/benchmarks_discretize.R

## ---- echo=FALSE, message=TRUE, results='asis'--------------------------------
is.pkg <- all(c("RTCGA.rnaseq", "microbenchmark", "RWeka", "pkgdown") %in% rownames(installed.packages()))
if(!is.pkg) {
  message("Please install all suggested packages to run benchmark.")
}

if(is.pkg && !pkgdown::in_pkgdown()) {
  is.pkg <- FALSE
  cat("### Benchmark is not available here",
  "(we don't want to run this code on the CRAN server).",
  "Please visit the ",
    paste(sep = "", 
        "https://mi2-warsaw.github.io/FSelectorRcpp/",
        "articles/benchmarks_discretize.html")
  )
}


## ----setup, eval=is.pkg, include = FALSE--------------------------------------
#  library(knitr)
#  
#  fig.path <- if(pkgdown::in_pkgdown()) {
#    "benchmarks_discretize_files/figure-html/"
#  } else {
#    "bench-figs/bench-"
#  }
#  
#  opts_chunk$set(
#      comment = "",
#      fig.width = 8,
#      fig.height = 8,
#      message = FALSE,
#      warning = FALSE,
#      tidy.opts = list(
#          keep.blank.line = TRUE,
#          width.cutoff = 150
#      ),
#      options(width = 150),
#      eval = TRUE,
#      fig.path = fig.path
#  )

## ---- eval=is.pkg-------------------------------------------------------------
#  library(microbenchmark)
#  library(FSelectorRcpp)
#  library(RWeka)

## ---- eval = FALSE------------------------------------------------------------
#  ## try https:// if https:// URLs are not supported
#  source("https://bioconductor.org/biocLite.R")
#  biocLite("RTCGA")

## ---- eval=is.pkg-------------------------------------------------------------
#  library(RTCGA.rnaseq)
#  BRCA.rnaseq <- RTCGA.rnaseq::BRCA.rnaseq
#  BRCA.rnaseq$bcr_patient_barcode <-
#     factor(substr(BRCA.rnaseq$bcr_patient_barcode, 14, 14))
#  names(BRCA.rnaseq) <- gsub(pattern = "?", replacement = "q",
#                             x = names(BRCA.rnaseq), fixed = TRUE)
#  names(BRCA.rnaseq) <- gsub(pattern = "[[:punct:]]", replacement = "_",
#                             x = names(BRCA.rnaseq))

## ---- eval=is.pkg-------------------------------------------------------------
#  library(ggplot2)
#  library(pbapply)
#  library(tibble)
#  names_by <- function(nameBy, vecBy) {
#    c(paste0(nameBy,
#             sapply(max(nchar(vecBy)) - nchar(vecBy),
#                    function(zeros) {
#                      paste0(rep(0, zeros), collapse = "")
#                    }),
#                    vecBy))
#  }
#  
#  get_times <- function(fun, vecRows, vecCols, nTimes) {
#    forRows <- pblapply(vecRows, function(nRows) {
#      forCols <- pblapply(vecCols + 1, function(nCols) {
#        suppressWarnings(microbenchmark(fun(bcr_patient_barcode ~ .,
#                           BRCA.rnaseq[1:nRows, 1:nCols]),
#                       times = nTimes))
#      })
#      colsNames <- names_by(nameBy = "columns_", vecBy = vecCols)
#      setNames(forCols, colsNames)
#    })
#    rowsNames <- names_by(nameBy = "rows_", vecBy = vecRows)
#    setNames(forRows, rowsNames)
#  }
#  
#  make_df <- function(bm_data, package) {
#    tibble(
#      rows = rep(
#        x = as.integer(
#          gsub(
#            pattern = "rows_0*",
#               replacement = "",
#               x = names(bm_data)
#          )
#        ),
#        each =  unique(sapply(bm_data, length))
#      ),
#      columns = as.integer(
#        gsub(
#          pattern = "columns_0*",
#          replacement = "",
#          x = as.vector(sapply(bm_data, names))
#        )
#      ),
#      time = as.vector(
#        sapply(
#          bm_data,
#          function(setOfCols) {
#            sapply(
#              setOfCols,
#              function(set) {
#                median(set$time) / 10 ^ 9
#              }
#            )
#          }
#        )
#      ),
#      package = package
#    )
#  }
#  
#  bm_fsr_discretize <- get_times(fun = discretize,
#                                 vecRows = seq(from = 200, to = 1000, by = 200),
#                                 vecCols = c(10, 100, 500, 1000),
#                                 nTimes = 1L)
#  bm_rwk_discretize <- get_times(fun = Discretize,
#                                 vecRows = seq(from = 200, to = 1000, by = 200),
#                                 vecCols = c(10, 100, 500, 1000),
#                                 nTimes = 1L)
#  
#  df_fsr <- make_df(bm_data = bm_fsr_discretize, package = "FSelectorRcpp")
#  df_rwk <- make_df(bm_data = bm_rwk_discretize, package = "RWeka")
#  bm_df <- rbind(df_fsr, df_rwk)
#  
#  make_plot <- function(y) {
#    yUnit <- gsub(pattern = "time",
#                  replacement = "s",
#                  x = y)
#  
#    ggplot(data = bm_df, aes_string(x = "rows", y = y, color = "package")) +
#    facet_grid(columns ~ ., labeller = label_both) +
#    geom_point() +
#    xlab("Number of rows [-]") +
#    ylab(paste0("Elapsed time [", yUnit, "]")) +
#    ggtitle(label = "Microbenchmark: discretization function comparison",
#            subtitle = "for different sizes of data sets") +
#    theme(plot.title = element_text(hjust = 0.5),
#          plot.subtitle = element_text(hjust = 0.5),
#          legend.position = "top")
#  }
#  
#  bm_plot <- make_plot("time")
#  bm_plot_log <- make_plot("log(time)")

## ----plots, eval=is.pkg, echo=FALSE-------------------------------------------
#  bm_plot
#  bm_plot_log

Try the FSelectorRcpp package in your browser

Any scripts or data that you put into this service are public.

FSelectorRcpp documentation built on April 28, 2023, 5:07 p.m.