examples/example-parallelize.R

\dontrun{ # CRAN checks are very slow when parallelism is used.
  # Attach the example data shipped with the package
  # (presumably a Filebacked Big Matrix -- see `?big_attachExtdata` to confirm).
  X <- big_attachExtdata()

  ### Computation on all the matrix
  # Reference result: column statistics computed in one single call,
  # against which the parallel version below is compared.
  true <- big_colstats(X)

  # Worker applied to each part: `ind` holds the column indices of the part,
  # which are forwarded to `big_colstats` as `ind.col`.
  big_colstats_sub <- function(X, ind) big_colstats(X, ind.col = ind)
  # `big_parallelize` proceeds in three steps:
  #   - the computation is split along all the columns of `X`,
  #   - `big_colstats_sub` (hence `big_colstats`) is run on each part,
  #   - the resulting data.frames are combined via `rbind`.
  test <- big_parallelize(
    X,
    p.FUN = big_colstats_sub,
    p.combine = 'rbind',
    ncores = 2
  )
  # Compare the parallel result with the reference computed in a single call.
  all.equal(test, true)

  ### Computation on a part of the matrix
  # Draw half of the rows, then half of the columns; the indices are sorted
  # to provide some locality in accesses.
  n <- nrow(X)
  rows <- sort(sample(n, size = n / 2))
  m <- ncol(X)
  cols <- sort(sample(m, size = m / 2))

  # Reference statistics for the selected rows and columns.
  true2 <- big_colstats(X, ind.row = rows, ind.col = cols)

  # Worker for the subset case: `ind` indexes into `cols` (not directly into
  # the columns of `X`); `rows` and `cols` are received as extra arguments.
  big_colstats_sub2 <- function(X, ind, rows, cols) {
    cols_part <- cols[ind]
    big_colstats(X, ind.row = rows, ind.col = cols_part)
  }
  # This doesn't work because, by default, the computation is spread
  # along all the columns of `X`, whereas `big_colstats_sub2` uses `ind` to
  # index `cols`, which holds only half as many entries.
  # We must explicitly specify the `ind` parameter.
  # The error is caught and only its text is printed, so the example goes on.
  tryCatch(big_parallelize(X, p.FUN = big_colstats_sub2,
                           p.combine = 'rbind', ncores = 2,
                           rows = rows, cols = cols),
           # `conditionMessage(e)` extracts the error text; `message(e)` would
           # re-signal the error condition itself and print no trailing newline.
           error = function(e) message(conditionMessage(e)))

  # This now works: with `ind = seq_along(cols)`, the computation is spread
  # along the positions in `cols` instead of along all the columns.
  test2 <- big_parallelize(
    X,
    p.FUN = big_colstats_sub2,
    p.combine = 'rbind',
    ncores = 2,
    ind = seq_along(cols),
    rows = rows,
    cols = cols
  )
  # Compare with the reference computed on the same rows and columns.
  all.equal(test2, true2)

}
privefl/bigstatsr documentation built on March 29, 2024, 3:31 a.m.