#' Data Manipulation with Parallelism and Shared Memory Matrices
#'
#' @description
#' Provides a new form of data frame backed by shared memory matrices and
#' a way to manipulate them. Upon creation these data frames are shared
#' across multiple local nodes to allow for simple parallel processing. Run the
#' following command for a more thorough explanation: \code{vignette("basics")}
#'
#' @section Major differences from dplyr:
#'
#' \code{summarise} with dplyr will return a single number, but here it
#' will return N values depending on how many nodes there are. Typically
#' you should follow \code{summarise} with \code{\link{reduce}}, which is
#' run locally.
#'
#' @section Standard dplyr-like functions:
#' \tabular{ll}{
#' \code{\link{arrange}} \tab Sort data \cr
#' \code{\link{distinct}} \tab Select unique rows or unique combinations of variables \cr
#' \code{\link{filter}} \tab Filter data \cr
#' \code{\link{group_by}} \tab Group data \cr
#' \code{\link{group_sizes}} \tab Return size of groups \cr
#' \code{\link{groupwise}} \tab Use grouped data (also known as \code{ungroup})\cr
#' \code{\link{mutate}} \tab Change values of existing variables (and create new ones) \cr
#' \code{\link{n_groups}} \tab Return number of groups \cr
#' \code{\link{rename}} \tab Rename variables \cr
#' \code{\link{rowwise}} \tab Use data as individual rows \cr
#' \code{\link{select}} \tab Retain only specified variables \cr
#' \code{\link{slice}} \tab Select rows by position\cr
#' \code{\link{summarise}} \tab Summarise data \cr
#' \code{\link{transmute}} \tab Change variables and drop all others \cr
#' }
#'
#' @section Parallel functions:
#' \tabular{ll}{
#' \code{\link{partition_even}} \tab Partition data evenly amongst cluster nodes \cr
#' \code{\link{partition_group}} \tab Partition data so that each group is wholly on a node \cr
#' \code{\link{within_group}} \tab Execute code within a group \cr
#' \code{\link{within_node}} \tab Execute code within a node \cr
#' }
#'
#' @section Additional data frame functions:
#' \tabular{ll}{
#' \code{\link{Multiplyr}} \tab Create new parallel data frame \cr
#' \code{\link{define}} \tab Define new variables \cr
#' \code{\link{nsa}} \tab No strings attached mode \cr
#' \code{\link{reduce}} \tab Summarise locally only \cr
#' \code{\link{regroup}} \tab Return to grouped data \cr
#' \code{\link{undefine}} \tab Delete variables \cr
#' }
#'
#' @section Data manipulation adjuncts:
#' \tabular{ll}{
#' \code{\link{between}} \tab Tests whether elements of a vector lie between two values (inclusively) \cr
#' \code{\link{cumall}} \tab Cumulative all \cr
#' \code{\link{cumany}} \tab Cumulative any \cr
#' \code{\link{cummean}} \tab Cumulative mean \cr
#' \code{\link{first}} \tab Returns first value in vector \cr
#' \code{\link{last}} \tab Returns last value in vector \cr
#' \code{\link{lag}} \tab Offset x backwards by n \cr
#' \code{\link{lead}} \tab Offset x forwards by n \cr
#' \code{\link{n}} \tab Number of items in current group \cr
#' \code{\link{nth}} \tab Return the nth item from a vector \cr
#' }
#'
#' @importFrom bigmemory sub.big.matrix attach.big.matrix mwhich mpermute
#' @importFrom bigmemory.sri describe
#' @importFrom magrittr %>%
#' @importFrom parallel makeCluster stopCluster clusterExport clusterEvalQ
#' @importClassesFrom bigmemory big.matrix big.matrix.descriptor
#' @docType package
#' @name multiplyr
NULL
# Declare the names below as global variables so that R CMD check does not
# raise NOTEs about missing global variable bindings. The registration is
# guarded by a version check and only runs on R 2.15.1 or later.
if (getRversion() >= "2.15.1") {
  utils::globalVariables(
    c(
      ".Gbase", ".end", ".expr", ".grouped", ".groups", ".local",
      ".rows", ".start", ".tg", ".gcdesc", ".offset"
    )
  )
}
# Package load hook: installs default package options.
#
# Sets the `multiplyr.cores` option unless an option of that name is already
# set. The value is taken from the R_MULTIPLYR_CORES environment variable when
# it holds a number; otherwise it defaults to one fewer than the number of
# detected cores, with a floor of 1.
#
# libname, pkgname: standard .onLoad arguments (unused here).
.onLoad <- function(libname, pkgname) {
  # detectCores() may return NA, and on a single-core machine
  # detectCores() - 1 would be 0; neither is usable as a core count.
  detected <- parallel::detectCores()
  default_cores <- if (is.na(detected)) 1 else max(1, detected - 1)

  cores <- default_cores
  requested <- Sys.getenv("R_MULTIPLYR_CORES", unset = NA_character_)
  if (!is.na(requested) && nzchar(requested)) {
    parsed <- suppressWarnings(as.numeric(requested))
    if (is.na(parsed)) {
      # Fall back to the default rather than storing NA in the option.
      warning(
        "Ignoring non-numeric R_MULTIPLYR_CORES value: ", requested,
        call. = FALSE
      )
    } else {
      cores <- parsed
    }
  }

  op_multiplyr <- list(multiplyr.cores = cores)

  # Only fill in options that are not already set.
  toset <- !(names(op_multiplyr) %in% names(options()))
  if (any(toset)) {
    options(op_multiplyr[toset])
  }
  invisible()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.