basics.R
In multiplyr: Data Manipulation with Parallelism and Shared Memory Matrices

## ---- echo = FALSE, message = FALSE--------------------------------------
library(multiplyr)
knitr::opts_chunk$set(
  comment = NA,
  error   = FALSE,
  tidy    = FALSE,
  eval    = FALSE)

## ------------------------------------------------------------------------
#  # Construct a new data frame
#  dat <- Multiplyr (x=1:100, G=rep(c("A", "B", "C", "D"), length.out=100))
#  
#  # Group data by G
#  dat %>% group_by (G)
#  
#  # Summarise length(x) in each group and store in N
#  dat %>% summarise (N = length(x))

## ------------------------------------------------------------------------
#  dat <- Multiplyr (x=1:100, y=1:100)
#  
#  dat %>% filter(x<=50) %>% mutate (y=x*2) %>% select (y)

## ------------------------------------------------------------------------
#  dat <- Multiplyr (x=1:100)
#  
#  dat %>% mutate (x=length(x):1)

## ------------------------------------------------------------------------
#  # Create a new data frame with 2 columns called x and y
#  dat <- Multiplyr (x=1:100, y=rep(1, 100))
#  
#  # As above, but allocate space for 1 new column and create cluster with 2 nodes
#  dat <- Multiplyr (x=1:100, y=rep(1, 100), alloc=1, cl=2)
#  
#  # Convert back to standard data frame
#  dat.df <- as.data.frame (dat)
#  dat.df <- dat %>% as.data.frame()

## ------------------------------------------------------------------------
#  # Create new data frame with space for 3 new columns
#  dat <- Multiplyr (x=1:100, alloc=3)
#  
#  # Create 2 new columns named y and z
#  # Note that this define is actually not needed as mutate will define implicitly
#  dat %>% define (y, z)
#  
#  # Do things with new columns
#  dat %>% mutate (y=x*2, z=sqrt(x))

## ------------------------------------------------------------------------
#  # Create new Multiplyr frame with two columns (A and P) with space for 3 more
#  dat <- Multiplyr (A=rep(c("A", "B", "C", "D"), each=25),
#                    P=rep(c("p", "q"), each=50),
#                    alloc=3)
#  
#  # Create new columns named newA and newP
#  dat %>% define (newA=A, newP=P)
#  
#  # Set their values
#  dat %>% mutate (newA="A", newP="p")

## ------------------------------------------------------------------------
#  # Create a new Multiplyr frame with variables named x, y and z
#  dat <- Multiplyr (x=1:100, y=100:1, z=rnorm(100))
#  
#  # Rename to p, q and r
#  dat %>% rename (p=x, q=y, r=z)

## ------------------------------------------------------------------------
#  # Create Multiplyr data frame with columns named x, y, z and misc
#  dat <- Multiplyr (x=1:100, y=100:1, z=rnorm(100), misc=rep(1, 100))
#  
#  # Drop column named misc
#  dat %>% undefine (misc)
#  
#  # Keep only the x and y columns
#  dat %>% select (x, y)

## ------------------------------------------------------------------------
#  # Load data on guinea pig tooth growth:
#  #  len    Odontoblast (tooth cell) length
#  #  supp   Supplement (VC = Vit C supplement, OJ = Orange Juice)
#  #  dose   Dose in mg (0.5, 1 or 2)
#  #
#  data (ToothGrowth)
#  
#  # Convert into a Multiplyr data frame with space for 2 new columns
#  dat <- Multiplyr (ToothGrowth, alloc=2)
#  
#  # Group data by supplement and dose
#  dat %>% group_by (supp, dose)
#  
#  # Produce summary statistics
#  dat %>% summarise (len.mean = mean(len), len.sd = sd(len))

## ------------------------------------------------------------------------
#  # Load data on guinea pig tooth growth:
#  data (ToothGrowth)
#  dat <- Multiplyr (ToothGrowth)
#  
#  # Group data by supplement and dose
#  dat %>% group_by (supp, dose)
#  
#  # Ungroups data
#  dat %>% ungroup()
#  
#  # Regroups data (implicitly by supplement and dose)
#  dat %>% regroup()

## ------------------------------------------------------------------------
#  # Load data on guinea pig tooth growth:
#  data (ToothGrowth)
#  dat <- Multiplyr (ToothGrowth)
#  
#  # Group by supplement
#  dat %>% group_by (supp)
#  
#  # Select only guinea pigs with a dose >= 1 mg/kg
#  dat %>% filter (dose >= 1)
#  
#  # Produce summary
#  dat %>% summarise (mean.len = mean(len))

## ------------------------------------------------------------------------
#  # Create new Multiplyr data frame
#  dat <- Multiplyr (A=rep(c("A", "B", "C", "D"), each=25),
#                    P=rep(c("p", "q"), length.out=100))
#  
#  # Filter so only one of each AxP combination
#  dat %>% distinct()
#  
#  # Filter so only one of each A
#  dat %>% distinct(A)

## ------------------------------------------------------------------------
#  # Construct a new data frame
#  dat <- Multiplyr (x=1:100, G=rep(c("A", "B", "C", "D"), length.out=100))
#  
#  # Return the first 10 rows only
#  dat %>% slice (1:10)
#  
#  # Return alternate rows
#  dat %>% slice (rep(c(TRUE, FALSE), length.out=10))

## ------------------------------------------------------------------------
#  # Construct a new data frame
#  dat <- Multiplyr (x=1:100, G=rep(c("A", "B", "C", "D"), length.out=100))
#  
#  # Group by G
#  dat %>% group_by (G)
#  
#  # Return the first 10 rows in each group
#  dat %>% slice (1:10, each=TRUE)

## ------------------------------------------------------------------------
#  # Construct a new data frame
#  dat <- Multiplyr (x=1:100, G=rep(c("A", "B", "C", "D"), length.out=100))
#  
#  # Return the first 10 rows in each node
#  dat %>% slice (1:10, each=TRUE)

## ------------------------------------------------------------------------
#  # Create new data frame
#  dat <- Multiplyr (G=rep(c("A", "B", "C", "D"), each=25), x=100:1)
#  
#  # Sort by G, then by x
#  dat %>% arrange (G, x)

## ------------------------------------------------------------------------
#  # Construct a new data frame with space for 2 new columns
#  dat <- Multiplyr (x=1:100, alloc=2)
#  
#  # Update all cells in the x column to be twice their value
#  dat %>% mutate (x=x*2)
#  
#  # Create 2 new columns and populate them with data
#  dat %>% mutate (y=x*2, z=sqrt(x))

## ------------------------------------------------------------------------
#  # Construct a new data frame with space for 2 new columns
#  dat <- Multiplyr (x=1:100, alloc=2)
#  
#  # Create new columns (y and z), drop x in the process
#  dat %>% transmute (y=x*2, z=sqrt(x))

## ------------------------------------------------------------------------
#  # Load data on guinea pig tooth growth:
#  data (ToothGrowth)
#  
#  # Convert into a Multiplyr data frame with space for 2 new columns
#  dat <- Multiplyr (ToothGrowth, alloc=2)
#  
#  # Group data by supplement and dose
#  dat %>% group_by (supp, dose)
#  
#  # Produce summary statistics
#  dat %>% summarise (len.mean = mean(len), len.sd = sd(len))

## ------------------------------------------------------------------------
#  dat <- Multiplyr (x=1:100)
#  
#  dat %>% summarise (N=sum(x))

## ------------------------------------------------------------------------
#  dat <- Multiplyr (x=1:100)
#  
#  dat %>% summarise (N=sum(x)) %>% reduce(N=sum(x))

## ------------------------------------------------------------------------
#  # Construct a new data frame
#  dat <- Multiplyr (x=1:100)
#  
#  # Define y and z to be new columns
#  dat %>% define(y, z)
#  
#  # Execute the following code within each node
#  dat %>% within_node ({
#      y <- x * 10
#      z <- sqrt(y)
#  })

## ------------------------------------------------------------------------
#  # Construct a new data frame
#  dat <- Multiplyr (x=1:100, G=rep(c("A", "B", "C", "D"), length.out=100))
#  
#  # Group by G
#  dat %>% group_by (G)
#  
#  # Execute the following block of code within each group
#  dat %>% within_group({
#      N <- length(x)
#      xbar <- sum(x) / N
#  })
#  
#  # Export the data
#  dat %>% summarise(N=N, xbar=xbar)

## ------------------------------------------------------------------------
#  # Create data frame
#  dat <- Multiplyr (G = rep(c("A", "B"), each=50),
#                    m = rep(c(5, 10), each=50),
#                    alloc=1)
#  
#  # Group by G
#  dat %>% group_by (G)
#  
#  # Generate some random data in x with mean of m
#  dat %>% mutate (x=rnorm(length(m), mean=m))
#  
#  # Fit a linear model with just an intercept to x in each group
#  dat %>% within_group ({
#      mdl <- lm (x ~ 1)
#  })
#  
#  # Extract intercept and store it in x.mean
#  dat %>% summarise (x.mean = coef(mdl)[[1]])

## ------------------------------------------------------------------------
#  # Construct a new data frame
#  dat <- Multiplyr (G=rep(c("A", "B", "C", "D"), length.out=100),
#                    H=rep(c("p", "q", "r", "s"), each=25))
#  # Display data
#  dat["G"]
#  dat["H"]
#  
#  # Switch into NSA mode
#  dat %>% nsa()
#  dat["G"]
#  dat["H"]
#  
#  # Do some things
#  dat %>% mutate (G=max(G))
#  
#  # Switch back
#  dat %>% nsa(FALSE)
#  dat["G"]
#  dat["H"]

Any scripts or data that you put into this service are public.

multiplyr documentation built on May 30, 2017, 12:09 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

multiplyr
Data Manipulation with Parallelism and Shared Memory Matrices

inst/doc/basics.R
In multiplyr: Data Manipulation with Parallelism and Shared Memory Matrices

Try the multiplyr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

multiplyr Data Manipulation with Parallelism and Shared Memory Matrices

inst/doc/basics.R In multiplyr: Data Manipulation with Parallelism and Shared Memory Matrices

Try the multiplyr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

multiplyr
Data Manipulation with Parallelism and Shared Memory Matrices

inst/doc/basics.R
In multiplyr: Data Manipulation with Parallelism and Shared Memory Matrices