modify: Fast modification of data.frames and data.tables by reference

Description Usage Arguments Examples

View source: R/modify.R

Description

modify is essentially a wrapper around data.table syntax, but it can also be used with other data containers (data.frames and dplyr tbl objects). It is intended as a complement to the dplyr functions. While the same result can be obtained with mutate, modify can be much faster and more concise when only the values in selected rows need to be modified.

Usage

1
modify(.data, .where, .by = NULL, ..., .envir = parent.frame())

Arguments

.data

a data.frame, data.table or dplyr tbl object

.where

optional; a boolean condition that specifies the rows to be modified

.by

optional; a vector of column names that define the groups within which the computations are performed

...

named expressions (e.g. x = 100) that give the new values for columns that are modified or newly created

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
## Not run: 
  
  # Example script: basic usage of modify() on a small data frame, followed by
  # a timing comparison against data.table and dplyr on a larger one.
  library(microbenchmark)
  library(modify)

  # Small demo data: 10 rows with a group column a (values 1..3),
  # an integer column b (values 1..100) and a random normal column x.
  n = 10
  df = data.frame(a= sample(1:3,n,replace=TRUE),
                   b= sample(1:100,n,replace=TRUE),
                   x=rnorm(n))
  # Set x to 100 where a==2
  # (no reassignment: df is printed afterwards to inspect the change)
  modify(df,a==2, x=100)
  df
  # Set x to the mean value of b*100 in each group of a
  modify(df,.by=c("a"),
         x=mean(b)*100)
  df
  
  # Call with strings: the row condition and the assignment are passed as
  # character values instead of unquoted expressions.
  # NOTE(review): s_modify is not described on this page; presumably it is the
  # string-based counterpart of modify -- confirm against the package docs.
  com = "x=200"
  s_modify(df,"a==2", com)
  

  
  # Benchmark compared to directly using data.table or dplyr 
  # Same column layout as above, but with 1e6 rows and a in 1..5.
  n = 1e6
  df = data.frame(a= sample(1:5,n,replace=TRUE),
                   b= sample(1:100,n,replace=TRUE),
                   x=rnorm(n))
  # NOTE(review): as.data.table, as.tbl and mutate come from data.table and
  # dplyr, which are not attached explicitly in this script; presumably
  # library(modify) attaches them -- confirm, or add library() calls.
  dt = as.data.table(df)
  
  # NOTE(review): as.tbl is deprecated in recent dplyr versions in favour of
  # tibble::as_tibble -- confirm which dplyr version this example targets.
  tbl = as.tbl(df)  
  # Add 100 to x in the rows where a==2, once with modify ...
  modify(tbl, a==2,x = x+100)
  # ... and the equivalent dplyr call; its result is not assigned here.
  mutate(df, x=ifelse(a==2,x+100,x))
  
  # Print tbl to inspect the result of the modify call above.
  tbl
  # Time each variant 5 times: modify on a tbl, a data.frame and a data.table,
  # the direct data.table := syntax, and mutate with ifelse on df and tbl
  # (the last two carry explicit labels for the benchmark output).
  microbenchmark(times = 5L,
    modify(tbl,a==2, x = x+100),
    modify(df,a==2, x = x+100),
    modify(dt,a==2, x = x+100),
    dt[a==2,x:=x+100],
    mutate.df = mutate(df, x=ifelse(a==2,x+100,x)),
    mutate.tbl = mutate(tbl, x=ifelse(a==2,x+100,x))
  )
  # Substantial speed increases compared to mutate with ifelse
  # and not much slower than directly using data.table syntax

## End(Not run)

skranz/modify documentation built on May 30, 2019, 3:01 a.m.