Introduction
In maditr: Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'

knitr::opts_chunk$set(echo = TRUE)
data.table::setDTthreads(2)

Overview

Package provides pipe-style interface for data.table package. It preserves all data.table features without significant impact on performance. let and take functions are simplified interfaces for most common data manipulation tasks.

To select rows from data: rows(mtcars, am==0)
To select columns from data: columns(mtcars, mpg, vs:carb)
To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)
To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
To get total summary skip by argument: take_all(mtcars, mean)
Use magrittr pipe %>% to chain several operations:

     mtcars %>%
        let(mpg_hp = mpg/hp) %>%
        take(mean(mpg_hp), by = am)

To modify variables or add new variables:

      mtcars %>%
         let(new_var = 42,
             new_var2 = new_var*hp) %>%
         head()

To drop variable assign NULL: let(mtcars, am = NULL) %>% head()
To modify all non-grouping variables:

    iris %>%
      let_all(
          scaled = (.x - mean(.x))/sd(.x),
          by = Species) %>%
       head()

To aggregate all variables conditionally on name:

    iris %>%
      take_all(
          mean = if(startsWith(.name, "Sepal")) mean(.x),
          median = if(startsWith(.name, "Petal")) median(.x),
          by = Species
      )

For parametric assignment use :=:

    new_var = "my_var"
    old_var = "mpg"
    mtcars %>%
        let((new_var) := get(old_var)*2) %>%
        head()

    # or,  
    expr = quote(mean(cyl))
    mtcars %>% 
        let((new_var) := eval(expr)) %>% 
        head()

    # the same with `take` 
    by_var = "vs,am"
    take(mtcars, (new_var) := eval(expr), by = by_var)

query_if function translates its arguments one-to-one to [.data.table method. Additionally there are some conveniences such as automatic data.frame conversion to data.table.

vlookup & xlookup

Let's make datasets for lookups:

library(maditr)

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

# xlookup
workers = let(workers,
  position = xlookup(name, positions$name, positions$position)
)

# vlookup
# by default we search in the first column and return values from second column
workers = let(workers,
  position = vlookup(name, positions, no_match = "Not found")
)

# the same 
workers = let(workers,
  position = vlookup(name, positions, 
                     result_column = "position", 
                     no_match = "Not found") # or, result_column = 2 
)

head(workers)

More examples

We will use for demonstartion well-known mtcars dataset and some examples from dplyr package.

library(maditr)
data(mtcars)

# Newly created variables are available immediately
mtcars %>%
    let(
        cyl2 = cyl * 2,
        cyl4 = cyl2 * 2
    ) %>% head()

# You can also use let() to remove variables and
# modify existing variables
mtcars %>%
    let(
        mpg = NULL,
        disp = disp * 0.0163871 # convert to litres
    ) %>% head()


# window functions are useful for grouped computations
mtcars %>%
    let(rank = rank(-mpg, ties.method = "min"),
        by = cyl) %>%
    head()

# You can drop variables by setting them to NULL
mtcars %>%
    let(cyl = NULL) %>%
    head()

# keeps all existing variables
mtcars %>%
    let(displ_l = disp / 61.0237) %>%
    head()

# keeps only the variables you create
mtcars %>%
    take(displ_l = disp / 61.0237) %>% 
    head()


# can refer to both contextual variables and variable names:
var = 100
mtcars %>%
    let(cyl = cyl * var) %>%
    head()

# select rows
mtcars %>%
    rows(am==0) %>% 
    head()

# select rows with compound condition
mtcars %>%
    rows(am==0 & mpg>mean(mpg))

# select columns
mtcars %>% 
    columns(vs:carb, cyl)

mtcars %>% 
    columns(-am, -cyl)    

# regular expression pattern
columns(iris, "^Petal") %>% head() # variables which start from 'Petal'
columns(iris, "Width$") %>% head() # variables which end with 'Width'

# move Species variable to the front
# pattern "^." matches all variables
columns(iris, Species, "^.") %>% head()

# pattern "^.*al" means "contains 'al'"
columns(iris, "^.*al") %>% head()

# numeric indexing - all variables except Species
columns(iris, 1:4) %>% head()

# A 'take' with summary functions applied without 'by' argument returns an aggregated data
mtcars %>%
    take(mean = mean(disp), n = .N)

# Usually, you'll want to group first
mtcars %>%
    take(mean = mean(disp), n = .N, by = am)

# grouping by multiple variables
mtcars %>%
    take(mean = mean(disp), n = .N, by = list(am, vs))

# You can group by expressions:
mtcars %>%
    take_all(
        mean,
        by = list(vsam = vs + am)
    )

# modify all non-grouping variables in-place
mtcars %>%
    let_all((.x - mean(.x))/sd(.x), by = am) %>%
    head()

# modify all non-grouping variables to new variables
mtcars %>%
    let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
    head()

# conditionally modify all variables
iris %>%
    let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
    head()

# modify all variables conditionally on name
iris %>%
    let_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    ) %>%
    head()

# aggregation with 'take_all'
mtcars %>%
    take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)

# conditionally aggregate all variables
iris %>%
    take_all(mean = if(is.numeric(.x)) mean(.x))

# aggregate all variables conditionally on name
iris %>%
    take_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    )

# parametric evaluation:
var = quote(mean(cyl))
mtcars %>% 
    let(mean_cyl = eval(var)) %>% 
    head()
take(mtcars, eval(var))

# all together
new_var = "mean_cyl"
mtcars %>% 
    let((new_var) := eval(var)) %>% 
    head()
take(mtcars, (new_var) := eval(var))

Variable selection in the expressions

You can use 'columns' inside expression in the 'take'/'let'. 'columns' will be replaced with data.table with selected columns. In 'let' in the expressions with ':=', 'cols' or '%to%' can be placed in the left part of the expression. It is usefull for multiple assignment. There are four ways of column selection:

Simply by column names
By variable ranges, e. g. vs:carb. Alternatively, you can use '%to%' instead of colon: 'vs %to% carb'.
With regular expressions. Characters which start with '^' or end with $ considered as Perl-style regular expression patterns. For example, '^Petal' returns all variables started with 'Petal'. 'Width$' returns all variables which end with 'Width'. Pattern '^.' matches all variables and pattern '^.*my_str' is equivalent to contains "my_str"'.
By character variables with interpolated parts. Expression in the curly brackets inside characters will be evaluated in the parent frame with 'text_expand' function. For example, a{1:3} will be transformed to the names 'a1', 'a2', 'a3'. 'cols' is just a shortcut for 'columns'.

# range selection
iris %>% 
    let(
        avg = rowMeans(Sepal.Length %to% Petal.Width)
    ) %>% 
    head()

# multiassignment
iris %>% 
    let(
        # starts with Sepal or Petal
        multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
    ) %>% 
    head()


mtcars %>% 
    let(
        # text expansion
        cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
    ) %>% 
    head()

# range selection in 'by'
# selection of range + additional column
mtcars %>% 
    take(
        res = sum(cols(mpg, disp %to% drat)),
        by = vs %to% gear
    )

Joins

Here we use the same datasets as with lookups:

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

workers
positions

Different kinds of joins:

workers %>% dt_inner_join(positions)
workers %>% dt_left_join(positions)
workers %>% dt_right_join(positions)
workers %>% dt_full_join(positions)

# filtering joins
workers %>% dt_anti_join(positions)
workers %>% dt_semi_join(positions)

To suppress the message, supply by argument:

workers %>% dt_left_join(positions, by = "name")

Use a named by if the join variables have different names:

positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
workers %>% dt_inner_join(positions2, by = c("name" = "worker"))

Any scripts or data that you put into this service are public.

maditr documentation built on Jan. 19, 2026, 9:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

maditr
Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'

Introduction
In maditr: Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'

Links

Overview

vlookup & xlookup

More examples

Variable selection in the expressions

Joins

Try the maditr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

maditr Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'

Introduction In maditr: Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'

Links

Overview

vlookup & xlookup

More examples

Variable selection in the expressions

Joins

Try the maditr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

maditr
Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'

Introduction
In maditr: Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'