| maditr-package | R Documentation |
Package provides pipe-style interface for data.table. It preserves
all data.table features without significant impact on performance. 'let'
and 'take' functions are simplified interfaces for most common data
manipulation tasks.
To select rows from data: rows(mtcars, am==0)
To select columns from data: columns(mtcars, mpg, vs:carb)
To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)
To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
To get total summary skip by argument: take_all(mtcars, mean)
Use magrittr pipe '%>%' to chain several operations:
mtcars %>%
let(mpg_hp = mpg/hp) %>%
take(mean(mpg_hp), by = am)
To modify variables or add new variables:
mtcars %>%
let(new_var = 42,
new_var2 = new_var*hp) %>%
head()
To modify all non-grouping variables:
iris %>%
let_all(
scaled = (.x - mean(.x))/sd(.x),
by = Species) %>%
head()
To drop variable assign NULL: let(mtcars, am = NULL) %>% head()
To aggregate all variables conditionally on name:
iris %>%
take_all(
mean = if(startsWith(.name, "Sepal")) mean(.x),
median = if(startsWith(.name, "Petal")) median(.x),
by = Species
)
For parametric assignment use ':=':
new_var = "my_var"
old_var = "mpg"
mtcars %>%
let((new_var) := get(old_var)*2) %>%
head()
For more sophisticated operations see 'query'/'query_if': these
functions translates its arguments one-to-one to '[.data.table'
method. Additionally there are some conveniences such as automatic
'data.frame' conversion to 'data.table'.
Maintainer: Gregory Demin gdemin@gmail.com
Useful links:
# examples form 'dplyr' package
data(mtcars)
# Newly created variables are available immediately
mtcars %>%
let(
cyl2 = cyl * 2,
cyl4 = cyl2 * 2
) %>%
head()
# You can also use let() to remove variables and
# modify existing variables
mtcars %>%
let(
mpg = NULL,
disp = disp * 0.0163871 # convert to litres
) %>%
head()
# window functions are useful for grouped computations
mtcars %>%
let(rank = rank(-mpg, ties.method = "min"),
by = cyl) %>%
head()
# You can drop variables by setting them to NULL
mtcars %>% let(cyl = NULL) %>% head()
# keeps all existing variables
mtcars %>%
let(displ_l = disp / 61.0237) %>%
head()
# keeps only the variables you create
mtcars %>%
take(displ_l = disp / 61.0237)
# can refer to both contextual variables and variable names:
var = 100
mtcars %>%
let(cyl = cyl * var) %>%
head()
# select rows
mtcars %>%
rows(am==0) %>%
head()
# select rows with compound condition
mtcars %>%
rows(am==0 & mpg>mean(mpg))
# select columns
mtcars %>%
columns(vs:carb, cyl)
mtcars %>%
columns(-am, -cyl)
# regular expression pattern
columns(iris, "^Petal") # variables which start from 'Petal'
columns(iris, "Width$") # variables which end with 'Width'
# move Species variable to the front
# pattern "^." matches all variables
columns(iris, Species, "^.")
# pattern "^.*al" means "contains 'al'"
columns(iris, "^.*al")
# numeric indexing - all variables except Species
columns(iris, 1:4)
# A 'take' with summary functions applied without 'by' argument returns an aggregated data
mtcars %>%
take(mean = mean(disp), n = .N)
# Usually, you'll want to group first
mtcars %>%
take(mean = mean(disp), n = .N, by = cyl)
# You can group by expressions:
mtcars %>%
take_all(mean, by = list(vsam = vs + am))
# modify all non-grouping variables in-place
mtcars %>%
let_all((.x - mean(.x))/sd(.x), by = am) %>%
head()
# modify all non-grouping variables to new variables
mtcars %>%
let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
head()
# conditionally modify all variables
iris %>%
let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
head()
# modify all variables conditionally on name
iris %>%
let_all(
mean = if(startsWith(.name, "Sepal")) mean(.x),
median = if(startsWith(.name, "Petal")) median(.x),
by = Species
) %>%
head()
# aggregation with 'take_all'
mtcars %>%
take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)
# conditionally aggregate all variables
iris %>%
take_all(mean = if(is.numeric(.x)) mean(.x))
# aggregate all variables conditionally on name
iris %>%
take_all(
mean = if(startsWith(.name, "Sepal")) mean(.x),
median = if(startsWith(.name, "Petal")) median(.x),
by = Species
)
# parametric evaluation:
var = quote(mean(cyl))
mtcars %>%
let(mean_cyl = eval(var)) %>%
head()
take(mtcars, eval(var))
# all together
new_var = "mean_cyl"
mtcars %>%
let((new_var) := eval(var)) %>%
head()
take(mtcars, (new_var) := eval(var))
########################################
# variable selection
# range selection
iris %>%
let(
avg = rowMeans(Sepal.Length %to% Petal.Width)
) %>%
head()
# multiassignment
iris %>%
let(
# starts with Sepal or Petal
multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
) %>%
head()
mtcars %>%
let(
# text expansion
cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
) %>%
head()
# range selection in 'by'
# range selection + additional column
mtcars %>%
take(
res = sum(cols(mpg, disp %to% drat)),
by = vs %to% gear
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.