# Default knitr chunk options for this vignette: collapse a chunk's source and
# output into one block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)
library(formulaic)
library(data.table)
library(knitr)
library(DTwrappers)
# Load the snack.dat example data set from the formulaic package; every
# example below operates on this object (or on a copy of it).
data("snack.dat", package = "formulaic")
# Names stored once as constants so that the examples can refer to columns
# through variables (e.g. get(age.name)) instead of repeating string literals.
# Each constant is defined exactly once.

# Names referenced by the examples in this section.
id.name <- "User ID"
age.name <- "Age"
gender.name <- "Gender"
income.name <- "Income"
region.name <- "Region"
product.name <- "Product"
awareness.name <- "Awareness"
consideration.name <- "Consideration"
consumption.name <- "Consumption"
satisfaction.name <- "Satisfaction"
advocacy.name <- "Advocacy"

# Defined for completeness but not used by the examples visible here.
persona.name <- "Persona"
age.group.name <- "Age Group"
income.group.name <- "Income Group"
# NOTE(review): presumably a prefix/pattern matching the "BP_*" columns --
# confirm against the data set.
bp.patterns <- "BP_"

Introduction

Many newcomers to the data.table package are overwhelmed by its unique syntax. DTwrappers provides wrapper functions that make data.table easier to apply while still maintaining data.table's execution speed.

The DTwrappers package has 9 main functions. The main purpose of the package is to help users apply data.table more quickly and more conveniently.

# Examples of create.filter.expression() called with different kinds of
# the.filter input.

# No filter supplied.
create.filter.expression(the.filter = NULL)
# A character vector of two column names rather than a logical statement.
create.filter.expression(the.filter = c(age.name, region.name))
# If evaluated as written, this compares the strings "Age" and "Region" and
# yields FALSE, so a logical value (not a filter string) is passed.
# NOTE(review): confirm that demonstrating a logical input is the intent.
create.filter.expression(the.filter = age.name == region.name)
# Filters written as character strings; columns are referenced through
# get(<name constant>).
create.filter.expression(the.filter = "get(region.name) == 'South'")
create.filter.expression(the.filter = "get(age.name) > 20")
# Two conditions combined with the elementwise `&`.
create.filter.expression(the.filter = "get(region.name) == 'South' & get(age.name) > 20")
# dt.count() examples.

# No filter and no grouping.
dt.count(dat = snack.dat)

# Grouped by region and gender.
dt.count(
  dat = snack.dat,
  grouping.variables = c(region.name, gender.name)
)

# Filtered to Age > 65, grouped by region and gender, with a custom name for
# the count column.
dt.count(
  dat = snack.dat,
  the.filter = "Age > 65",
  grouping.variables = c(region.name, gender.name),
  count.name = "Records with Age > 65"
)

# Filter written with get(), grouped by age and income using "keyby".
dt.count(
  dat = snack.dat,
  the.filter = "get(age.name) > 20",
  grouping.variables = c(age.name, income.name),
  grouping.type = "keyby",
  count.name = "counts"
)
# dt.select() examples.

# Two columns, no filter and no grouping.
dt.select(
  dat = snack.dat,
  the.variables = c(id.name, awareness.name)
)

# Filtered selection of four outcome columns, grouped by gender.
dt.select(
  dat = snack.dat,
  the.filter = "Age > 65 & Region == 'Northeast' & Product == 'Tiramisoup' & Awareness == 1",
  the.variables = c(
    consideration.name, consumption.name, satisfaction.name, advocacy.name
  ),
  grouping.variables = c(gender.name)
)

# Same call with first.k = 2.
dt.select(
  dat = snack.dat,
  the.filter = "Age > 65 & Region == 'Northeast' & Product == 'Tiramisoup' & Awareness == 1",
  the.variables = c(
    consideration.name, consumption.name, satisfaction.name, advocacy.name
  ),
  grouping.variables = c(gender.name),
  first.k = 2
)

# Same call with last.k = 2.
dt.select(
  dat = snack.dat,
  the.filter = "Age > 65 & Region == 'Northeast' & Product == 'Tiramisoup' & Awareness == 1",
  the.variables = c(
    consideration.name, consumption.name, satisfaction.name, advocacy.name
  ),
  grouping.variables = c(gender.name),
  last.k = 2
)

# Same call with explicit row.indices.
dt.select(
  dat = snack.dat,
  the.filter = "Age > 65 & Region == 'Northeast' & Product == 'Tiramisoup' & Awareness == 1",
  the.variables = c(
    consideration.name, consumption.name, satisfaction.name, advocacy.name
  ),
  grouping.variables = c(gender.name),
  row.indices = 7:9
)

# Both first.k and last.k supplied; the data is referenced through the
# formulaic namespace here.
dt.select(
  dat = formulaic::snack.dat,
  the.filter = "Age > 65 & Region == 'Northeast' & Product == 'Tiramisoup' & Awareness == 1",
  the.variables = c(
    consideration.name, consumption.name, satisfaction.name, advocacy.name
  ),
  grouping.variables = c(gender.name),
  first.k = 2,
  last.k = 2
)

Note: Specifying row.indices takes precedence over specifying first.k: if row.indices is not NULL, then row.indices is used and first.k is ignored. Likewise, first.k takes precedence over last.k when both are specified.

# dt.define.variable() examples. Each example works on its own copy() of
# snack.dat, so snack.dat itself is not the object being passed in.

# New column supplied directly as a vector of values.
snack.dat1 <- copy(snack.dat)
snack.dat1 <- dt.define.variable(
  dat = snack.dat1,
  variable.name = "Age Decade",
  the.values = snack.dat1[, floor(get(age.name) / 10)]
)
snack.dat1[1:10, .SD, .SDcols = c(age.name, "Age Decade")]

# New column supplied as an expression() object.
snack.dat2 <- copy(snack.dat)
snack.dat2 <- dt.define.variable(
  dat = snack.dat2,
  variable.name = "Income in Thousands",
  the.values = expression(floor(get(income.name) / 10^3)),
  specification = "by.expression"
)
snack.dat2[1:10, .SD, .SDcols = c(income.name, "Income in Thousands")]

# Same computation with the expression supplied as a character string.
snack.dat3 <- copy(snack.dat)
snack.dat3 <- dt.define.variable(
  dat = snack.dat3,
  variable.name = "Income in Thousands",
  the.values = "floor(get(income.name) / 10^3)",
  specification = "by.expression"
)
snack.dat3[1:10, .SD, .SDcols = c(income.name, "Income in Thousands")]

# A character-valued column built with sprintf().
snack.dat4 <- copy(snack.dat)
snack.dat4 <- dt.define.variable(
  dat = snack.dat4,
  variable.name = "Region and Country",
  the.values = expression(sprintf("%s, USA", get(region.name))),
  specification = "by.expression"
)
snack.dat4[1:10, .SD, .SDcols = c(region.name, "Region and Country")]
# dt.first.k.rows() and dt.last.k.rows() examples, each selecting the ID, age
# and product columns.

# k = 2, grouped by gender with "by".
dt.first.k.rows(
  dat = snack.dat,
  k = 2,
  the.variables = c(id.name, age.name, product.name),
  grouping.variables = gender.name,
  grouping.type = "by"
)

# k = 1, grouped by gender and region with "keyby".
dt.first.k.rows(
  dat = snack.dat,
  k = 1,
  the.variables = c(id.name, age.name, product.name),
  grouping.variables = c(gender.name, region.name),
  grouping.type = "keyby"
)

# k = 2, grouped by gender with "by".
dt.last.k.rows(
  dat = snack.dat,
  k = 2,
  the.variables = c(id.name, age.name, product.name),
  grouping.variables = gender.name,
  grouping.type = "by"
)

# k = 1, grouped by gender and region with "keyby".
dt.last.k.rows(
  dat = snack.dat,
  k = 1,
  the.variables = c(id.name, age.name, product.name),
  grouping.variables = c(gender.name, region.name),
  grouping.type = "keyby"
)


dachosen1/DTwrappers documentation built on Dec. 25, 2019, 8:04 a.m.