## knitr chunk defaults: echo the source code, collapse source and output into
## a single block, and prefix printed output lines with "#>"
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, comment = "#>")
library(tbltools)
## tbltools printing options; presumably these turn off tibble-style printing
## and limit printed output to 4 rows -- confirm against the tbltools docs
options(tbltools.print_tibble = FALSE, tbltools.print_n = 4)
## Helper used to compare a tbltools result with its dplyr counterpart.
##
## Returns TRUE when `d1` and `d2` are identical as given, or when both are
## data frames that become identical once each is coerced with
## as.data.frame() and its attributes are put in alphabetical order by name
## (so differences limited to extra data frame classes are ignored).
## Returns FALSE otherwise.
same_as_dplyr <- function(d1, d2) {
  ## coerce to a plain data.frame and re-set its attributes sorted by name
  normalise <- function(d) {
    d <- as.data.frame(d)
    at <- attributes(d)
    attributes(d) <- at[order(names(at))]
    d
  }

  ## exact match: nothing more to check
  if (identical(d1, d2)) {
    return(TRUE)
  }

  ## normalisation only applies when both inputs are data frames; anything
  ## else already failed the identical() check above
  both_data_frames <- is.data.frame(d1) && is.data.frame(d2)
  if (!both_data_frames) {
    return(FALSE)
  }

  identical(normalise(d1), normalise(d2))
}
## fix the RNG seed so the examples that use rnorm() and sample() below are
## reproducible
set.seed(12)

tbltools

Build status CRAN status Coverage status

Downloads Downloads lifecycle

Tools for Working with Tibbles

Installation

Install from CRAN with:

## install {tbltools} from CRAN
install.packages("tbltools")

Or install the development version from GitHub with:

## install remotes pkg if not already
if (!requireNamespace("remotes")) {
  install.packages("remotes")
}

## install from github
remotes::install_github("mkearney/tbltools")

Features

Use

## convert mtcars to tibble and create row_names variable
d <- as_tbl_data(mtcars, row_names = TRUE)
## create data frame where the 2nd variable depends on evaluation of the 1st
## variable, then pass the result to cor()
tbl_data_frame(
  x = rnorm(20),
  y = rnorm(20) + x
) %>%
  cor()

Slice

## select rows 1, 3, 5, 25 of data
slice_data(d, c(1, 3, 5, 25))

Compare with dplyr::slice()

same_as_dplyr(
  slice_data(d, c(1, 3, 5, 25)),
  dplyr::slice(d, c(1, 3, 5, 25))
)

Filter

## filter rows using two conditions: gear > 3 or mpg > 30, and vs == 1
filter_data(d, gear > 3 | mpg > 30, vs == 1)

Compare with dplyr::filter()

same_as_dplyr(
  filter_data(d, gear > 3 | mpg > 30, vs == 1),
  dplyr::filter(d, gear > 3 | mpg > 30, vs == 1)
)

Arrange

## arrange (sort) rows by gear, then by cyl
arrange_data(d, gear, cyl)

Compare with dplyr::arrange()

same_as_dplyr(
  arrange_data(d, gear, cyl),
  dplyr::arrange(d, gear, cyl)
)

Select

## select columns row_names through hp (dropping disp), plus gear, and
## select wt under the new name weight
select_data(d, row_names:hp, -disp, gear, weight = wt)

Compare with dplyr::select()

same_as_dplyr(
  select_data(d, cyl, gear, weight = wt),
  dplyr::select(d, cyl, gear, weight = wt)
)

Mutate

## select cyl, gear, mpg columns and create new logical column for more efficient cars
d %>%
  select_data(cyl, gear, mpg) %>%
  mutate_data(eff = mpg > 20)

Compare with dplyr::mutate()

same_as_dplyr(
  d %>%
    select_data(cyl, gear, mpg) %>%
    mutate_data(eff = mpg > 20),
  d %>%
    dplyr::select(cyl, gear, mpg) %>%
    dplyr::mutate(eff = mpg > 20)
)

Summarise

## summarise the data into the mean of mpg and the mean of wt
summarise_data(d, mpg = mean(mpg), wt = mean(wt))

Compare with dplyr::summarise()

same_as_dplyr(
  summarise_data(d, mpg = mean(mpg), wt = mean(wt)),
  dplyr::summarise(d, mpg = mean(mpg), wt = mean(wt))
)

Group by

## group by cyl and gear, add n = length(gear) on the grouped data, then
## summarise n and the mean of mpg
d %>%
  select_data(mpg:cyl, gear) %>%
  group_by_data(cyl, gear) %>%
  mutate_data(n = length(gear)) %>%
  summarise_data(
    n = unique(n),
    mpg = mean(mpg)
  )

Compare with dplyr::group_by()

same_as_dplyr(
  d %>%
    select_data(cyl, gear, mpg) %>%
    group_by_data(cyl) %>%
    mutate_data(n = length(gear)) %>%
    summarise_data(
      n = unique(n),
      mpg_total = sum(mpg),
      mpg = mean(mpg)
    ) %>%
    arrange_data(cyl) %>%
    select_data(cyl, mpg_total, mpg, n),
  d %>%
    dplyr::select(cyl, gear, mpg) %>%
    dplyr::group_by(cyl) %>%
    dplyr::mutate(n = length(gear)) %>%
    dplyr::summarise(
      n = unique(n),
      mpg_total = sum(mpg),
      mpg = mean(mpg)
    ) %>%
    dplyr::arrange(cyl) %>%
    dplyr::select(cyl, mpg_total, mpg, n)
)

Bind rows

## create version of data with new variable
dd <- d
dd$new_var <- sample(letters, nrow(d), replace = TRUE)

## combine multiple data sets into list
lst <- list(d, d, dd)

## bind rows into single data frame
bind_rows_data(lst, fill = TRUE)

Compare with dplyr::bind_rows()

same_as_dplyr(
  bind_rows_data(lst, fill = TRUE),
  dplyr::bind_rows(lst)
)

Joins

## mtcars data and additional cyl/new data
x <- tbltools::as_tbl_data(mtcars)
y <- data.frame(cyl = c(1, 4), new = c(1.25, 2.5))
## join according to x
left_join_data(x, y)

Compare with dplyr::left_join():

same_as_dplyr(
  left_join_data(x, y),
  dplyr::left_join(x, y)
)
## join by y
right_join_data(x, y)

Compare with dplyr::right_join():

same_as_dplyr(
  right_join_data(x, y),
  dplyr::right_join(x, y)
)
## join by x and y
full_join_data(x, y)

Compare with dplyr::full_join():

same_as_dplyr(
  full_join_data(x, y),
  dplyr::full_join(x, y)
)

Frequency tables

## count by cyl and gear
tabsort(d, cyl, gear)


mkearney/tbltools documentation built on May 14, 2019, 4:02 a.m.