## knitr chunk defaults, package load, and tbltools print options
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, comment = "#>")
library(tbltools)
options(tbltools.print_tibble = FALSE, tbltools.print_n = 4)

## Compare a tbltools result with its dplyr counterpart, ignoring
## differences in non-data.frame classes and in attribute ordering.
## Returns TRUE when the two objects are identical after normalisation.
same_as_dplyr <- function(d1, d2) {
  ## coerce to a plain data frame and sort attributes by name
  normalise <- function(d) {
    d <- as.data.frame(d)
    at <- attributes(d)
    attributes(d) <- at[order(names(at))]
    d
  }
  if (identical(d1, d2)) {
    return(TRUE)
  }
  if (is.data.frame(d1) && is.data.frame(d2)) {
    d1 <- normalise(d1)
    d2 <- normalise(d2)
  }
  identical(d1, d2)
}

## fixed seed so the random examples are reproducible
set.seed(12)
Tools for Working with Tibbles
Install from CRAN with:
## install {tbltools} from CRAN
install.packages("tbltools")
Or install the development version from GitHub with:
## install the {remotes} package if it is not already available;
## quietly = TRUE keeps the availability check from printing load messages
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

## install the development version of {tbltools} from GitHub
remotes::install_github("mkearney/tbltools")
slice_data()
, arrange_data()
filter_data()
, select_data()
,
mutate_data()
, summarise_data()
, bind_rows_data()
,
bind_cols_data()
, full_join_data()
, left_join_data()
,
right_join_data()
as_tbl_data()
: Convert data frames to tibbles
## convert mtcars to tibble and create row_names variable
d <- as_tbl_data(mtcars, row_names = TRUE)
tbl_data_frame()
: Create data frames, where a variable may depend on one defined earlier in the same tbl_data_frame() call
## create data frame where the 2nd variable depends on evaluation of the 1st variable
tbl_data_frame(
  x = rnorm(20),
  y = rnorm(20) + x
) %>%
  cor()
slice_data()
: Select/keep/return row positions
## select rows 1, 3, 5, 25 of data
slice_data(d, c(1, 3, 5, 25))
Compare with dplyr::slice()
## check that slice_data() matches dplyr::slice() for the same row positions
same_as_dplyr(
  slice_data(d, c(1, 3, 5, 25)),
  dplyr::slice(d, c(1, 3, 5, 25))
)
filter_data()
: Filter/keep/return certain rows
## keep rows where (gear > 3 or mpg > 30) and vs == 1
filter_data(d, gear > 3 | mpg > 30, vs == 1)
Compare with dplyr::filter()
## check that filter_data() matches dplyr::filter() for the same conditions
same_as_dplyr(
  filter_data(d, gear > 3 | mpg > 30, vs == 1),
  dplyr::filter(d, gear > 3 | mpg > 30, vs == 1)
)
arrange_data()
: Organize rows by column(s) value
## sort rows by gear, then by cyl
arrange_data(d, gear, cyl)
Compare with dplyr::arrange()
## check that arrange_data() matches dplyr::arrange() for the same sort columns
same_as_dplyr(
  arrange_data(d, gear, cyl),
  dplyr::arrange(d, gear, cyl)
)
select_data()
: Select columns of data frame
## select only these columns
select_data(d, row_names:hp, -disp, gear, weight = wt)
Compare with dplyr::select()
## check that select_data() matches dplyr::select(), including the wt -> weight rename
same_as_dplyr(
  select_data(d, cyl, gear, weight = wt),
  dplyr::select(d, cyl, gear, weight = wt)
)
mutate_data()
: Wrangle/create variables in data frame
## select cyl, gear, mpg columns and create new logical column for more efficient cars
d %>%
  select_data(cyl, gear, mpg) %>%
  mutate_data(eff = mpg > 20)
Compare with dplyr::mutate()
## check that select_data() + mutate_data() matches the dplyr select/mutate pipeline
same_as_dplyr(
  d %>%
    select_data(cyl, gear, mpg) %>%
    mutate_data(eff = mpg > 20),
  d %>%
    dplyr::select(cyl, gear, mpg) %>%
    dplyr::mutate(eff = mpg > 20)
)
summarise_data()
: Wrangle/create summary variables in data frame
## summarise the data as the mean of mpg and the mean of wt
summarise_data(d, mpg = mean(mpg), wt = mean(wt))
Compare with dplyr::summarise()
## check that summarise_data() matches dplyr::summarise() for the same summaries
same_as_dplyr(
  summarise_data(d, mpg = mean(mpg), wt = mean(wt)),
  dplyr::summarise(d, mpg = mean(mpg), wt = mean(wt))
)
group_by_data()
: Group observations in data frame [and then mutate and/or summarise]
## group by cyl and gear
d %>%
  select_data(mpg:cyl, gear) %>%
  group_by_data(cyl, gear) %>%
  mutate_data(n = length(gear)) %>%
  summarise_data(
    n = unique(n),
    mpg = mean(mpg)
  )
Compare with dplyr::group_by()
## run the same grouped pipeline with tbltools verbs and with dplyr verbs,
## then check that the two results match
same_as_dplyr(
  d %>%
    select_data(cyl, gear, mpg) %>%
    group_by_data(cyl) %>%
    mutate_data(n = length(gear)) %>%
    summarise_data(
      n = unique(n),
      mpg_total = sum(mpg),
      mpg = mean(mpg)
    ) %>%
    arrange_data(cyl) %>%
    select_data(cyl, mpg_total, mpg, n),
  d %>%
    dplyr::select(cyl, gear, mpg) %>%
    dplyr::group_by(cyl) %>%
    dplyr::mutate(n = length(gear)) %>%
    dplyr::summarise(
      n = unique(n),
      mpg_total = sum(mpg),
      mpg = mean(mpg)
    ) %>%
    dplyr::arrange(cyl) %>%
    dplyr::select(cyl, mpg_total, mpg, n)
)
bind_rows_data()
: Collapse list of data frames into single data frame
## create version of data with new variable
dd <- d
dd$new_var <- sample(letters, nrow(d), replace = TRUE)
## combine multiple data sets into list
lst <- list(d, d, dd)
## bind rows into single data frame
bind_rows_data(lst, fill = TRUE)
Compare to dplyr::bind_rows()
## check that bind_rows_data(fill = TRUE) matches dplyr::bind_rows() on the same list
same_as_dplyr(
  bind_rows_data(lst, fill = TRUE),
  dplyr::bind_rows(lst)
)
## x: mtcars converted with as_tbl_data()
x <- tbltools::as_tbl_data(mtcars)

## y: small second data frame with a cyl column and a new column
y <- data.frame(
  cyl = c(1, 4),
  new = c(1.25, 2.5)
)
left_join_data()
: Join according to first (left) data frame
## join according to x
left_join_data(x, y)
Compare with dplyr::left_join()
:
## check that left_join_data() matches dplyr::left_join() on the same inputs
same_as_dplyr(
  left_join_data(x, y),
  dplyr::left_join(x, y)
)
right_join_data()
: Join according to second (right) data frame
## join by y
right_join_data(x, y)
Compare with dplyr::right_join()
:
## check that right_join_data() matches dplyr::right_join() on the same inputs
same_as_dplyr(
  right_join_data(x, y),
  dplyr::right_join(x, y)
)
full_join_data()
: Join according to both data frames
## join by x and y
full_join_data(x, y)
Compare with dplyr::full_join()
:
## check that full_join_data() matches dplyr::full_join() on the same inputs
same_as_dplyr(
  full_join_data(x, y),
  dplyr::full_join(x, y)
)
tabsort()
: Count frequencies
## count by cyl and gear
tabsort(d, cyl, gear)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.