Nothing
Tools for Working with Tibbles
Install from CRAN with:
## install {tbltools} from CRAN
install.packages("tbltools")
Or install the development version from Github with:
## install remotes pkg if not already
if (!requireNamespace("remotes")) {
install.packages("remotes")
}
## install from github
remotes::install_github("mkearney/tbltools")
slice_data()
, arrange_data()
filter_data()
,
select_data()
, mutate_data()
, summarise_data()
,
bind_rows_data()
, bind_cols_data()
, full_join_data()
,
left_join_data()
, right_join_data()
as_tbl_data()
: Convert data frames to tibbles
## convert mtcars to tibble and create row_names variable
d <- as_tbl_data(mtcars, row_names = TRUE)
tbl_data_frame()
: Create data frames with a tbl_data_frame()
call
## create data frame where the 2nd variable depends on evaluation of the 1st variable
tbl_data_frame(
x = rnorm(20),
y = rnorm(20) + x
) %>%
cor()
#> x y
#> x 1.0000000 0.7827194
#> y 0.7827194 1.0000000
slice_data()
: Select/keep/return row positions
## select rows 1, 3, 5, 25 of data
slice_data(d, c(1, 3, 5, 25))
#> # A pseudo tibble: 4 x 12+
#> row_names mpg cyl disp hp drat wt qsec vs am
#> 1) Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1
#> 2) Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1
#> 3) Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0
#> 4) Pontiac Firebird 19.2 8 400 175 3.08 3.845 17.05 0 0
#> +2 column(s) not printed
Compare with dplyr::slice()
same_as_dplyr(
slice_data(d, c(1, 3, 5, 25)),
dplyr::slice(d, c(1, 3, 5, 25))
)
#> [1] TRUE
filter_data()
: Filter/keep/return certain rows
## keep rows where (gear > 3 or mpg > 30) and vs == 1
filter_data(d, gear > 3 | mpg > 30, vs == 1)
#> # A pseudo tibble: 11* x 12+
#> row_names mpg cyl disp hp drat wt qsec vs am gear
#> 1) Datsun 710 22.8 4 108.0 93 3.85 2.32 18.61 1 1 4
#> 2) Merc 240D 24.4 4 146.7 62 3.69 3.19 20.00 1 0 4
#> 3) Merc 230 22.8 4 140.8 95 3.92 3.15 22.90 1 0 4
#> 4) Merc 280 19.2 6 167.6 123 3.92 3.44 18.30 1 0 4
#> *7 row(s) not printed; +1 column(s) not printed
Compare with dplyr::filter()
same_as_dplyr(
filter_data(d, gear > 3 | mpg > 30, vs == 1),
dplyr::filter(d, gear > 3 | mpg > 30, vs == 1)
)
#> [1] TRUE
arrange_data()
: Organize rows by column(s) value
## arrange rows by gear, then by cyl
arrange_data(d, gear, cyl)
#> # A pseudo tibble: 32* x 12+
#> row_names mpg cyl disp hp drat wt qsec vs am
#> 1) Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0
#> 2) Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0
#> 3) Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0
#> 4) Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0
#> *28 row(s) not printed; +2 column(s) not printed
Compare with dplyr::arrange()
same_as_dplyr(
arrange_data(d, gear, cyl),
dplyr::arrange(d, gear, cyl)
)
#> [1] TRUE
select_data()
: Select columns of data frame
## select only these columns
select_data(d, row_names:hp, -disp, gear, weight = wt)
#> # A pseudo tibble: 32* x 6
#> row_names mpg cyl hp gear weight
#> 1) Mazda RX4 21.0 6 110 4 2.620
#> 2) Mazda RX4 Wag 21.0 6 110 4 2.875
#> 3) Datsun 710 22.8 4 93 4 2.320
#> 4) Hornet 4 Drive 21.4 6 110 3 3.215
#> *28 row(s) not printed
Compare with dplyr::select()
same_as_dplyr(
select_data(d, cyl, gear, weight = wt),
dplyr::select(d, cyl, gear, weight = wt)
)
#> [1] TRUE
mutate_data()
: Wrangle/create variables in data frame
## select cyl, gear, mpg columns and create new logical column for more efficient cars
d %>%
select_data(cyl, gear, mpg) %>%
mutate_data(eff = mpg > 20)
#> # A pseudo tibble: 32* x 4
#> cyl gear mpg eff
#> 1) 6 4 21.0 TRUE
#> 2) 6 4 21.0 TRUE
#> 3) 4 4 22.8 TRUE
#> 4) 6 3 21.4 TRUE
#> *28 row(s) not printed
Compare with dplyr::mutate()
same_as_dplyr(
d %>%
select_data(cyl, gear, mpg) %>%
mutate_data(eff = mpg > 20),
d %>%
dplyr::select(cyl, gear, mpg) %>%
dplyr::mutate(eff = mpg > 20)
)
#> [1] TRUE
summarise_data()
: Wrangle/create summary variables in data frame
## summarise the data as the mean of mpg and the mean of wt
summarise_data(d, mpg = mean(mpg), wt = mean(wt))
#> # A pseudo tibble: 1 x 2
#> mpg wt
#> 1) 20.09062 3.21725
Compare with dplyr::summarise()
same_as_dplyr(
summarise_data(d, mpg = mean(mpg), wt = mean(wt)),
dplyr::summarise(d, mpg = mean(mpg), wt = mean(wt))
)
#> [1] TRUE
group_by_data()
: Group observations in data frame [and then mutate and/or summarise]
## group by cyl and gear, then count observations and average mpg per group
d %>%
select_data(mpg:cyl, gear) %>%
group_by_data(cyl, gear) %>%
mutate_data(n = length(gear)) %>%
summarise_data(
n = unique(n),
mpg = mean(mpg)
)
#> # A pseudo tibble: 8 x 4
#> cyl gear n mpg
#> 1) 6 4 4 19.750
#> 2) 4 4 8 26.925
#> 3) 6 3 2 19.750
#> 4) 8 3 12 15.050
#> 5) 4 3 1 21.500
#> 6) 4 5 2 28.200
#> 7) 8 5 2 15.400
#> 8) 6 5 1 19.700
Compare with dplyr::group_by()
same_as_dplyr(
d %>%
select_data(cyl, gear, mpg) %>%
group_by_data(cyl) %>%
mutate_data(n = length(gear)) %>%
summarise_data(
n = unique(n),
mpg_total = sum(mpg),
mpg = mean(mpg)
) %>%
arrange_data(cyl) %>%
select_data(cyl, mpg_total, mpg, n),
d %>%
dplyr::select(cyl, gear, mpg) %>%
dplyr::group_by(cyl) %>%
dplyr::mutate(n = length(gear)) %>%
dplyr::summarise(
n = unique(n),
mpg_total = sum(mpg),
mpg = mean(mpg)
) %>%
dplyr::arrange(cyl) %>%
dplyr::select(cyl, mpg_total, mpg, n)
)
#> [1] TRUE
bind_rows_data()
: Collapse list of data frames into single data frame
## create version of data with new variable
dd <- d
dd$new_var <- sample(letters, nrow(d), replace = TRUE)
## combine multiple data sets into list
lst <- list(d, d, dd)
## bind rows into single data frame
bind_rows_data(lst, fill = TRUE)
#> # A pseudo tibble: 96* x 13+
#> row_names mpg cyl disp hp drat wt qsec vs am gear
#> 1) Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4
#> 2) Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4
#> 3) Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4
#> 4) Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3
#> *92 row(s) not printed; +2 column(s) not printed
Compare to dplyr::bind_rows()
same_as_dplyr(
bind_rows_data(lst, fill = TRUE),
dplyr::bind_rows(lst)
)
#> [1] TRUE
## mtcars data and additional cyl/new data
x <- tbltools::as_tbl_data(mtcars)
y <- data.frame(cyl = c(1, 4), new = c(1.25, 2.5))
left_join_data()
: Join according to first (left) data frame
## join according to x
left_join_data(x, y)
#> Joining, by = "cyl"
#> # A pseudo tibble: 32* x 12
#> mpg cyl disp hp drat wt qsec vs am gear carb new
#> 1) 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 NA
#> 2) 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 NA
#> 3) 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 2.5
#> 4) 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 NA
#> *28 row(s) not printed
Compare with dplyr::left_join()
:
same_as_dplyr(
left_join_data(x, y),
dplyr::left_join(x, y)
)
#> Joining, by = "cyl"
#> Joining, by = "cyl"
#> [1] TRUE
right_join_data()
: Join according to second (right) data frame
## join by y
right_join_data(x, y)
#> Joining, by = "cyl"
#> # A pseudo tibble: 12* x 12
#> mpg cyl disp hp drat wt qsec vs am gear carb new
#> 1) NA 1 NA NA NA NA NA NA NA NA NA 1.25
#> 2) 22.8 4 108.0 93 3.85 2.32 18.61 1 1 4 1 2.50
#> 3) 24.4 4 146.7 62 3.69 3.19 20.00 1 0 4 2 2.50
#> 4) 22.8 4 140.8 95 3.92 3.15 22.90 1 0 4 2 2.50
#> *8 row(s) not printed
Compare with dplyr::right_join()
:
same_as_dplyr(
right_join_data(x, y),
dplyr::right_join(x, y)
)
#> Joining, by = "cyl"
#> Joining, by = "cyl"
#> [1] TRUE
full_join_data()
: Join according to both data frames
## join by x and y
full_join_data(x, y)
#> Joining, by = "cyl"
#> # A pseudo tibble: 33* x 12
#> mpg cyl disp hp drat wt qsec vs am gear carb new
#> 1) 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 NA
#> 2) 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 NA
#> 3) 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 2.5
#> 4) 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 NA
#> *29 row(s) not printed
Compare with dplyr::full_join()
:
same_as_dplyr(
full_join_data(x, y),
dplyr::full_join(x, y)
)
#> Joining, by = "cyl"
#> Joining, by = "cyl"
#> [1] TRUE
tabsort()
: Count frequencies
## count by cyl and gear
tabsort(d, cyl, gear)
#> # A pseudo tibble: 9* x 4
#> cyl gear n prop
#> 1) 8 3 12 0.3750
#> 2) 4 4 8 0.2500
#> 3) 6 4 4 0.1250
#> 4) 6 3 2 0.0625
#> *5 row(s) not printed
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.