roll_na_fill | R Documentation |
NA fill

A fast and efficient by-group method for
"last-observation-carried-forward" NA filling.
roll_na_fill(x, g = NULL, fill_limit = Inf)
.roll_na_fill(x, fill_limit = Inf)
x |
A vector. |
g |
An object used for grouping x. This may be a vector or a data frame, for example. |
fill_limit |
(Optional) maximum number of consecutive NAs to fill
per NA cluster (a run of consecutive NAs). Default is Inf, meaning no limit. |
When supplying groups using g, this method uses radixorder(g) to
specify how to loop through x, making this extremely efficient.
When x contains zero NA values or consists entirely of NA values,
x is returned with no copy made.
.roll_na_fill() is the same as roll_na_fill() but without a g argument, and
it performs no sanity checks. It is passed straight to C++, which makes it
efficient for use inside loops.
A filled vector of x, the same length as x.
# Example: fill NAs by group with roll_na_fill(), then benchmark it against
# other "last-observation-carried-forward" implementations
# (data.table::nafill, vctrs::vec_fill_missing, zoo::na.locf0).
library(timeplyr)
library(dplyr)
library(data.table)
# Build every 3-letter combination of lowercase letters (26^3 strings)
# to serve as the pool of group labels.
words <- do.call(paste0,
do.call(expand.grid, rep(list(letters), 3)))
# Draw 10^5 group labels with replacement.
# NOTE: no seed is set, so the data (and timings) differ between runs.
groups <- sample(words, size = 10^5, replace = TRUE)
# 10^5 random integers between 1 and 100 ...
x <- sample.int(10^2, 10^5, TRUE)
# ... of which 10^4 randomly chosen positions are set to NA.
x[sample.int(10^5, 10^4)] <- NA
dt <- data.table(x, groups)
# Basic usage: carry the last observation forward within each group.
filled <- roll_na_fill(x, groups)
library(zoo)
# Summary
# The latest version of vctrs, with its vec_fill_missing(),
# is the fastest but not the most memory efficient.
# For low repetitions and large vectors, data.table is best.
# For large numbers of repetitions (groups) and data
# that is sorted by groups,
# timeplyr is fastest.
# No groups
# Each expression adds a filled column to dt by reference (`:=`);
# the trailing `[]` and `$` then return that column as the expression's result.
bench::mark(e1 = dt[, filled1 := timeplyr::roll_na_fill(x)][]$filled1,
e2 = dt[, filled2 := data.table::nafill(x, type = "locf")][]$filled2,
e3 = dt[, filled3 := vctrs::vec_fill_missing(x)][]$filled3,
e4 = dt[, filled4 := zoo::na.locf0(x)][]$filled4,
e5 = dt[, filled5 := timeplyr::.roll_na_fill(x)][]$filled5)
# With group
# e1 passes the groups directly to roll_na_fill(); e2-e4 instead rely on
# data.table's `by = groups` to apply an ungrouped fill within each group.
bench::mark(e1 = dt[, filled1 := timeplyr::roll_na_fill(x, groups)][]$filled1,
e2 = dt[, filled2 := data.table::nafill(x, type = "locf"), by = groups][]$filled2,
e3 = dt[, filled3 := vctrs::vec_fill_missing(x), by = groups][]$filled3,
e4 = dt[, filled4 := timeplyr::.roll_na_fill(x), by = groups][]$filled4)
# Data sorted by groups
# setkey() sorts dt by the groups column (by reference) before re-running
# the same grouped benchmark.
setkey(dt, groups)
bench::mark(e1 = dt[, filled1 := timeplyr::roll_na_fill(x, groups)][]$filled1,
e2 = dt[, filled2 := data.table::nafill(x, type = "locf"), by = groups][]$filled2,
e3 = dt[, filled3 := vctrs::vec_fill_missing(x), by = groups][]$filled3,
e4 = dt[, filled4 := timeplyr::.roll_na_fill(x), by = groups][]$filled4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.