impute_df | R Documentation |
Impute missing values in a data frame by columns
impute_df(
x,
imputation_type = c("none", "mean", "locf", "interp"),
imputation_span = 5L,
cyclic = FALSE,
nmax_run = Inf
)
x |
A |
imputation_type |
A character string describing the imputation method; currently, one of four values: "none", "mean", "locf", or "interp".
|
imputation_span |
An integer value. The number of non-missing values
considered if |
cyclic |
A logical value. If |
nmax_run |
An integer value. Runs (sets of consecutive missing values)
that are equal to or shorter than |
An updated version of x
where missing values have been imputed
for each column separately.
# Example data: four columns with different shapes. The same rows are set to
# missing in every column, while `x0` keeps the untouched values so that the
# imputed results can be compared against the known originals.
n <- 30
ids_missing <- c(1:2, 10:13, 20:22, (n - 1):n)

x0 <- data.frame(
  linear = seq_len(n),
  all_missing = NA,
  all_same = 1,
  cyclic = cos(2 * pi * seq_len(n) / n)
)
x <- x0
x[ids_missing, ] <- NA

# Run each imputation method (with `nmax_run = 3L`), store the result under
# the method's name, and print original vs. imputed values for the rows that
# were set to missing.
res <- list()
for (method in c("mean", "locf", "interp")) {
  imputed <- impute_df(x, imputation_type = method, nmax_run = 3L)
  res[[method]] <- imputed
  print(cbind(orig = x0[ids_missing, ], imputed[ids_missing, ]))
}

# Visual comparison: one panel per column, skipping the second column
# (`all_missing`), which has no values to draw.
if (requireNamespace("graphics")) {
  plot_cols <- setdiff(seq_len(ncol(x)), 2L)

  # Stack the panels vertically; remember the previous settings to restore them.
  par_prev <- graphics::par(mfrow = c(length(plot_cols), 1L))

  for (col_idx in plot_cols) {
    truth <- x0[[col_idx]]

    # Line of the data with gaps; y-axis spans the original (complete) values.
    graphics::plot(
      x[[col_idx]],
      ylim = range(truth),
      ylab = names(x)[[col_idx]],
      type = "l"
    )

    # Original values at the positions that were set to missing.
    graphics::points(ids_missing, truth[ids_missing], pch = 1L, col = 1L)

    # Imputed values: a distinct symbol and color per method.
    for (m in seq_along(res)) {
      graphics::points(
        ids_missing,
        res[[m]][ids_missing, col_idx],
        pch = 1L + m,
        col = 1L + m
      )
    }
  }

  graphics::par(par_prev)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.