Nothing
#' Model for donor-based imputation
#'
#' This function is intended to be used inside of [impute_unsupervised()] as
#' `model_fun`.
#'
#' @inheritParams impute_supervised
#' @param i Index for row of `ds` or `NULL`
#' @param model_arg A list with two named elements (missing elements will be
#' replaced by default values):
#' * `selection` How to select the donors? Possible choices are:
#' `complete_rows` (default), `partly_complete_rows`, `knn_complete_rows`,
#' `knn_partly_complete_rows`
#' * `k` number of selected closest donor (default: 10), only used for knn
#' `selection`s
#'
#' @return A "model" for [predict_donor()] which is merely a data frame.
#' @export
#' @seealso [predict_donor()]
#' @importFrom gower gower_topn
#' @importFrom stats complete.cases
#' @examples
#' set.seed(123)
#' ds_mis <- data.frame(X = rnorm(10), Y = rnorm(10))
#' ds_mis[2:4, 1] <- NA
#' ds_mis[4:6, 2] <- NA
#' # default returns only complete rows
#' model_donor(ds_mis)
#' # with partly_complete and knn returned objects depends on i
#' model_donor(ds_mis,
#' i = 2,
#' model_arg = list(selection = "partly_complete_rows")
#' )
#' model_donor(ds_mis,
#' i = 4,
#' model_arg = list(selection = "partly_complete_rows")
#' )
#' model_donor(ds_mis,
#' i = 5,
#' model_arg = list(selection = "partly_complete_rows")
#' )
#' model_donor(ds_mis,
#' i = 5,
#' model_arg = list(selection = "knn_partly_complete_rows", k = 2)
#' )
model_donor <- function(ds, M = is.na(ds), i = NULL, model_arg = NULL) {
stopifnot(
"model_arg must be a list or NULL" =
is.list(model_arg) || is.null(model_arg)
)
model_arg <- set_defaults_for_missing(
model_arg, list(selection = "complete_rows", k = 10)
)
if (is.null(i) && model_arg$selection != "complete_rows") {
stop("only donor selection \"complete_rows\"' is possible for this case")
}
if (model_arg$selection %in% c("complete_rows", "knn_complete_rows")) {
suitable_rows <- complete.cases(ds)
} else if (model_arg$selection %in%
c("partly_complete_rows", "knn_partly_complete_rows")) {
suitable_rows <- apply(M, 1, function(x) !any(M[i, ] & x))
} else {
stop(paste0(
"'", model_arg$selection, "' is not a valid option for donor selection"
))
}
if (model_arg$selection %in%
c("knn_complete_rows", "knn_partly_complete_rows")) {
suitable_rows_ind <- which(suitable_rows)
best_k <- gower::gower_topn(ds[i, ], ds[suitable_rows, ], n = model_arg$k)
best_k <- best_k$index[, 1]
suitable_rows <- suitable_rows_ind[best_k]
}
return(structure(ds[suitable_rows, ], donor_selection = model_arg$selection))
}
#' Prediction for donor-based imputation
#'
#' This function is intended to be used inside of [impute_unsupervised()] as
#' `predict_fun`.
#'
#' @inheritParams model_donor
#' @param ds_donors Data set with donors, normally generated by [model_donor()]
#' @param i Index of row of `ds` which should be imputed
#' @param donor_aggregation Type of donor aggregation. Can be one of
#' 'choose_random' and 'average'.
#'
#' @return The imputation values for row `i`.
#' @seealso [model_donor()]
#' @export
#' @examples
#' set.seed(123)
#' ds_mis <- data.frame(X = rnorm(10), Y = rnorm(10))
#' ds_mis[2:4, 1] <- NA
#' ds_mis[4:6, 2] <- NA
#' # default for ds_donors and predict_donors
#' ds_donors <- model_donor(ds_mis)
#' predict_donor(ds_donors, ds_mis, i = 2)
#' predict_donor(ds_donors, ds_mis, i = 4)
#' # with partly_complete, knn and average of neighbors
#' ds_donors <- model_donor(
#' ds_mis,
#' i = 5, model_arg = list(selection = "knn_partly_complete_rows", k = 2)
#' )
#' ds_donors
#' predict_donor(ds_donors, ds_mis, i = 5, donor_aggregation = "average")
predict_donor <- function(ds_donors, ds, M = is.na(ds), i,
donor_aggregation = "choose_random") {
if (donor_aggregation == "choose_random") {
return(ds_donors[sample.int(nrow(ds_donors), 1), M[i, ]])
} else if (donor_aggregation == "average") {
return(colMeans(ds_donors[, M[i, ], drop = FALSE]))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.