##' k-nearest neighbor imputation
##'
##' Performs k-nearest neighbor (kNN) imputation on a matrix-like object where
##' rows represent features and columns represent samples. This function finds
##' k-nearest neighbors using either Gower distance or Euclidean distance.
##'
##' The kNN imputation based on Euclidean distance typically requires
##' standardization of input data to avoid variance-based weighting of variables
##' (make variables on similar scales). When Gower distance is used, the
##' imputation can be done with original units (would get the same result with
##' the standardized input on a different scale). The `type` "gower" utilizes
##' the [VIM::kNN] and "euclidean" uses the [impute::impute.knn].
##'
##' @param x A matrix-like object.
##' @param k An integer specifying the number of nearest neighbors to be used in
##' imputation.
##' @param type A string specifying the distance metric to be used. Either
##' "gower" or "euclidean".
##' @param by A string specifying whether the imputation is performed by
##' k-nearest features or by k-nearest samples. Either "feature" or "sample".
##' @param scale A logical specifying whether `x` needs to be standardized prior
##' to the imputation when Euclidean distance is used. The imputed values are
##' re-transformed so that they are on the original scales.
##' @param ... Arguments passed to [VIM::kNN] (Gower distance) or
##' [impute::impute.knn] (Euclidean distance).
##' @return A matrix of the same dimension as \code{x} containing the imputed
##' intensities.
##'
##' @references
##'
##' Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan and Gilbert Chu
##' (2021). impute: Imputation for microarray data. R package version 1.66.0.
##'
##' Alexander Kowarik, Matthias Templ (2016). Imputation with the R Package VIM.
##' Journal of Statistical Software, 74(7), 1-16. doi:10.18637/jss.v074.i07
##'
##' @seealso
##'
##' See [imputeIntensity] that provides a
##' \linkS4class{SummarizedExperiment}-friendly wrapper for this function.
##'
##' See [VIM::kNN] and [impute::impute.knn] for the underlying functions that
##' do the work.
##'
##' @examples
##'
##' data(faahko_se)
##'
##' m <- assay(faahko_se, "raw")
##' imputeKNN(m)
##'
##' @export
imputeKNN <- function(x, k = 10, type = c("gower", "euclidean"),
                      by = c("feature", "sample"), scale = FALSE, ...) {
  ## Resolve the enumerated choices; the first entry of each is the default.
  type <- match.arg(type)
  by <- match.arg(by)
  ## Both backends are handed a plain matrix.
  if (!is.matrix(x)) x <- as.matrix(x)
  ## Dispatch on the distance metric; `scale` is only forwarded to the
  ## Euclidean backend.
  switch(
    type,
    gower = .impute_knn_gower(x, k = k, by = by, ...),
    euclidean = .impute_knn_euclidean(x, k = k, by = by, scale = scale, ...)
  )
}
## kNN imputation through VIM::kNN (selected by `type = "gower"` in
## imputeKNN()).
##
## x:   a matrix; imputeKNN() passes features in rows, samples in columns.
## k:   number of nearest neighbors, forwarded to VIM::kNN.
## by:  "feature" imputes `x` as is; "sample" imputes the transpose of `x` and
##      transposes the result back.
## ...: further arguments forwarded to VIM::kNN.
##
## Returns a matrix with the same dimensions as `x`.
.impute_knn_gower <- function(x, k, by = c("feature", "sample"), ...) {
  ## Resolve the default vector to a single choice so that the comparison
  ## below is always a length-one condition.
  by <- match.arg(by)
  if (by == "feature") {
    ## Keep only the first ncol(x) columns of the VIM::kNN output (anything it
    ## appends after the data columns is discarded). `drop = FALSE` keeps a
    ## single-column input two-dimensional so the row names can be restored.
    out <- VIM::kNN(x, k = k, ...)[, seq_len(ncol(x)), drop = FALSE]
    ## VIM package internally converts x as data.table, which drops rownames
    rownames(out) <- rownames(x)
  } else {
    ## Same idea on the transposed input: after transposing back, the first
    ## nrow(x) rows are the data. `drop = FALSE` keeps a single-row input
    ## two-dimensional so the column names can be restored.
    out <- t(VIM::kNN(t(x), k = k, ...))[seq_len(nrow(x)), , drop = FALSE]
    colnames(out) <- colnames(x)
  }
  as.matrix(out)
}
## kNN imputation through MsCoreUtils::impute_knn (selected by
## `type = "euclidean"` in imputeKNN()).
##
## x:     a matrix; imputeKNN() passes features in rows, samples in columns.
## k:     number of nearest neighbors, forwarded to MsCoreUtils::impute_knn.
## by:    "feature" imputes `x` as is; "sample" imputes the transpose of `x`
##        and transposes the result back.
## scale: if TRUE, the (possibly transposed) matrix is standardized with
##        scale() before imputation and mapped back to the original units
##        afterwards.
## ...:   further arguments forwarded to MsCoreUtils::impute_knn.
.impute_knn_euclidean <- function(x, k, by = c("feature", "sample"),
                                  scale = FALSE, ...) {
  ## Min-Max scaling later?
  .verify_package("impute")
  by_feature <- by == "feature"
  ## Orient the matrix the way the backend should see it.
  oriented <- if (by_feature) x else t(x)
  if (scale) {
    standardized <- scale(oriented)
    filled <- MsCoreUtils::impute_knn(standardized, k = k, ...)
    ## Undo the standardization using the centers/scales recorded by scale().
    imputed <- .scale_recover(
      filled,
      attr(standardized, "scaled:center"),
      attr(standardized, "scaled:scale")
    )
  } else {
    imputed <- MsCoreUtils::impute_knn(oriented, k = k, ...)
  }
  ## Return in the orientation of the input.
  if (by_feature) imputed else t(imputed)
}
## Map a column-standardized matrix back to its original units: every column
## is multiplied by its entry in `scales` and then shifted by its entry in
## `centers`.
.scale_recover <- function(x, centers, scales) {
  sweep(
    sweep(x, MARGIN = 2, STATS = scales, FUN = "*"),
    MARGIN = 2, STATS = centers, FUN = "+"
  )
}
################################################################################
## imputation_mixed
################################################################################
## Slight modification of MsCoreUtils::impute_mixed to accommodate new methods.
## Mixed imputation: rows flagged in `randna` are imputed with method `mar`,
## all remaining rows with method `mnar`.
##
## x:      a matrix-like object whose rows are split into the two groups.
## randna: logical vector with one entry per row of `x`; TRUE selects the rows
##         handed to `mar`, FALSE the rows handed to `mnar`.
## mar:    imputation method passed to .imputeIntensity() for the TRUE rows.
## mnar:   imputation method passed to .imputeIntensity() for the FALSE rows.
## ...:    further arguments forwarded to both .imputeIntensity() calls.
##
## Returns `x` with both row groups replaced by their imputed versions.
.impute_mixed <- function(x, randna, mar, mnar, ...) {
  if (missing(randna)) {
    stop("Mixed imputation requires 'randna' argument. See ?impute_mixed.",
         call. = FALSE)
  }
  stopifnot(is.logical(randna))
  if (missing(mar)) {
    stop("Mixed imputation requires 'mar' argument. See ?impute_mixed.",
         call. = FALSE)
  }
  if (missing(mnar)) {
    stop("Mixed imputation requires 'mnar' argument. See ?impute_mixed.",
         call. = FALSE)
  }
  if (length(randna) != nrow(x)) {
    stop("`nrow(x)`and length of randna must be equal.",
         call. = FALSE)
  }
  ## NA entries cannot be used in the subscripted assignments below; fail
  ## early with a clear message instead.
  if (anyNA(randna)) {
    stop("'randna' must not contain missing values.", call. = FALSE)
  }
  ## `drop = FALSE` keeps each subset two-dimensional even when a group holds
  ## a single row. A group without any rows has nothing to impute, so it is
  ## skipped.
  if (any(randna)) {
    x[randna, ] <- .imputeIntensity(x[randna, , drop = FALSE],
                                    method = mar, ...)
  }
  if (!all(randna)) {
    x[!randna, ] <- .imputeIntensity(x[!randna, , drop = FALSE],
                                     method = mnar, ...)
  }
  x
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.