R/imputeIntensity-functions.R

Defines functions .impute_mixed .scale_recover .impute_knn_euclidean .impute_knn_gower imputeKNN

Documented in imputeKNN

##' k-nearest neighbor imputation
##'
##' Performs k-nearest neighbor (kNN) imputation on a matrix-like object where
##' rows represent features and columns represent samples. This function finds
##' k-nearest neighbors using either Gower distance or Euclidean distance.
##'
##' The kNN imputation based on Euclidean distance typically requires
##' standardization of input data to avoid variance-based weighting of variables
##' (make variables on similar scales). When Gower distance is used, the
##' imputation can be done with original units (would get the same result with
##' the standardized input on a different scale). The `type` "gower" utilizes
##' the [VIM::kNN] and "euclidean" uses the [impute::impute.knn].
##'
##' @param x A matrix-like object.
##' @param k An integer specifying the number of nearest neighbors to be used in
##'   imputation.
##' @param type A string specifying the distance metric to be used. Either
##'   "gower" or "euclidean".
##' @param by A string specifying whether the imputation is performed by
##'   k-nearest features or by k-nearest samples. Either "feature" or "sample".
##' @param scale A logical specifying whether `x` needs to be standardized prior
##'   to the imputation when Euclidean distance is used. The imputed values are
##'   re-transformed so that they are on the original scales.
##' @param ... Arguments passed to [VIM::kNN] (Gower distance) or
##'   [impute::impute.knn] (Euclidean distance).
##' @return A matrix of the same dimension as \code{x} containing the imputed
##'   intensities.
##'
##' @references
##'
##' Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan and Gilbert Chu
##' (2021). impute: impute: Imputation for microarray data. R package version
##' 1.66.0.
##'
##' Alexander Kowarik, Matthias Templ (2016). Imputation with the R Package VIM.
##' Journal of Statistical Software, 74(7), 1-16. doi:10.18637/jss.v074.i07
##'
##' @seealso
##'
##' See [imputeIntensity] that provides a
##'   \linkS4class{SummarizedExperiment}-friendly wrapper for this function.
##'
##' See [VIM::kNN] and [missForest::missForest] for the underlying functions
##' that do work.
##'
##' @examples
##'
##' data(faahko_se)
##'
##' m <- assay(faahko_se, "raw")
##' imputeKNN(m)
##'
##' @export
imputeKNN <- function(x, k = 10, type = c("gower", "euclidean"),
                      by = c("feature", "sample"), scale = FALSE, ...) {
    type <- match.arg(type)
    by <- match.arg(by)
    if (!is.matrix(x)) {
        x <- as.matrix(x)
    }
    if (type == "gower") {
        .impute_knn_gower(x, k = k, by = by, ...)
    } else {
        .impute_knn_euclidean(x, k = k, by = by, scale = scale, ...)
    }
}

.impute_knn_gower <- function(x, k, by = c("feature", "sample"), ...) {
    if (by == "feature") {
        out <- VIM::kNN(x, k = k, ...)[, seq_len(ncol(x))]
        ## VIM package internally converts x as data.table, which drops rownames
        rownames(out) <- rownames(x)
    } else {
        out <- t(VIM::kNN(t(x), k = k, ...))[seq_len(nrow(x)), ]
        colnames(out) <- colnames(x)
    }
    as.matrix(out)
}

.impute_knn_euclidean <- function(x, k, by = c("feature", "sample"),
                                  scale = FALSE, ...) {
    ## Min-Max scaling later?
    .verify_package("impute")
    if (by == "feature") {
        if (scale) {
            xs <- scale(x)
            res <- MsCoreUtils::impute_knn(xs, k = k, ...)
            .scale_recover(res, attr(xs, "scaled:center"),
                           attr(xs, "scaled:scale"))
        } else{
            MsCoreUtils::impute_knn(x, k = k, ...)
        }
    } else {
        if (scale) {
            xs <- scale(t(x))
            res <- MsCoreUtils::impute_knn(xs, k = k, ...)
            t(.scale_recover(res, attr(xs, "scaled:center"),
                             attr(xs, "scaled:scale")))
        } else{
            t(MsCoreUtils::impute_knn(t(x), k = k, ...))
        }
    }
}

.scale_recover <- function(x, centers, scales) {
    res <- sweep(x, 2, scales, "*")
    sweep(res, 2, centers, "+")
}

################################################################################
## imputation_mixed
################################################################################
## Slight modification in MSCoreUtils::imputed_mixed to accommodate new methods.
.impute_mixed <- function(x, randna, mar, mnar, ...) {
    if (missing(randna))
        stop("Mixed imputation requires 'randna' argument. See ?impute_mixed.",
             call. = FALSE)
    stopifnot(is.logical(randna))
    if (missing(mar))
        stop("Mixed imputation requires 'mar' argument. See ?impute_mixed.",
             call. = FALSE)
    if (missing(mnar))
        stop("Mixed imputation requires 'mnar' argument. See ?impute_mixed.",
             call. = FALSE)
    if (length(randna) != nrow(x))
        stop("`nrow(x)`and length of randna must be equal.",
             call. = FALSE)
    x[randna, ] <- .imputeIntensity(x[randna, ], method = mar, ...)
    x[!randna, ] <- .imputeIntensity(x[!randna, ], method = mnar, ...)
    x
}
HimesGroup/qmtools documentation built on April 16, 2023, 8 p.m.