R/test_pcaMethods_Nipals.R
In missCompare: Intuitive Missing Data Imputation Framework

Documented in test_pcaMethods_Nipals

#' @title Testing the 'pcaMethods' NIPALS missing data imputation algorithm
#'
#' @description
#' \code{\link{test_pcaMethods_Nipals}} tests the imputation accuracy of the 'pcaMethods' NIPALS missing data imputation algorithm on matrices with various missing data patterns
#'
#' @details
#' This function tests the imputation accuracy of the 'pcaMethods' NIPALS missing data imputation algorithm by comparing the
#' original simulated matrix with no missingness and the imputed matrices generated by the algorithm using the matrices with
#' MCAR, MAR, MNAR and (optionally) MAP missingness patterns. The function calculates root-mean-square error (RMSE),
#' mean absolute error (MAE), Kolmogorov–Smirnov D test statistic (KS) between the imputed datapoints and the original
#' datapoints (that were subsequently set to missing). The function will also calculate the cumulative computation time for
#' imputing all datasets. The function will automatically detect whether there is a MAP matrix in the list and calculate
#' RMSE for all matrices provided in the list.
#'
#' @param X_hat Simulated matrix with no missingness (this matrix will be used to obtain the error between the original and imputed values). (Simulated_matrix output from the \code{\link{simulate}} function)
#' @param list List of matrices with various missingness patterns (MCAR, MAR, MNAR and optionally, MAP). (The input is ideally the R object that was generated using the \code{\link{all_patterns}} function)
#'
#' @name test_pcaMethods_Nipals
#'
#' @inherit test_AmeliaII return
#'
#' @examples
#' clindata_miss_mini <- clindata_miss[1:80,1:4]
#' cleaned <- clean(clindata_miss_mini, missingness_coding = -9)
#' metadata <- get_data(cleaned)
#' simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns,
#' cormat = metadata$Corr_matrix)
#' miss_list <- all_patterns(simulated$Simulated_matrix,
#'                     MD_pattern = metadata$MD_Pattern,
#'                     NA_fraction = metadata$Fraction_missingness,
#'                     min_PDM = 2)
#'
#' test_pcaMethods_Nipals(X_hat = simulated$Simulated_matrix, list = miss_list)
#'
#' @export


# FUNCTION

test_pcaMethods_Nipals <- function(X_hat, list) {

    index <- lapply(list, is.na)

    pcaMethods_Nipals_imp <- function(X) {
        ncomp <- missMDA::estim_ncpPCA(X, ncp.max = ncol(X) - 2)
        if (ncomp$ncp > 0)
            resNipals <- pcaMethods::pca(X, method = "nipals", center = FALSE, nPcs = ncomp$ncp) else resNipals <- pca(X, method = "nipals", center = FALSE, nPcs = 2)
        imp_matrix <- resNipals@completeObs

        list(Imputed = imp_matrix)
    }

    print("pcaMethods NIPALS imputation - in progress")
    start_time <- Sys.time()
    log_output <- utils::capture.output(results <- lapply(list, pcaMethods_Nipals_imp))
    end_time <- Sys.time()
    time <- as.numeric(end_time - start_time, units = "mins")

    # using NA index to identify the original values (later set to missing)
    orig_MCAR <- X_hat[index[[1]]]
    orig_MAR <- X_hat[index[[2]]]
    orig_MNAR <- X_hat[index[[3]]]
    if (length(index) == 4)
      orig_MAP <- X_hat[index[[4]]]

    # using NA index to identify the imputed values
    imp_MCAR <- results$MCAR_matrix$Imputed[index[[1]]]
    imp_MAR <- results$MAR_matrix$Imputed[index[[2]]]
    imp_MNAR <- results$MNAR_matrix$Imputed[index[[3]]]
    if (length(index) == 4)
      imp_MAP <- results$MAP_matrix$Imputed[index[[4]]]

    # RMSE
    rmse_MCAR <- sqrt(mean((orig_MCAR - imp_MCAR)^2))
    rmse_MAR <- sqrt(mean((orig_MAR - imp_MAR)^2))
    rmse_MNAR <- sqrt(mean((orig_MNAR - imp_MNAR)^2))
    if (length(index) == 4)
      rmse_MAP <- sqrt(mean((orig_MAP - imp_MAP)^2))

    # MAE
    mae_MCAR <- mean(abs(orig_MCAR - imp_MCAR))
    mae_MAR <- mean(abs(orig_MAR - imp_MAR))
    mae_MNAR <- mean(abs(orig_MNAR - imp_MNAR))
    if (length(index) == 4)
      mae_MAP <- mean(abs(orig_MAP - imp_MAP))

    # KS test
    ks_MCAR  <- stats::ks.test(orig_MCAR, imp_MCAR, exact=TRUE)$statistic
    ks_MAR  <- stats::ks.test(orig_MAR, imp_MAR, exact=TRUE)$statistic
    ks_MNAR  <- stats::ks.test(orig_MNAR, imp_MNAR, exact=TRUE)$statistic
    if (length(index) == 4)
      ks_MAP  <- stats::ks.test(orig_MAP, imp_MAP, exact=TRUE)$statistic

    if (length(index) == 4)
      list(Comp_time = time, MCAR_RMSE = rmse_MCAR, MAR_RMSE = rmse_MAR, MNAR_RMSE = rmse_MNAR, MAP_RMSE = rmse_MAP, MCAR_MAE = mae_MCAR, MAR_MAE = mae_MAR, MNAR_MAE = mae_MNAR, MAP_MAE = mae_MAP, MCAR_KS = ks_MCAR, MAR_KS = ks_MAR, MNAR_KS = ks_MNAR, MAP_KS = ks_MAP) else list(Comp_time = time, MCAR_RMSE = rmse_MCAR, MAR_RMSE = rmse_MAR, MNAR_RMSE = rmse_MNAR, MCAR_MAE = mae_MCAR, MAR_MAE = mae_MAR, MNAR_MAE = mae_MNAR, MCAR_KS = ks_MCAR, MAR_KS = ks_MAR, MNAR_KS = ks_MNAR)

}