#' Evaluate imputation accuracy and consistency
#'
#' Compare observed values to imputed values for a single variable
#'
#' @param original original \code{data.frame}
#' @param amputated amputated \code{data.frame}
#' @param imputed imputed \code{data.frame}
#' @param var name of variable for evaluation
#' @param id unique id in \code{data.frame}
#'
#' @details
#' If a unique \code{id} is supplied, the order of all three data frames is aligned and otherwise the correct sorting is assumed.
#' The function compares amputated values to imputed values using the Kolmogorov–Smirnov statistic and the standardized RMSE (numerical variables) or the classification error (for variable types character, factor, ordered, logical).
#' Amputated values are observed in \code{original} but not in \code{amputated}. The RMSE is standardized with the standard deviation observed in \code{original}.
#' While RMSE and classification error measure imputation accuracy, the Kolmogorov–Smirnov statistic measures imputation consistency.
#'
#' @return
#' \code{data.frame}.
#'
#' @examples
#' df <- tibble(a=1:5, b=LETTERS[1:5])
#' df_amp <- missMaker(df, var='a', size=3)
#' df_imp <- missMean(df_amp)
#' missEval(df, df_amp, df_imp, var='a')
#'
#'
#' @export
missEval <- function(
original,
amputated,
imputed,
var,
id=NULL){
if(!(nrow(original)==nrow(amputated) &
nrow(original)==nrow(imputed)) ) stop("Data frames with different number of rows are not allowed.")
if(!(ncol(original)==ncol(amputated) &
ncol(original)==ncol(imputed)) ) stop("Data frames with different number of columns are not allowed.")
# Sort rows and columns
if(!is.null(id)){
amputated <- sort_df(amputated,original,id=id)
imputed <- sort_df(imputed,original,id=id)
} else {
warning("Assuming that all three data frames are sorted.")
}
amputated <- amputated[,colnames(original)]
imputed <- imputed[,colnames(original)]
if(is.character(original[[var]])==TRUE) stop("Character variables can not be used.")
raw <- original
idx <- !is.na(original[,var])
original <- original[idx,]
amputated <- amputated[idx,]
imputed <- imputed[idx,]
obs <- original[is.na(amputated[,var]),]
imp <- imputed[is.na(amputated[,var]),]
obs <- obs[[var]]
imp <- imp[[var]]
if(length(obs) < 3) stop("Fewer than 2 values observed.")
if( is_cat(obs) ){
acc <- tibble(metric='pfc', err=pfc_stat(obs, imp))
ks <- tibble(metric='ks', err=ks_stat(obs, imp))
smd <- tibble(metric='smd', err=smd_stat(obs, imp))
} else {
sd_ <- sd(raw[[var]],na.rm=TRUE)
acc <- tibble(metric='nrmse', err=rmse_stat(obs,imp)/sd_ )
ks <- tibble(metric='ks', err=ks_stat(obs, imp))
smd <- tibble(metric='smd', err=smd_stat(obs, imp))
}
return(bind_rows(acc,ks,smd))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.