Nothing
#' Perform imputation using Amelia package and EMB algorithm.
#'
#' @description Function use EMB (Expectation-Maximization with Bootstrapping ) to impute missing data. Function performance is highly depend from data structure and
#' chosen parameters.
#'
#'
#' @param df data.frame. Df to impute with column names and without target column.
#' @param col_type character vector. Vector containing column type names.
#' @param percent_of_missing numeric vector. Vector contatining percent of missing data in columns for example c(0,1,0,0,11.3,..)
#' @param col_0_1 Decaid if add bonus column informing where imputation been done. 0 - value was in dataset, 1 - value was imputed. Default False. (Works only for returning one dataset).
#' @param parallel If true parallel calculation is used.
#' @param polytime parameter pass to amelia function
#' @param splinetime parameter pass to amelia finction
#' @param intercs parameter pass to amleia function
#' @param empir parameter pass to amelia function as empir in Amelia == empir*nrow(df). If empir dont set empir=nrow(df)*0.015.
#' @param verbose If true function will print on console.
#' @param return_one Decide if one dataset or amelia object will be returned.
#' @param out_file Output log file location if file already exists log message will be added. If NULL no log will be produced.
#' @param m Number of datasets generated by amelia. If retrun_one=TRUE first dataset will be given.
#' @import Amelia
#' @references James Honaker, Gary King, Matthew Blackwell (2011). Amelia II: A Program for Missing Data. Journal of Statistical Software, 45(7), 1-47. URL https://www.jstatsoft.org/v45/i07/.
#'
#' @author James Honaker, Gary King, Matthew Blackwell (2011).
#'
#' @examples
#' {
#' raw_data <- data.frame(
#' a = as.factor(sample(c("red", "yellow", "blue", NA), 1000, replace = TRUE)),
#' b = as.integer(1:1000),
#' c = as.factor(sample(c("YES", "NO", NA), 1000, replace = TRUE)),
#' d = runif(1000, 1, 10),
#' e = as.factor(sample(c("YES", "NO"), 1000, replace = TRUE)),
#' f = as.factor(sample(c("male", "female", "trans", "other", NA), 1000, replace = TRUE)))
#'
#' # Prepering col_type
#' col_type <- c("factor", "integer", "factor", "numeric", "factor", "factor")
#'
#' percent_of_missing <- 1:6
#' for (i in percent_of_missing) {
#' percent_of_missing[i] <- 100 * (sum(is.na(raw_data[, i])) / nrow(raw_data))
#' }
#'
#'
#' imp_data <- autotune_Amelia(raw_data, col_type, percent_of_missing,parallel = FALSE)
#'
#' # Check if all missing value was imputed
#' sum(is.na(imp_data)) == 0
#' # TRUE
#' }
#' @return Return one data.frame with imputed values or amelia object.
#' @export
autotune_Amelia <- function(df, col_type=NULL, percent_of_missing=NULL, col_0_1 = FALSE, parallel = TRUE, polytime = NULL, splinetime = NULL, intercs = FALSE,
empir = NULL, verbose = FALSE, return_one = TRUE, m = 3, out_file = NULL) {
# Column informations
if(is.null(col_type)){
col_type <- 1:ncol(df)
for ( i in col_type){
col_type[i] <- class(df[,i])
}
}
if(is.null(percent_of_missing)){
percent_of_missing <- 1:ncol(df)
for ( i in percent_of_missing){
percent_of_missing[i] <- sum(is.na(df[,i]))/nrow(df)
}
}
if (!is.null(out_file)) {
write("Amelia ", file = out_file, append = TRUE)
}
col_0_1 <- FALSE
if (sum(is.na(df)) == 0) {
return(df)
}
# prepering information about categorical column
categorical_col <- colnames(df)[ifelse(col_type == "factor", TRUE, FALSE)]
if (length(categorical_col) == 0) {
categorical_col <- NULL
}
# seting parallel options
if (parallel) {
parallel <- "multicore"
}
if ("multicore" != parallel) {
parallel <- "no"
}
n_row <- length(df[, 1])
# Amelia Run
tryCatch({
if (is.null(empir)) {
empir <- 0.015
}
empir <- empir * n_row
final <- Amelia::amelia(df, m = m, noms = categorical_col, parallel = parallel, p2s = as.numeric(verbose), empri = empir, polytime = polytime, splinetime = splinetime, intercs = intercs)
if (return_one) {
for (i in final$imputations) {
if (!is.null(i)) {
final <- i
break
}
}
}
# Avoiding situtation when amelia dont impute and dont throwe errors c
if (!inherits(final,"data.frame") & !inherits(final,"amelia") ) {
stop("ERROR no error msg")
}
}, error = function(e) {
if (!is.null(out_file)) {
write(as.character(e), file = out_file, append = TRUE)
}
stop(e)
})
# adding 0_1 col
if (col_0_1 & return_one) {
columns_with_missing <- (as.data.frame(is.na(df)) * 1)[, percent_of_missing > 0]
colnames(columns_with_missing) <- paste(colnames(columns_with_missing), "where", sep = "_")
final <- cbind(final, columns_with_missing)
}
for (i in colnames(final)[col_type == "integer"]) {
final[, i] <- as.integer(final[, i])
}
return(final)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.