Nothing
#' @title PipeOpAmelia
#'
#' @name PipeOpAmelia
#'
#' @description
#' Implements EMB methods as mlr3 pipeline more about Amelia \code{\link{autotune_Amelia}} or \url{https://cran.r-project.org/package=Amelia}
#'
#' @section Input and Output Channels:
#' Input and output channels are inherited from \code{\link{PipeOpImpute}}.
#'
#'
#' @section Parameters:
#' The parameters include inherited from [`PipeOpImpute`], as well as: \cr
#' \itemize{
#' \item \code{id} :: \code{character(1)}\cr
#' Identifier of resulting object, default \code{"imput_Amelia"}.
#' \item \code{m} :: \code{integer(1)}\cr
#' Number of datasets generated by Amelia, default \code{3}.
#' \item \code{polytime} :: \code{integer(1)}\cr
#' Integer between 0 and 3 indicating what power of polynomial should be included in the imputation model to account for the effects of time. A setting of 0 would indicate constant levels, 1 would indicate linear time effects, 2 would indicate squared effects, and 3 would indicate cubic time effects, default \code{NULL}.
#' \item \code{splinetime} :: \code{integer(1)}\cr
#' Integer value of 0 or greater to control cubic smoothing splines of time. Values between 0 and 3 create a simple polynomial of time (identical to the polytime argument). Values k greater than 3 create a spline with an additional k-3 knotpoints, default \code{NULL}.
#' \item \code{intercs} :: \code{logical(1)}\cr
#' Variable indicating if the time effects of polytime should vary across the cross-section, default \code{FALSE}.
#' \item \code{empir} :: \code{double(1)}\cr
#' Number indicating level of the empirical (or ridge) prior. This prior shrinks the covariances of the data, but keeps the means and variances the same for problems of high missingness, small N's or large correlations among the variables. Should be kept small, perhaps 0.5 to 1 percent of the rows of the data; a reasonable upper bound is around 10 percent of the rows of the data. If empir is not set, empir=nrow(df)*0.015, default \code{NULL}.
#' \item \code{parallel} :: \code{double(1)}\cr
#' If true parallel calculation is used, default \code{TRUE}.
#' \item \code{out_fill} :: \code{character(1)}\cr
#' Output log file location. If file already exists log message will be added. If NULL no log will be produced, default \code{NULL}.
#' }
#'
#' @import mlr3
#' @import mlr3pipelines
#' @import paradox
#'
#' @importFrom methods is
#' @importFrom stats as.formula
#' @importFrom stats median
#' @importFrom stats na.omit
#' @importFrom stats runif
#' @importFrom stats setNames
#' @importFrom utils capture.output
#'
#' @examples
#' \donttest{
#'
#' # Using debug learner for example purpose
#'
#' graph <- PipeOpAmelia$new() %>>% LearnerClassifDebug$new()
#' graph_learner <- GraphLearner$new(graph)
#'
#' graph_learner$param_set$values$impute_Amelia_B.parallel <- FALSE
#'
#'
#' resample(tsk("pima"), graph_learner, rsmp("cv", folds = 3))
#' }
#' @export
PipeOpAmelia <- R6::R6Class("Amelia_imputation",
lock_objects = FALSE,
inherit = PipeOpImpute, # inherit from PipeOp
public = list(
initialize = function(id = "impute_Amelia_B", polytime = NULL, splinetime = NULL, intercs = FALSE, empir = NULL, m = 3, parallel = TRUE, out_file = NULL) {
super$initialize(id,
whole_task_dependent = TRUE, packages = "NADIA", param_vals = list(polytime = polytime, splinetime = splinetime, intercs = intercs, empir = empir, m = m, parallel = parallel, out_file = out_file),
param_set = ParamSet$new(list(
"polytime" = ParamUty$new("polytime", default = NULL, tags = "amelia"),
"splinetime" = ParamUty$new("splinetime", default = NULL, tags = "amelia"),
"empir" = ParamUty$new("empir", default = NULL, tags = "amelia"),
"parallel" = ParamLgl$new("parallel", default = TRUE, tags = "amelia"),
"intercs" = ParamLgl$new("intercs", default = FALSE, tags = "amelia"),
"m" = ParamInt$new("m", lower = 1, upper = Inf, default = 3, tags = "amelia"),
"out_file" = ParamUty$new("out_file", default = NULL, tags = "amelia")
))
)
self$imputed <- FALSE
self$column_counter <- NULL
self$data_imputed <- NULL
}), private = list(
.train_imputer = function(feature, type, context) {
imp_function <- function(data_to_impute) {
data_to_impute <- as.data.frame(data_to_impute)
# prepering arguments for function
col_type <- 1:ncol(data_to_impute)
for (i in col_type) {
col_type[i] <- class(data_to_impute[, i])
}
percent_of_missing <- 1:ncol(data_to_impute)
for (i in percent_of_missing) {
percent_of_missing[i] <- (sum(is.na(data_to_impute[, i])) / length(data_to_impute[, 1])) * 100
}
col_miss <- colnames(data_to_impute)[percent_of_missing > 0]
col_no_miss <- colnames(data_to_impute)[percent_of_missing == 0]
data_imputed <- NADIA::autotune_Amelia(data_to_impute, col_type, percent_of_missing,
parallel = self$param_set$values$parallel, polytime = self$param_set$values$polytime,
splinetime = self$param_set$values$splinetime, intercs = self$param_set$values$intercs,
empir = self$param_set$values$empir, m = self$param_set$values$m,
out_file = self$param_set$values$out_file)
return(data_imputed)
}
self$imputed_predict <- TRUE
self$flag <- "train"
if (!self$imputed) {
self$column_counter <- ncol(context) + 1
self$imputed <- TRUE
data_to_impute <- cbind(feature, context)
self$data_imputed <- imp_function(data_to_impute)
colnames(self$data_imputed) <- self$state$context_cols
}
if (self$imputed) {
self$column_counter <- self$column_counter - 1
}
if (self$column_counter == 0) {
self$imputed <- FALSE
}
self$train_s <- TRUE
self$action <- 3
return(list("data_imputed" = self$data_imputed, "train_s" = self$train_s, "flag" = self$flag, "imputed_predict" = self$imputed_predict, "imputed" = self$imputed, "column_counter" = self$column_counter))
},
.impute = function(feature, type, model, context) {
if (is.null(self$action)) {
self$train_s <- TRUE
self$flag <- "train"
self$imputed_predict <- TRUE
self$action <- 3
self$data_imputed <- model$data_imputed
self$imputed <- FALSE
self$column_counter <- 0
}
imp_function <- function(data_to_impute) {
data_to_impute <- as.data.frame(data_to_impute)
# prepering arguments for function
col_type <- 1:ncol(data_to_impute)
for (i in col_type) {
col_type[i] <- class(data_to_impute[, i])
}
percent_of_missing <- 1:ncol(data_to_impute)
for (i in percent_of_missing) {
percent_of_missing[i] <- (sum(is.na(data_to_impute[, i])) / length(data_to_impute[, 1])) * 100
}
col_miss <- colnames(data_to_impute)[percent_of_missing > 0]
col_no_miss <- colnames(data_to_impute)[percent_of_missing == 0]
data_imputed <- NADIA::autotune_Amelia(data_to_impute, col_type, percent_of_missing,
parallel = self$param_set$values$parallel, polytime = self$param_set$values$polytime,
splinetime = self$param_set$values$splinetime, intercs = self$param_set$values$intercs,
empir = self$param_set$values$empir, m = self$param_set$values$m,
out_file = self$param_set$values$out_file)
return(data_imputed)
}
if (self$imputed) {
feature <- self$data_imputed[, setdiff(colnames(self$data_imputed), colnames(context))]
}
if ((nrow(self$data_imputed) != nrow(context) | !self$train_s) & self$flag == "train") {
self$imputed_predict <- FALSE
self$flag <- "predict"
}
if (!self$imputed_predict) {
data_to_impute <- cbind(feature, context)
self$data_imputed <- imp_function(data_to_impute)
colnames(self$data_imputed)[1] <- setdiff(self$state$context_cols, colnames(context))
self$imputed_predict <- TRUE
}
if (self$imputed_predict & self$flag == "predict") {
feature <- self$data_imputed[, setdiff(colnames(self$data_imputed), colnames(context))]
}
if (self$column_counter == 0 & self$flag == "train") {
feature <- self$data_imputed[, setdiff(colnames(self$data_imputed), colnames(context))]
self$flag <- "predict"
self$imputed_predict <- FALSE
}
self$train_s <- FALSE
return(feature)
}
)
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.