Nothing
#' Label Encoder
#' @description Encodes and decodes categorical variables into integer values and vice versa.
#' This is a commonly performed task in data preparation during model training, because all machine learning models require
#' the data to be encoded into numerical format. It takes a vector of character or factor values and encodes them into numeric.
#' @format \code{\link{R6Class}} object.
#' @section Usage:
#' For usage details see \bold{Methods, Arguments and Examples} sections.
#' \preformatted{
#' lbl = LabelEncoder$new()
#' lbl$fit(x)
#' lbl$fit_transform(x)
#' lbl$transform(x)
#' }
#' @section Methods:
#' \describe{
#' \item{\code{$new()}}{Initialise the instance of the encoder}
#' \item{\code{$fit()}}{creates a memory of encodings but doesn't return anything}
#' \item{\code{$transform()}}{based on encodings learned in \code{fit} method is applies the transformation}
#' \item{\code{$fit_transform()}}{encodes the data and keep a memory of encodings simultaneously}
#' \item{\code{$inverse_transform()}}{encodes the data and keep a memory of encodings simultaneously}
#' }
#' @section Arguments:
#' \describe{
#' \item{data}{a vector or list containing the character / factor values}
#' }
#' @export
#' @examples
#' data_ex <- data.frame(Score = c(10,20,30,4), Name=c('Ao','Bo','Bo','Co'))
#' lbl <- LabelEncoder$new()
#' data_ex$Name <- lbl$fit_transform(data_ex$Name)
#' decode_names <- lbl$inverse_transform(data_ex$Name)
LabelEncoder <- R6Class("LabelEncoder", public = list(
#' @field input_data internal use
input_data = NA,
#' @field encodings internal use
encodings = NA,
#' @field decodings internal use
decodings = NA,
#' @field fit_model internal use
fit_model = FALSE,
#' @details
#' Fits the labelencoder model on given data
#'
#' @param data_col a vector containing non-null values
#' @return NULL, calculates the encoding and save in memory
#'
#' @examples
#' data_ex <- data.frame(Score = c(10,20,30,4), Name=c('Ao','Bo','Bo','Co'))
#' lbl <- LabelEncoder$new()
#' lbl$fit(data_ex$Name)
#' data_ex$Name <- lbl$fit_transform(data_ex$Name)
#' decode_names <- lbl$inverse_transform(data_ex$Name)
fit = function(data_col){
self$input_data <- private$check_data(data_col)
self$encodings <- private$encoder(self$input_data)
self$decodings <- private$decoder(self$encodings)
self$fit_model <- TRUE
},
#' @details
#' Fits and returns the encoding
#'
#' @param data_col a vector containing non-null values
#' @return encoding values for the given input data
#'
#' @examples
#' data_ex <- data.frame(Score = c(10,20,30,4), Name=c('Ao','Bo','Bo','Co'))
#' lbl <- LabelEncoder$new()
#' lbl$fit(data_ex$Name)
#' data_ex$Name <- lbl$fit_transform(data_ex$Name)
fit_transform = function(data_col){
data_col <- private$check_data(data_col)
self$fit(data_col)
vals <- private$mapper(data_col,
self$encodings,
convert_type = NULL,
output_type = numeric(1))
return(vals)
},
#' @details
#' Returns the encodings from the fitted model
#'
#' @param data_col a vector containing non-null values
#' @return encoding values for the given input data
#'
#' @examples
#' data_ex <- data.frame(Score = c(10,20,30,4), Name=c('Ao','Bo','Bo','Co'))
#' lbl <- LabelEncoder$new()
#' lbl$fit(data_ex$Name)
#' data_ex$Name <- lbl$transform(data_ex$Name)
transform = function(data_col){
if (!(isTRUE(self$fit_model)))
stop("Please run fit before using transform.")
data_col <- private$check_data(data_col)
# all values in the new vector should be in encodings
if (!(all(data_col %in% names(self$encodings)))) {
message(strwrap("There are new values in this vector which weren't
available during fit. Replacing those values with 'NA'"))
# replace new values with 'NA'
val_index <- which(!(data_col %in% names(self$encodings)))
data_col[val_index] <- "NA"
}
vals <- private$mapper(data_col,
self$encodings,
convert_type = NULL,
output_type = numeric(1))
return(vals)
},
#' @details
#' Gives back the original values from a encoded values
#'
#' @param coded_col a vector containing label encoded values
#' @return original values from the label encoded data
#'
#' @examples
#' data_ex <- data.frame(Score = c(10,20,30,4), Name=c('Ao','Bo','Bo','Co'))
#' lbl <- LabelEncoder$new()
#' lbl$fit(data_ex$Name)
#' data_ex$Name <- lbl$fit_transform(data_ex$Name)
#' decode_names <- lbl$inverse_transform(data_ex$Name)
inverse_transform = function(coded_col){
#check if all values exist in decode
if (!(all(coded_col %in% self$encodings))) {
stop(strwrap("There are new values in this data which weren't
available during fit. Please fix the issue."))
}
# write here
vals <- private$mapper(coded_col,
self$decodings,
convert_type = as.character,
output_type = character(1))
return(vals)
}
),
private = list(
encoder = function(data_col){
# this is to handle if the input value is factor, it should be character
data_col <- as.character(data_col)
all_values <- unique(data_col)
maps <- list()
# because the encoding should start with zero (i - 1)
for (i in seq(all_values)) maps[all_values[i]] <- i - 1
return(maps)
},
decoder = function(en_output){
# reverse the encoded list - make values as names and vice versa
f <- as.list(names(en_output))
names(f) <- en_output
return(f)
},
mapper = function(input_vec, coded_list, convert_type ,output_type){
if (is.null(convert_type)) {
# this is to avoid using convert_type argument for
# transform, fit_transform
return(vapply(input_vec,
function(x) coded_list[[x]],
FUN.VALUE = output_type,
USE.NAMES = F))
}
return(vapply(input_vec,
function(x) coded_list[[convert_type(x)]],
FUN.VALUE = output_type,
USE.NAMES = F))
},
check_data = function(data_col){
# fix data issues here
if (any(is.na(data_col))) {
message("The data contains NA values. Imputing NA with 'NA' ")
data_col <- replace(data_col, which(is.na(data_col)), "NA")
}
if (any((data_col == ""))) {
message("The data contains blank values. Imputing them with 'NA' ")
data_col <- replace(data_col, which(data_col == ""), "NA")
}
if (any((data_col == " "))) {
message("The data contains blank values. Imputing them with 'NA' ")
data_col <- replace(data_col, which(data_col == " "), "NA")
}
return(data_col)
}
)
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.