#' @export
FeatureStandardizer <- R6::R6Class(
'FeatureStandardizer', inherit=PipeComponent,
public=list(
method='l2', centers=TRUE, scales=TRUE, nobs=0,
tol=sqrt(.Machine$double.eps),
fit = function(x, y=NULL)
{
self$nobs <- dim(x)[1]
if (self$method == 'l2') {
self$centers <- apply(x, MARGIN=2, FUN=mean, na.rm=TRUE)
self$scales <- apply(x, MARGIN=2, FUN=sd, na.rm=TRUE)
} else if (self$method == 'l1') {
self$centers <- apply(x, MARGIN=2, FUN=mean, na.rm=TRUE)
self$scales <- apply(x, MARGIN=2, FUN=lsr::aad, na.rm=TRUE)
} else if (self$method == 'range') {
rng <- apply(x, MARGIN=2, FUN=range, na.rm=TRUE)
self$centers <- rng[1,]
self$scales <- apply(rng, MARGIN=2, FUN=diff)
} else {
stop('invalid standardization method: ', method)
}
flg <- (self$scales < self$tol)
if (any(flg)) {
cat('scales less than ', self$tol, ' are ignored\n')
self$scales[flg] <- 1
}
invisible(self)
},
incr_fit = function(x, y=NULL)
{
# if not fitted yet, call the fit function
if (self$nobs == 0) return(self$fit(x, y))
n <- dim(x)[1]
if (self$method == 'l2') {
# online update
tmp <- update_mean_and_sd(self$centers, self$scales, self$nobs, x)
self$centers <- tmp$m
self$scales <- tmp$s
} else if (self$method == 'l1') {
message('incremental fit for L1-method is approximate')
cen <- apply(x, MARGIN=2, FUN=mean, na.rm=TRUE)
sca <- apply(x, MARGIN=2, FUN=lsr::aad, na.rm=TRUE)
# update by weighted average
self$centers <- (n*cen + self$nobs*self$centers)/(n+self$nobs)
self$scales <- (n*sca + self$nobs*self$scales)/(n+self$nobs)
} else if (self$method == 'range') {
rng <- apply(x, MARGIN=2, FUN=range, na.rm=TRUE)
# update by element-wise max and mins
min1 <- self$centers
max1 <- self$scales
newmin <- pmin(min1, rng[1,])
newmax <- pmax(max1, rng[2,])
self$centers <- newmin
self$scales <- newmax-newmin
} else {
stop('invalid standardization method: ', method)
}
self$nobs <- self$nobs + n
flg <- (self$scales < self$tol)
if (any(flg)) {
cat('scales less than ', self$tol, ' are ignored\n')
self$scales[flg] <- 1
}
invisible(self)
},
transform = function(x, y=NULL)
{
x2 <- scale(x, center=self$centers, scale=self$scales)
attr(x2, 'scaled:center') <- NULL
attr(x2, 'scaled:scale') <- NULL
list(x=x2, y=y)
},
inv_transform = function(x, y=NULL)
{
x2 <- inv_scale(x, center=self$centers, scale=self$scales)
list(x=x2, y=y)
},
initialize = function(method='l2', tol=sqrt(.Machine$double.eps), ...)
{
self$method <- method
self$tol <- tol
invisible(self)
}
)
)
#' Feature-wise standardization
#'
#' Standardize each feature of data. Three methodologies are supported:
#' \itemize{
#' \item{"l2": standardize to zero-mean, unit-variance}
#' \item{"l1": standardize to zero-mean, unit-MAD (mean absolute deviation)}
#' \item{"range": standardize to unit interval range}
#' }
#'
#' @name feature_standardizer
#' @aliases FeatureStandardizer
#'
#' @section Usage:
#' \preformatted{feature_standardizer(method = 'l2', tol = sqrt(.Machine$double.eps)}
#'
#' @section Arguments:
#' \describe{
#' \item{method}{either "l2", "l1" or "range"}
#' \item{tol}{positive real number, scaling is not conducted if scale is below this level}
#' }
#'
#' @section Value:
#' \code{FeatureStandardizer} class object
#'
#' @section Class Methods:
#' \describe{
#' \item{\preformatted{fit(x, y = NULL)}}{fit to \code{x} and store the center and scale values}
#' \item{\preformatted{transform(x, y = NULL)}}{returns standardized \code{x} matrix}
#' \item{\preformatted{inv_transform(x, y = NULL)}}{returns pre-standardized \code{x} matrix}
#' \item{\preformatted{incr_fit(x, y = NULL)}}{currently not implemented}
#' }
#'
#' @section Details:
#' When fitted to data, the feature-wise centers and scales are stored in the object,
#' and transformation is conducted using these center and scale values.
#' Therefore, when applied to new data, they are not exactly standardized.
#'
#' Uses \code{\link[lsr]{aad}} as the backend calculation for "l1" method
#'
#' @examples
#' data(mtcars)
#' # zero-mean, unit-variance standardization
#' fs <- feature_standardizer(method='l2')
#' fs$fit(mtcars)
#' z <- fs$transform(mtcars)$x
#' apply(z, MARGIN=2, FUN=mean)
#' apply(z, MARGIN=2, FUN=sd)
#' w <- fs$inv_transform(z)$x
#' range(mtcars-w)
#'
#' # zero-mean, unit-MAD (mean absolute deviation)
#' fs <- feature_standardizer(method='l1')
#' fs$fit(mtcars)
#' z <- fs$transform(mtcars)$x
#' apply(z, MARGIN=2, FUN=mean)
#' apply(z, MARGIN=2, FUN=lsr::aad)
#' w <- fs$inv_transform(z)$x
#' range(mtcars-w)
#'
#' # standardize to unit interval
#' fs$set_parameters(method='range')
#' fs$fit(mtcars)
#' z <- fs$transform(mtcars)$x
#' apply(z, MARGIN=2, FUN=range)
#' w <- fs$inv_transform(z)$x
#' range(mtcars-w)
#'
#' # incremental fit is allowed for l2 and range method
#' fs$set_parameters(method='l2')
#' fs$fit(mtcars)
#' fs2 <- feature_standardizer(method='l2')
#' fs2$incr_fit(mtcars[1:10,])
#' fs2$incr_fit(mtcars[11:32,])
#' cbind(fs$centers, fs2$centers)
#' cbind(fs$scales, fs2$scales)
#'
#' fs$set_parameters(method='range')
#' fs$fit(mtcars)
#' fs2$set_parameters(method='range')
#' fs2$incr_fit(mtcars[1:14,])
#' fs2$incr_fit(mtcars[15:32,])
#' cbind(fs$centers, fs2$centers)
#' cbind(fs$scales, fs2$scales)
NULL
#' @export
feature_standardizer <- FeatureStandardizer$new
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.