Nothing
#'@title PCA
#'@description Principal Component Analysis (PCA) for unsupervised dimensionality reduction.
#' Transforms correlated variables into orthogonal principal components ordered by explained variance.
#'@details Fits PCA on (optionally) the numeric predictors only (excluding `attribute` when provided),
#' removes constant columns, and selects the number of components by an elbow rule (minimum curvature)
#' unless `components` is set explicitly.
#'@param attribute target attribute to model building
#'@param components number of components for PCA
#'@return returns an object of class `dt_pca`
#'@references
#' Pearson, K. (1901). On lines and planes of closest fit to systems of points in space.
#' Hotelling, H. (1933). Analysis of a complex of statistical variables into principal components.
#'@examples
#'mypca <- dt_pca("Species")
#'# Automatically fitting number of components
#'mypca <- fit(mypca, iris)
#'iris.pca <- transform(mypca, iris)
#'head(iris.pca)
#'head(mypca$pca.transf)
#'# Manual establishment of number of components
#'mypca <- dt_pca("Species", 3)
#'mypca <- fit(mypca, datasets::iris)
#'iris.pca <- transform(mypca, iris)
#'head(iris.pca)
#'head(mypca$pca.transf)
#'@export
dt_pca <- function(attribute=NULL, components = NULL) {
obj <- dal_transform()
obj$attribute <- attribute
obj$components <- components
class(obj) <- append("dt_pca", class(obj))
return(obj)
}
#'@importFrom stats prcomp
#'@exportS3Method fit dt_pca
fit.dt_pca <- function(obj, data, ...) {
data <- data.frame(data)
attribute <- obj$attribute
if (!is.null(attribute)) {
# drop target column from PCA input (unsupervised)
data[,attribute] <- NULL
}
# select numeric columns only
nums <- unlist(lapply(data, is.numeric))
remove <- NULL
for(j in names(nums[nums])) {
# remove constant columns (zero variance)
if(min(data[,j])==max(data[,j]))
remove <- cbind(remove, j)
}
nums[remove] <- FALSE
data = as.matrix(data[ , nums])
pca_res <- stats::prcomp(data, center=TRUE, scale.=TRUE)
if (is.null(obj$components)) {
# choose number of components via elbow (minimum curvature of cumulative variance)
y <- cumsum(pca_res$sdev^2/sum(pca_res$sdev^2))
curv <- fit_curvature_min()
res <- transform(curv, y)
obj$components <- res$x
}
obj$pca.transf <- as.matrix(pca_res$rotation[, 1:obj$components])
obj$nums <- nums
return(obj)
}
#'@exportS3Method transform dt_pca
transform.dt_pca <- function(obj, data, ...) {
attribute <- obj$attribute
pca.transf <- obj$pca.transf
nums <- obj$nums
data <- data.frame(data)
if (!is.null(attribute)) {
# preserve predictand and remove from PCA input
predictand <- data[,attribute]
data[,attribute] <- NULL
}
data = as.matrix(data[ , nums])
# project to principal components
data = data %*% pca.transf
data = data.frame(data)
if (!is.null(attribute)){
# reattach predictand
data[,attribute] <- predictand
}
return(data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.