# R/0_util.r In spinifex: Manual Tours, Manual Control of Dynamic Projections of Numeric Multivariate Data

#### Documented in array2dfas_history_arraybasis_guidedbasis_half_circlebasis_odpbasis_oldabasis_onppbasis_pcais_orthonormalmanip_var_ofmap_absolutemap_relativepan_zoomsave_historyscale_01scale_axesscale_sdtheme_spinifex

```##
## MATH AND TRANSFORMS -----
##

#' Test if a numeric matrix is orthonormal, that is, each column is orthogonal,
#' at a right angle with the others, and each column has a norm
#' length of 1. This must be true for a projection to be linear.
#'
#' @param x Numeric matrix to test the orthonormality of.
#' @param tol Max tolerance of floating point differences.
#' Element-wise distance of t(x) %*% x from the identity matrix.
#' @return Single logical, whether or not the matrix is orthonormal.
#' @export
#' @examples
#' is_orthonormal(tourr::basis_random(n = 6))
#' is_orthonormal(matrix(1:12, ncol = 2), tol = 0.01)
is_orthonormal <- function(x, tol = 0.001) {
if(is.numeric(x) == FALSE) stop("'x', expected to be numeric and coercable to matrix.")
x <- as.matrix(x)
actual <- t(x) %*% x ## Collapses to identity matrix IFF x is orthonormal
expected <- diag(ncol(x))
if(max(abs(actual - expected)) < tol){return(TRUE)}else{return(FALSE)}
}

#' Turns a tour path array into a long data frame.
#'
#' Internal function, many end users will not need this. Takes the result of
#' `manual_tour()` or `tourr::save_history()`. Restructures the array of
#' interpolated bases into a long data frame for use in ggplots.
#'
#' @param basis_array A full (p, d, n_frames) interpolated basis array of a
#' tour, the output of `manual_tour` or `save_history(*_tour())`.
#' @param data Optional, (n, p) dataset to project, consisting of numeric
#' variables.
#' @param basis_label Optional, labels for the reference frame, a character
#' vector of the number of variables.
#' Defaults to the 3 character abbreviation of the original variables names.
#' @param data_label Optional, labels for plotly tooltip and return object.
#' Defaults to the rownames of the data, if available, then the row number.
#' @export
#' @examples
#' ## !!This function is not meant for external use!!
#' dat_std <- scale_sd(wine[, 2:6])
#' clas <- wine\$Type
#' bas <- basis_pca(dat_std)
#' mv <- manip_var_of(bas)
#'
#' ## Radial tour array to long df, as used in play_manual_tour()
#' mt_array <- manual_tour(basis = bas, manip_var = mv)
#' ls_df_frames <- array2df(basis_array = mt_array, data = dat_std,
#'                          basis_label = paste0("MyLabs", 1:nrow(bas)))
#' str(ls_df_frames)
#'
#' ## tourr::save_history tour array to long df, as used in play_tour_path()
#' gt_array <- tourr::save_history(data = dat_std, max_bases = 10)
#' ls_df_frames2 <- array2df(basis_array = gt_array, data = dat_std)
#' str(ls_df_frames2)
array2df <- function(
basis_array,
data = NULL,
basis_label = if(is.null(data) == FALSE) abbreviate(colnames(data), 3) else 1:nrow(basis_array),
data_label = if(is.null(data) == FALSE) abbreviate(colnames(data), 3) else paste0("v", 1:ncol(basis_array))
){
if("history_array" %in% class(basis_array)) class(basis_array) <- "array"

## Initialize
manip_var <- attributes(basis_array)\$manip_var
p <- dim(basis_array)[1L]
n_frames <- dim(basis_array)[3L]

## Basis condition handling
basis_frames <- NULL
.mute <- sapply(1L:n_frames, function(i){
basis_rows <- cbind(basis_array[,, i], i)
basis_frames <<- rbind(basis_frames, basis_rows)
})
basis_frames <- as.data.frame(basis_frames)
.nms <- c("x", "y", "z", "w")
colnames(basis_frames) <- c(.nms[1L:(ncol(basis_frames) - 1L)], "frame")

## Data; if exists
if(is.null(data) == FALSE){
data_frames <- NULL
data <- as.matrix(data)
.mute <- sapply(1L:n_frames, function(i){
new_frame <- data %*% matrix(basis_array[,, i], nrow(basis_array), ncol(basis_array))
## Center the new frame
new_frame <- sapply(1L:ncol(new_frame), function(i)
new_frame[, i] - mean(new_frame[, i])
)
new_frame <- cbind(new_frame, i) ## Append frame number
data_frames <<- rbind(data_frames, new_frame) ## Add rows to df
})
data_frames <- as.data.frame(data_frames)
colnames(data_frames) <- c(.nms[1L:(ncol(data_frames) - 1L)], "frame")
## Data label rep if applicable
if(is.null(data_label) == TRUE)
data_label <- 1L:nrow(data)
data_frames\$label <- rep_len(data_label, nrow(data_frames))
}

## Basis label and manip_var attribute.
basis_frames\$label <- rep_len(basis_label, nrow(basis_frames))
attr(basis_frames, "manip_var") <- manip_var

## Return, include data if it exists.
if(exists("data_frames")){
return(list(basis_frames = basis_frames,
data_frames = data_frames))
} else
return(list(basis_frames = basis_frames))
}

#' Changes an array of bases into a "history_array" class for use
#' in `tourr::interpolate()`.
#'
#' Internal function, many end users will not need this. Attaches data to an array and assigns the custom class "history_array" as
#' used in `tourr`. Typically called by basis arrays from `spinifex` functions.
#'
#' @param basis_array An array of bases.
#' @param data The data matrix to be projected through the basis. This is
#' `tourr::save_history` objects, but not consumed downstream in `spinifex`.
#' @return An array of numeric bases with custom class "history_array" for
#' consumption by `tourr::interpolate`.
#'
#' @seealso \code{\link[tourr:save_history]{tourr::save_history}} for preset choices.
#' @export
#' @examples
#' ## !!This function is not meant for external use!!
#' dat_std <- scale_sd(wine[, 2:6])
#' bas <- basis_pca(dat_std)
#' mv <- manip_var_of(bas)
#' mt_array <- manual_tour(basis = bas, manip_var = mv)
#' as_history_array(mt_array, dat_std)
as_history_array <- function(basis_array, data = NULL){
if(length(data) > 0L)
attr(basis_array, "data") <- as.matrix(data)
class(basis_array) <- "history_array"
return(basis_array)
}

#' Returns the axis scale and position.
#'
#' Internal function. Typically called by other functions to scale the position
#' of the axes data.frame or another data.frame to plot relative to the data.
#'
#' @param x Numeric matrix or data.frame, first 2 columns and scaled and offset
#' the `to` object.
#' @param position Text specifying the position the axes should go to.
#' Defaults to "center" expects one of: "center", "left", "right",
#' "bottomleft", "topright", or "off".
#' @param to Table to appropriately set the size and position of the axes to.
#' Based on the min/max of the first 2 columns. If left NULL defaults to
#' data.frame(x = c(-1L, 1L), y = c(-1L, 1L).
#' @return Transformed values of `x`, dimension and class unchanged.
#' @seealso \code{\link{map_absolute}} for more manual control.
#' @export
#' @family Linear mapping
#' @examples
#' ## !!This function is not meant for external use!!
#' rb <- tourr::basis_random(4, 2)
#'
#' map_relative(x = rb, position = "bottomleft")
#' map_relative(x = rb, position = "right", to = wine[, 2:3])
map_relative <- function(
x,
position = c("center", "left", "top1d", "floor1d",
"right", "bottomleft", "topright", "off"),
to = NULL
){
## Assumptions
if(is.null(to)) to <- data.frame(x = c(-1L, 1L), y = c(-1L, 1L))
position <- match.arg(position)
if(position == "off") return()
# ## If position is map_absolute call with x = NULL;
# if(is.list(position) & length(position) == 2L){
#   return(map_absolute(x, offset = position\$pan, scale = position\$zoom))
# }

## Initialize
xrange  <- range(to[, 1L])
yrange  <- range(to[, 2L])
xdiff   <- diff(xrange)
ydiff   <- diff(yrange)
xcenter <- mean(xrange)
ycenter <- mean(yrange)

## Condition handling of position
if(position == "center"){
scale <- .5 * ydiff
xoff  <- xcenter
yoff  <- ycenter
} else if(position == "left"){
scale <- .5 * ydiff
xoff  <- -.7 * xdiff + xcenter
yoff  <- ycenter
} else if(position == "top1d"){
scale <- .25
xoff  <- xcenter
yoff  <- 1.1
} else if(position == "floor1d"){
scale <- .25
xoff  <- xcenter
yoff  <- ycenter
} else if(position == "right"){
scale <- .3 * ydiff
xoff  <- .7 * xdiff + xcenter
yoff  <- ycenter
} else if(position == "bottomleft"){
scale <- .25 * ydiff
xoff  <- -.25 * xdiff + xcenter
yoff  <- -.5 * ydiff + ycenter
} else if(position == "topright"){
scale <- .25 * ydiff
xoff  <- .25 * xdiff + xcenter
yoff  <- .5 * ydiff + ycenter
} else stop(paste0("position: ", position, " not defined."))

## Apply scale and return
if(position %in% c("top1d", "floor1d")){
x[, 1L] <- 4L * scale * x[, 1L] + xoff
} else x[, 1L] <- scale * x[, 1L] + xoff
x[, 2L] <- scale * x[, 2L] + yoff

return(x)
}

#' @rdname spinifex-deprecated
#' @section \code{scale_axes}:
#' @export
scale_axes <- function(...) {
.Deprecated("map_relative")
map_relative(...)
}

#' Manually offset and scale the first 2 columns of a matrix or data.frame.
#'
#' A manual variant of `map_relative()`. Can be used as the `axes` argument
#' to manually set the size and locations of the axes.
#'
#' @param offset 2 Numeric values to offset/pan the first 2 dimensions of `x`.
#' @param scale 2 Numeric values to scale/zoom to the first 2 dimensions of `x`.
#' @param x Numeric data object with 2 columns to scale and offset.
#' Defaults to NULL, passing arguments to scale_axes for use internally.
#' @return Scaled and offset `x`.
#' @seealso \code{\link{scale_axes}} for preset choices.
#' @export
#' @family Linear mapping
#' @examples
#' bas <- tourr::basis_random(4, 2)
#'
#' map_absolute(bas, offset = c(-2, 0), scale = c(2/3, 2/3))
map_absolute <- function(x,
offset = c(0L, 0L),
scale = c(1L, 1L)
){
ret <- x
ret[, 1L] <- ret[, 1L] * offset[1L] + scale[1L]
ret[, 2L] <- ret[, 2L] * offset[2L] + scale[2L]
return(ret)
}

#' @rdname spinifex-deprecated
#' @section \code{pan_zoom}:
#' @export
pan_zoom <- function(x, pan = c(0L, 0L), zoom = c(1L, 1L)) {
.Deprecated("map_absolute")
map_absolute(x, pan, zoom)
}

##
## GGPLOT2 AESTHETICS ------
##

#' A ggplot2 theme suggested for linear projections with spinifex.
#' The default value for ggproto arguments in spinifex functions.
#'
#' @param ... Optionally pass arguments to `ggplot2::theme()`.
#' @seealso \code{\link[ggplot2:theme]{ggplot2::theme}} for all theme options.
#' @export
#' @examples
#' theme_spinifex()
#'
#' require("ggplot2")
#' ggplot(mtcars, aes(wt, mpg, color = as.factor(cyl))) +
#'   geom_point() + theme_spinifex()
theme_spinifex <- function(...){
list(ggplot2::theme_void(),
ggplot2::scale_color_brewer(palette = "Dark2"),
ggplot2::scale_fill_brewer(palette = "Dark2"),
ggplot2::coord_fixed(),
ggplot2::labs(x = "", y = ""),
ggplot2::theme(legend.position = "bottom",
legend.direction = "horizontal", ## Levels within aesthetic
legend.box = "vertical",         ## Between aesthetic
legend.margin = ggplot2::margin(-1L,-1L,-1L,-1L, "mm"), ## Tighter legend margin
axis.title = ggplot2::element_text(), ## Allow axis titles, though defaulted to blank
...) ## ... applied over defaults.
)
}

##
## BASIS AND MANIP VAR HELPERS -----
##

#' The basis of Principal Component Analysis (PCA)
#'
#' The orthogonal linear components of the variables in the next largest
#' direction of variance.
#'
#' @param data Numeric matrix or data.frame of the observations.
#' @param d Number of dimensions in the projection space.
#' @export
#' @family basis identifiers
#' @examples
#' dat_std <- scale_sd(wine[, 2:6])
#' basis_pca(data = dat_std)
basis_pca <- function(data, d = 2){
#ret <- stats::prcomp(data)\$rotation[, 1L:d, drop = FALSE]
ret <- Rdimtools::do.pca(X = as.matrix(data), ndim = d)\$projection
rownames(ret) <- colnames(data)
colnames(ret) <- paste0("PC", 1:d)
return(ret)
}

#' The basis of Orthogonal Linear Discriminant Analysis (OLDA)
#'
#' Orthogonal LDA (OLDA) is an extension of classical LDA where the discriminant
#' vectors are orthogonal to each other.
#'
#' @param data Numeric matrix or data.frame of the observations, coerced to matrix.
#' @param class The class for each observation, coerced to a factor.
#' @param d Number of dimensions in the projection space.
#' @return A numeric matrix, an orthogonal basis that best distinguishes the
#' group means of `class`.
#' @references
#' Ye J (2005). "Characterization of a Family of Algorithms for Generalized
#' Discriminant Analysis on Undersampled Problems." J. Mach. Learn. Res.,
#' 6, 483-502. ISSN 1532-4435.
#' @export
#' @family basis identifiers
#' @examples
#' dat_std <- scale_sd(wine[, 2:6])
#' clas <- wine\$Type
#' basis_olda(data = dat_std, class = clas)
basis_olda <- function(data, class, d = 2){
#lda <- MASS::lda(class ~ ., data = data.frame(data, class))\$scaling
#ret <- tourr::orthonormalise(lda)[, 1L:d, drop = FALSE]
#colnames(ret) <- paste0("OLD", 1:d)
ret <- Rdimtools::do.olda(X = as.matrix(data),
label = as.factor(class),
ndim = d)\$projection
rownames(ret) <- colnames(data)
colnames(ret) <- paste0("OLD", 1:d)
return(ret)
}

#' The basis of Orthogonal Discriminant Projection (ODP)
#'
#' Orthogonal Discriminant Projection (ODP) is a linear dimension reduction
#' method with class supervision. It maximizes weighted difference between local
#' and non-local scatter while local information is also preserved by
#' constructing a neighborhood graph.
#'
#' @param data Numeric matrix or data.frame of the observations, coerced to matrix.
#' @param class The class for each observation, coerced to a factor.
#' @param d Number of dimensions in the projection space.
#' of `class`.
#' @param type A vector specifying the neighborhood graph construction.
#' Expects; `c("knn", k)`, `c("enn", radius)`, or `c("proportion",ratio)`.
#' Defaults to `c("knn", sqrt(nrow(data)))`, nearest neighbors equal to the
#' square root of observations.
#' @param ... Optional, other arguments to pass to \code{\link[Rdimtools:do.odp]{Rdimtools::do.odp}}.
#' preservation arguments.
#' details on `type`.
#' @references
#' Li B, Wang C, Huang D (2009). "Supervised feature extraction based on
#' orthogonal discriminant projection." Neurocomputing, 73(1-3), 191-196.
#' @export
#' @family basis identifiers
#' @examples
#' dat_std <- scale_sd(wine[, 2:6])
#' clas <- wine\$Type
#' basis_odp(data = dat_std, class = clas)
basis_odp <- function(data, class, d = 2, type = c("proportion", 0.1), ...){
ret <- Rdimtools::do.odp(X = as.matrix(data),
label = as.factor(class),
ndim = d,
type = type,
...)\$projection
rownames(ret) <- colnames(data)
colnames(ret) <- paste0("ODP", 1L:d)
return(ret)
}

#' The basis of Orthogonal Neighborhood Preserving Projection (OLPP)
#'
#' Orthogonal Neighborhood Preserving Projection (ONPP) is an unsupervised
#' linear dimension reduction method. It constructs a weighted data graph from
#' LLE method. Also, it develops LPP method by preserving the structure of local
#' neighborhoods. For the more details on `type` see
#'
#' @param data Numeric matrix or data.frame of the observations, coerced to matrix.
#' @param d Number of dimensions in the projection space.
#' @param type A vector specifying the neighborhood graph construction.
#' Expects; `c("knn", k)`, `c("enn", radius)`, or `c("proportion",ratio)`.
#' Defaults to `c("knn", sqrt(nrow(data)))`, nearest neighbors equal to the
#' square root of observations.
#' @return Orthogonal matrix basis that distinguishes the levels of `class`
#' based on local and non-local variation as weighted against the neighborhood
#' graph.
#' details on `type`.
#' @references
#' He X (2005). Locality Preserving Projections. PhD Thesis,
#' University of Chicago, Chicago, IL, USA.
#' @export
#' @family basis identifiers
#' @examples
#' dat_std <- scale_sd(wine[, 2:6])
#' basis_onpp(data = dat_std)
basis_onpp <- function(data, d = 2, type = c("knn", sqrt(nrow(data)))){
ret <- Rdimtools::do.onpp(X = as.matrix(data),
ndim = d,
type = type)\$projection
rownames(ret) <- colnames(data)
colnames(ret) <- paste0("ONPP", 1L:d)
return(ret)
}

#' Solve for the last basis of a guided tour.
#'
#' Performs simulated annealing on the index function, solving for it's local
#' extrema. Returns only the last identified basis of the optimization. A
#' truncated, muted extension of tourr::save_history(guided_tour())).
#'
#' @param data Numeric matrix or data.frame of the observations.
#' @param index_f The index function to optimize.
#' `{tourr}` exports `holes()`, `cmass()`, and `lda_pp(class)`.
#' @param d Number of dimensions in the projection space.
#' @param ... Optional, other arguments to pass to
#' @return Numeric matrix of the last basis of a guided tour.
#' arguments.
#' @export
#' @family basis identifiers
#' @examples
#' dat_std <- scale_sd(wine[, 2:6])
#' basis_guided(data = dat_std, index_f = tourr::holes())
#'
#' basis_guided(data = dat_std, index_f = tourr::cmass(),
#'              alpha = .4, cooling = .9, max.tries = 10, n_sample = 4)
basis_guided <- function(data, index_f = tourr::holes(), d = 2, ...){
.mute <- utils::capture.output(
hist <- tourr::save_history(
data,
tourr::guided_tour(index_f = index_f, d = d, ...))
)
ret <- matrix(hist[,, length(hist)], ncol = d)
rownames(ret) <- colnames(data)
return(ret)
}

#' Create a basis that gives uniform contribution in a circle
#'
#' Orthonormalizes uniform variable contributions on a unit circle. This
#' serves as a NULL basis, one that is variable agnostic while spacing the
#' variables to have minimize variable dependence.
#'
#' @param data The data to create a basis for.
#' @export
#' @family basis identifiers
#' @examples
#' dat_std <- scale_sd(wine[, 2:6])
#' bas <- basis_half_circle(dat_std)
basis_half_circle <- function(data){
pp1 <- ncol(data) + 1L ## p++
arc <- seq(0L, pi, length.out = pp1)[-pp1]
u_circ <- as.matrix(data.frame(y1 = sin(arc), y2 = cos(arc)))
bas <- tourr::orthonormalise(u_circ)
rownames(bas) <- colnames(data)
colnames(bas) <- paste0("half_circ", 1L:2L)
return(bas)
}

#' Suggest a manipulation variable.
#'
#' Find the column number of the variable with the `rank`-ith largest
#' contribution of the `basis`.
#' Useful for identifying a variable to change the contribution of in a manual
#' tour, it's `manip_var` argument.
#'
#' @param basis Numeric matrix (p x d), orthogonal liner combinations of the
#' variables.
#' @param rank The number, specifying the variable with the `rank`-th largest
#' contribution. Defaults to 1.
#' @return Numeric scalar, the column number of a variable.
#' @export
#' @family manual tour
#' @examples
#' ## Setup
#' dat_std <- scale_sd(wine[, 2:6])
#' bas <- basis_pca(dat_std)
#'
#' manip_var_of(basis = bas) ## Variable with the largest contribution
#' manip_var_of(basis = bas, rank = 5) ## Variable with 5th-largest contribution
manip_var_of <- function(basis, rank = 1){
if(spinifex::is_orthonormal(basis) == FALSE)
warning("Supplied basis isn't orthonormal.")
row_norm <- sqrt(apply(basis, 1L, function(c) {sum(c^2L)}))
ret <- order(abs(row_norm), decreasing = TRUE)[rank]
names(ret) <- rownames(basis)[rank]
return(ret)
}

#' Preprocess numeric variables
#'
#' Centers and scales each column by standard deviation (sd) or to the
#' interval (0, 1).
#'
#' @param data Numeric matrix or data.frame of the observations.
#' @export
#' @examples
#' scale_sd(data = wine[, 2:6])
scale_sd <- function(data){
if(is.null(dim(data))) data <- matrix(data) ## As columnar matrix
return(apply(data, 2L, function(c){(c - mean(c)) / stats::sd(c)}))
}

#' @rdname scale_sd
#' @export
#' @examples
#' scale_01(data = wine[, 2:6])
scale_01 <- function(data){
if(is.null(dim(data))) data <- matrix(data) ## As columnar matrix
return(apply(data, 2L, function(c) (c - min(c)) / diff(range(c))))
}

# ## <<experimental>> Mutes verbose functions, without suppressing warnings or error,
# ## wrapper function for .mute <- capture.output(x <- value)
# #' @examples
# #' ## mute assignment
# #' mute(gt <- tourr::save_history(mtcars, max_bases = 3))
# mute <- function(...){
#   .mute <- capture.output(
#     ret <- for (i in seq_len(...length())) {
#       out <- withVisible(...elt(i))
#       if (out\$visible)
#         print(out\$value)
#     }
#   )
# }

#' A wrapper muting the text byproduct of
#'
#' @inheritParams tourr::save_history
#' @param verbose Whether or not to suppress the text output byproduct from
#' `tourr::save_history()`. Defaults to FALSE.
#' @export
#' @examples
#' tour_path <- save_history(data = wine[, 2:6], grand_tour(), max_bases = 10)
#' dim(tour_path)
save_history <- function(..., verbose = FALSE){
if(verbose == FALSE){
.mute <- utils::capture.output(
ret <- tourr::save_history(...))
} else ret <- tourr::save_history(...)

return(ret)
}
```

## Try the spinifex package in your browser

Any scripts or data that you put into this service are public.

spinifex documentation built on Sept. 28, 2021, 5:09 p.m.