Nothing
#' @include utilities.R
NULL
#' Enhanced Distance Matrix Computation and Visualization
#' @description Clustering methods classify data samples into groups of similar
#' objects. This process requires some methods for measuring the distance or
#' the (dis)similarity between the observations. Read more:
#' \href{http://www.sthda.com/english/wiki/clarifying-distance-measures-unsupervised-machine-learning}{STHDA
#' website - clarifying distance measures.}. \itemize{ \item get_dist():
#' Computes a distance matrix between the rows of a data matrix. Compared to
#' the standard \code{\link[stats]{dist}}() function, it supports
#' correlation-based distance measures including "pearson", "kendall" and
#' "spearman" methods. \item fviz_dist(): Visualizes a distance matrix }
#' @param x a numeric matrix or a data frame.
#' @param method the distance measure to be used. This must be one of
#' "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski",
#' "pearson", "spearman" or "kendall".
#' @param stand logical value; default is FALSE. If TRUE, then the data will be
#' standardized using the function scale(). Measurements are standardized for
#' each variable (column), by subtracting the variable's mean value and
#' dividing by the variable's standard deviation.
#' @param ... other arguments to be passed to the function dist() when using get_dist().
#' @return \itemize{ \item get_dist(): returns an object of class "dist". \item
#' fviz_dist(): returns a ggplot2 }
#' @seealso \code{\link[stats]{dist}}
#' @author Alboukadel Kassambara \email{alboukadel.kassambara@@gmail.com}
#' @examples
#' data(USArrests)
#' res.dist <- get_dist(USArrests, stand = TRUE, method = "pearson")
#'
#' fviz_dist(res.dist,
#' gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))
#' @name dist
#' @rdname dist
#' @export
get_dist <- function(x, method = "euclidean", stand = FALSE, ...){
if(stand) x <- scale(x)
if(method %in% c("pearson", "spearman", "kendall")){
res.cor <- stats::cor(t(x), method = method)
res.dist <- stats::as.dist(1 - res.cor, ...)
}
else res.dist <- stats::dist(x, method = method, ...)
res.dist
}
#' @param dist.obj an object of class "dist" as generated by the function dist() or get_dist().
#' @param order logical value. if TRUE the ordered dissimilarity image (ODI) is shown.
#' @param show_labels logical value. If TRUE, the labels are displayed.
#' @param lab_size the size of labels.
#' @param gradient a list containing three elements specifying the colors for low, mid and high values in
#' the ordered dissimilarity image. The element "mid" can take the value of NULL.
#' @rdname dist
#' @export
fviz_dist<- function(dist.obj, order = TRUE, show_labels = TRUE, lab_size = NULL,
gradient = list(low = "red", mid = "white", high = "blue")
)
{
if(!inherits(dist.obj, "dist"))
stop("An object of class dist is required.")
if(order){
res.hc <- stats::hclust(dist.obj, method = "ward.D2")
dist.obj <- as.matrix(dist.obj)[res.hc$order, res.hc$order]
}
else dist.obj <- as.matrix(dist.obj)
rownames(dist.obj ) <- colnames(dist.obj ) <- paste0(rownames(dist.obj), "-")
d <- reshape2::melt(dist.obj)
p <- ggplot(d, aes_string(x = "Var1", y = "Var2"))+
ggplot2::geom_tile(aes_string(fill="value"))
if(is.null(gradient$mid)) p <- p + ggplot2::scale_fill_gradient(low=gradient$low, high=gradient$high)
else p <- p + ggplot2::scale_fill_gradient2(midpoint=mean(dist.obj), low=gradient$low,
mid=gradient$mid, high=gradient$high, space = "Lab")
if(show_labels) p <- p + theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = lab_size),
axis.text.y = element_text(size = lab_size))
else p <- p +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank())
return(p)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.