Nothing
###########################################################################
# BASKIN-ROBBINS
# Mapper flavors
###########################################################################
# 1D Mapper ---------------------------------------------------------------
#
# a flavor of Mapper based on projection to a single coordinate
#' One-Dimensional Mapper
#'
#' Run Mapper using a one-dimensional filter, a cover of the codomain of intervals, and a clusterer.
#'
#' @param data A data frame.
#' @param dists A distance matrix associated to the data frame. Can be a `dist` object or `matrix`.
#' @param filtered_data The result of a function applied to the data frame; there should be one filter value per observation in the original data frame.
#' These values need to be named, and the names of these values must match the names of the original data set.
#' @param cover An \eqn{n \times 2} `matrix` of interval left and right endpoints; rows should be intervals and columns left and right endpoints (in that order).
#' @param clusterer A function which accepts a list of distance matrices as input, and returns the results of clustering done on each distance matrix;
#' that is, it should return a list of named vectors, whose name are the names of data points and whose values are cluster assignments (integers).
#' If this value is omitted, then trivial clustering will be done.
#'
#' @return A `list` of two data frames, `nodes` and `edges`, which contain information about the Mapper graph constructed from the given parameters.
#'
#' The node data frame consists of:
#'
#' - `id`: vertex ID
#' - `cluster_size`: number of data points in vertex
#' - `mean_dist_to_medoid`: mean distance to medoid of vertex
#' - `data`: names of data points in cluster
#' - `patch`: level set ID
#'
#' The `edge` data frame contains consists of:
#'
#' - `source`: vertex ID of edge source
#' - `target`: vertex ID of edge target
#' - `weight`: Jaccard index of edge; this is the size of the intersection between the vertices divided by the union
#' - `overlap_data`: names of data points in overlap
#' - `overlap_size`: number of data points overlap
#'
#' @export
#' @examples
#' # Create noisy circle data
#' data = data.frame(x = sapply(1:100, function(x) cos(x)), y = sapply(1:100, function(x) sin(x)))
#'
#' # Project to horizontal axis as lens
#' projx = data$x
#' names(projx) = row.names(data)
#'
#' # Create a one-dimensional cover
#' num_bins = 10
#' percent_overlap = 25
#' cover = create_width_balanced_cover(min(projx), max(projx), num_bins, percent_overlap)
#'
#' # Build Mapper object
#' create_1D_mapper_object(data, dist(data), projx, cover)
create_1D_mapper_object <- function(data,
dists,
filtered_data,
cover,
clusterer = global_hierarchical_clusterer("single", dists)) {
if (!all(cover[, 1] - cover[, 2] <= 0)) {
stop("left endpoints must be less than or equal to right endpoints")
}
cover = apply(cover, 1, check_in_interval)
return(create_mapper_object(data, dists, filtered_data, cover, clusterer = clusterer))
}
# Ball Mapper --------------------------------------------------------------
#
# a flavor of Mapper all about the balls
#' Convert Balls to Clusters
#'
#' Perform trivial clustering on a set of balled data.
#'
#' @param bins A `list` of bins, each containing a named vector of data points.
#'
#' @return A named vector whose names are data point names and whose values are cluster labels (`integer`s).
#' @noRd
convert_to_clusters <- function(bins) {
ball_sizes = lapply(bins, length)
# repeat the cluster id for as many data points belonging to that bin
ballball_data = unlist(mapply(function(x, y)
rep(x, y), 1:length(ball_sizes), ball_sizes))
# make sure names match up
names(ballball_data) = unlist(bins)
return(ballball_data)
}
#' Ball Mapper
#'
#' Run Mapper using the identity function as a lens and an \eqn{\varepsilon}-net cover, greedily generated using a distance matrix.
#'
#' @param data A data frame.
#' @param dists A distance matrix for the data frame. Can be a `dist` object or a `matrix`.
#' @param eps A positive real number for the desired ball radius.
#' @return A `list` of two data frames, `nodes` and `edges`, which contain information about the Mapper graph constructed from the given parameters.
#'
#' The node data frame consists of:
#'
#' - `id`: vertex ID
#' - `cluster_size`: number of data points in vertex
#' - `mean_dist_to_medoid`: mean distance to medoid of vertex
#' - `data`: names of data points in cluster
#'
#' The `edge` data frame contains consists of:
#'
#' - `source`: vertex ID of edge source
#' - `target`: vertex ID of edge target
#' - `weight`: Jaccard index of edge; this is the size of the intersection between the vertices divided by the union
#' - `overlap_data`: names of data points in overlap
#' - `overlap_size`: number of data points overlap
#'
#' @export
#' @examples
#' # Create noisy cirle data set
#' data = data.frame(x = sapply(1:100, function(x) cos(x)), y = sapply(1:100, function(x) sin(x)))
#'
#' # Set ball radius
#' eps = .5
#'
#' # Create Mapper object
#' create_ball_mapper_object(data, dist(data), eps)
create_ball_mapper_object <- function(data, dists, eps) {
if (!is.data.frame(data)) {
stop("input data needs to be a data frame.")
} else if (!is.numeric(eps)) {
stop("epsilon needs to be a number")
} else if (eps <= 0) {
stop("epsilon needs to be positive")
}
if (any(is.na(dists))) {
stop("no distance value can be NA")
}
balled_data = create_balls(data, dists, eps)
ball_mapper_object = assemble_mapper_object(convert_to_clusters(balled_data), dists, binning = FALSE)
return(ball_mapper_object)
}
# clusterball Mapper ------------------------------------------------------
#
# a flavor of Mapper that's just clustering in the balls of ball Mapper
#' ClusterBall Mapper
#'
#' Run Ball Mapper, but non-trivially cluster within the balls. You can use two different distance matrices to for the balling and clustering.
#'
#' @param data A data frame.
#' @param dist1 A distance matrix for the data frame; this will be used to ball the data. It can be a `dist` object or a `matrix`.
#' @param dist2 Another distance matrix for the data frame; this will be used to cluster the data after balling. It can be a `dist` object or a `matrix`.
#' @param eps A positive real number for the desired ball radius.
#' @param clusterer A function which accepts a list of distance matrices as input, and returns the results of clustering done on each distance matrix;
#' that is, it should return a list of named vectors, whose name are the names of data points and whose values are cluster assignments (integers).
#' If this value is omitted, then single-linkage clustering will be done (and a cutting height will be decided for you).
#' @return A `list` of two data frames, `nodes` and `edges`, which contain information about the Mapper graph constructed from the given parameters.
#'
#' The node data frame consists of:
#'
#' - `id`: vertex ID
#' - `cluster_size`: number of data points in vertex
#' - `mean_dist_to_medoid`: mean distance to medoid of vertex
#' - `data`: names of data points in cluster
#' - `patch`: level set ID
#'
#' The `edge` data frame contains consists of:
#'
#' - `source`: vertex ID of edge source
#' - `target`: vertex ID of edge target
#' - `weight`: Jaccard index of edge; this is the size of the intersection between the vertices divided by the union
#' - `overlap_data`: names of data points in overlap
#' - `overlap_size`: number of data points overlap
#'
#' @export
#' @examples
#' # Create noisy circle data set
#' data = data.frame(x = sapply(1:100, function(x) cos(x)), y = sapply(1:100, function(x) sin(x)))
#' data.dists = dist(data)
#'
#' # Set ball radius
#' eps = 1
#'
#' # Do single-linkage clustering in the balls to produce Mapper graph
#' create_clusterball_mapper_object(data, data.dists, data.dists, eps)
create_clusterball_mapper_object <- function(data, dist1, dist2, eps, clusterer = local_hierarchical_clusterer("single")) {
if (!is.data.frame(data)) {
stop("input data needs to be a data frame.")
} else if (!is.numeric(eps)) {
stop("epsilon needs to be a number")
} else if (eps <= 0) {
stop("epsilon needs to be positive")
}
if ((any(is.na(dist1))) | (any(is.na(dist2)))) {
stop("no distance value can be NA")
}
balls = create_balls(data, dist1, eps)
projection = row.names(data)
names(projection) = row.names(data) # label everything just trust me ok
return(create_mapper_object(
data,
dist2,
projection,
lapply(balls, is_in_ball),
clusterer = clusterer
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.