R/knn.R

Defines functions knn

Documented in knn

#' @title k-Nearest-Neighbors Search
#'
#' @description
#' An implementation of k-nearest-neighbor search using single-tree and
#' dual-tree algorithms.  Given a set of reference points and query points, this
#' can find the k nearest neighbors in the reference set of each query point
#' using trees; trees that are built can be saved for future use.
#'
#' @param algorithm Type of neighbor search: 'naive', 'single_tree',
#'   'dual_tree', 'greedy'.  Default value "dual_tree" (character).
#' @param epsilon If specified, will do approximate nearest neighbor search
#'   with given relative error.  Default value "0" (numeric).
#' @param input_model Pre-trained kNN model (KNNModel).
#' @param k Number of nearest neighbors to find.  Default value "0"
#'   (integer).
#' @param leaf_size Leaf size for tree building (used for kd-trees, vp
#'   trees, random projection trees, UB trees, R trees, R* trees, X trees,
#'   Hilbert R trees, R+ trees, R++ trees, spill trees, and octrees).  Default
#'   value "20" (integer).
#' @param query Matrix containing query points (optional) (numeric
#'   matrix).
#' @param random_basis Before tree-building, project the data onto a random
#'   orthogonal basis.  Default value "FALSE" (logical).
#' @param reference Matrix containing the reference dataset (numeric
#'   matrix).
#' @param rho Balance threshold (only valid for spill trees).  Default
#'   value "0.7" (numeric).
#' @param seed Random seed (if 0, std::time(NULL) is used).  Default value
#'   "0" (integer).
#' @param tau Overlapping size (only valid for spill trees).  Default value
#'   "0" (numeric).
#' @param tree_type Type of tree to use: 'kd', 'vp', 'rp', 'max-rp', 'ub',
#'   'cover', 'r', 'r-star', 'x', 'ball', 'hilbert-r', 'r-plus', 'r-plus-plus',
#'   'spill', 'oct'.  Default value "kd" (character).
#' @param true_distances Matrix of true distances to compute the effective
#'   error (average relative error) (it is printed when verbose is TRUE)
#'   (numeric matrix).
#' @param true_neighbors Matrix of true neighbors to compute the recall (it
#'   is printed when verbose is TRUE) (integer matrix).
#' @param verbose Display informational messages and the full list of
#'   parameters and timers at the end of execution.  Default value "FALSE"
#'   (logical).
#'
#' @return A list with several components:
#' \item{distances}{Matrix to output distances into (numeric matrix).}
#' \item{neighbors}{Matrix to output neighbors into (integer matrix).}
#' \item{output_model}{If specified, the kNN model will be output here
#'   (KNNModel).}
#'
#' @details
#' This program will calculate the k-nearest-neighbors of a set of points using
#' kd-trees or cover trees (cover tree support is experimental and may be slow).
#' You may specify a separate set of reference points and query points, or just
#' a reference set which will be used as both the reference and query set.
#'
#' @author
#' mlpack developers
#'
#' @export
#' @examples
#' # For example, the following command will calculate the 5 nearest neighbors
#' # of each point in "input" and store the distances in "distances" and the
#' # neighbors in "neighbors": 
#' 
#' \dontrun{
#' output <- knn(k=5, reference=input)
#' neighbors <- output$neighbors
#' distances <- output$distances
#' }
#' 
#' # The output is organized such that row i and column j in the neighbors
#' # output matrix correspond to the index of the point in the reference set
#' # which is the j'th nearest neighbor from the point in the query set with
#' # index i.  Row i and column j in the distances output matrix correspond to
#' # the distance between those two points.
knn <- function(algorithm=NA,
                epsilon=NA,
                input_model=NA,
                k=NA,
                leaf_size=NA,
                query=NA,
                random_basis=FALSE,
                reference=NA,
                rho=NA,
                seed=NA,
                tau=NA,
                tree_type=NA,
                true_distances=NA,
                true_neighbors=NA,
                verbose=FALSE) {
  # An argument still equal to its NA default counts as "not supplied" and is
  # never forwarded to the binding.
  supplied <- function(arg) !identical(arg, NA)

  # Parameter and timer handles for this invocation of the "knn" binding.
  params <- CreateParams("knn")
  timers <- CreateTimers()

  # Collects every input model the caller handed in, so that we don't
  # accidentally create two XPtrs that point to the same model.
  given_models <- vector()

  # Forward each supplied input argument, in signature order.
  if (supplied(algorithm)) {
    SetParamString(params, "algorithm", algorithm)
  }

  if (supplied(epsilon)) {
    SetParamDouble(params, "epsilon", epsilon)
  }

  if (supplied(input_model)) {
    SetParamKNNModelPtr(params, "input_model", input_model)
    # Remember the model so it can be passed along when the output model is
    # fetched below.
    given_models <- append(given_models, input_model)
  }

  if (supplied(k)) {
    SetParamInt(params, "k", k)
  }

  if (supplied(leaf_size)) {
    SetParamInt(params, "leaf_size", leaf_size)
  }

  if (supplied(query)) {
    SetParamMat(params, "query", to_matrix(query), TRUE)
  }

  # Flags default to FALSE rather than NA, so they are compared against FALSE.
  if (!identical(random_basis, FALSE)) {
    SetParamBool(params, "random_basis", random_basis)
  }

  if (supplied(reference)) {
    SetParamMat(params, "reference", to_matrix(reference), TRUE)
  }

  if (supplied(rho)) {
    SetParamDouble(params, "rho", rho)
  }

  if (supplied(seed)) {
    SetParamInt(params, "seed", seed)
  }

  if (supplied(tau)) {
    SetParamDouble(params, "tau", tau)
  }

  if (supplied(tree_type)) {
    SetParamString(params, "tree_type", tree_type)
  }

  if (supplied(true_distances)) {
    SetParamMat(params, "true_distances", to_matrix(true_distances), TRUE)
  }

  if (supplied(true_neighbors)) {
    SetParamUMat(params, "true_neighbors", to_matrix(true_neighbors))
  }

  # Verbosity is switched explicitly in both directions on every call.
  if (verbose) {
    EnableVerbose()
  } else {
    DisableVerbose()
  }

  # Every output option is always marked as passed.
  SetPassed(params, "distances")
  SetPassed(params, "neighbors")
  SetPassed(params, "output_model")

  # Run the binding.
  knn_call(params, timers)

  # Fetch the output model pointer and tag it with its model type.
  model_ptr <- GetParamKNNModelPtr(params, "output_model", given_models)
  attr(model_ptr, "type") <- "KNNModel"

  # Gather the results in their documented order; the list is the return value.
  list(
    "distances" = GetParamMat(params, "distances"),
    "neighbors" = GetParamUMat(params, "neighbors"),
    "output_model" = model_ptr
  )
}

Try the mlpack package in your browser

Any scripts or data that you put into this service are public.

mlpack documentation built on Sept. 27, 2023, 1:07 a.m.