Nothing
#' @title K-Approximate-Nearest-Neighbor Search with LSH
#'
#' @description
#' An implementation of approximate k-nearest-neighbor search with
#' locality-sensitive hashing (LSH). Given a set of reference points and a set
#' of query points, this will compute the k approximate nearest neighbors of
#' each query point in the reference set; models can be saved for future use.
#'
#' @param bucket_size The size of a bucket in the second level hash.
#' Default value "500" (integer).
#' @param hash_width The hash width for the first-level hashing in the LSH
#' preprocessing. By default, the LSH class automatically estimates a hash
#' width for its use. Default value "0" (numeric).
#' @param input_model Input LSH model (LSHSearch).
#' @param k Number of nearest neighbors to find. Default value "0"
#' (integer).
#' @param num_probes Number of additional probes for multiprobe LSH; if 0,
#' traditional LSH is used. Default value "0" (integer).
#' @param projections The number of hash functions for each tabl. Default
#' value "10" (integer).
#' @param query Matrix containing query points (optional) (numeric
#' matrix).
#' @param reference Matrix containing the reference dataset (numeric
#' matrix).
#' @param second_hash_size The size of the second level hash table.
#' Default value "99901" (integer).
#' @param seed Random seed. If 0, 'std::time(NULL)' is used. Default
#' value "0" (integer).
#' @param tables The number of hash tables to be used. Default value "30"
#' (integer).
#' @param true_neighbors Matrix of true neighbors to compute recall with
#' (the recall is printed when -v is specified) (integer matrix).
#' @param verbose Display informational messages and the full list of
#' parameters and timers at the end of execution. Default value
#' "getOption("mlpack.verbose", FALSE)" (logical).
#'
#' @return A list with several components:
#' \item{distances}{Matrix to output distances into (numeric matrix).}
#' \item{neighbors}{Matrix to output neighbors into (integer matrix).}
#' \item{output_model}{Output for trained LSH model (LSHSearch).}
#'
#' @details
#' This program will calculate the k approximate-nearest-neighbors of a set of
#' points using locality-sensitive hashing. You may specify a separate set of
#' reference points and query points, or just a reference set which will be used
#' as both the reference and query set.
#'
#' @author
#' mlpack developers
#'
#' @export
#' @examples
#' # For example, the following will return 5 neighbors from the data for each
#' # point in "input" and store the distances in "distances" and the neighbors
#' # in "neighbors":
#'
#' \dontrun{
#' output <- lsh(k=5, reference=input)
#' distances <- output$distances
#' neighbors <- output$neighbors
#' }
#'
#' # The output is organized such that row i and column j in the neighbors
#' # output corresponds to the index of the point in the reference set which is
#' # the j'th nearest neighbor from the point in the query set with index i.
#' # Row j and column i in the distances output file corresponds to the distance
#' # between those two points.
#' #
#' # Because this is approximate-nearest-neighbors search, results may be
#' # different from run to run. Thus, the "seed" parameter can be specified to
#' # set the random seed.
#' #
#' # This program also has many other parameters to control its functionality;
#' # see the parameter-specific documentation for more information.
lsh <- function(bucket_size=NA,
hash_width=NA,
input_model=NA,
k=NA,
num_probes=NA,
projections=NA,
query=NA,
reference=NA,
second_hash_size=NA,
seed=NA,
tables=NA,
true_neighbors=NA,
verbose=getOption("mlpack.verbose", FALSE)) {
# Create parameters and timers objects.
p <- CreateParams("lsh")
t <- CreateTimers()
# Initialize an empty list that will hold all input models the user gave us,
# so that we don't accidentally create two XPtrs that point to thesame model.
inputModels <- vector()
# Process each input argument before calling the binding.
if (!identical(bucket_size, NA)) {
SetParamInt(p, "bucket_size", bucket_size)
}
if (!identical(hash_width, NA)) {
SetParamDouble(p, "hash_width", hash_width)
}
if (!identical(input_model, NA)) {
SetParamLSHSearchPtr(p, "input_model", input_model)
# Add to the list of input models we received.
inputModels <- append(inputModels, input_model)
}
if (!identical(k, NA)) {
SetParamInt(p, "k", k)
}
if (!identical(num_probes, NA)) {
SetParamInt(p, "num_probes", num_probes)
}
if (!identical(projections, NA)) {
SetParamInt(p, "projections", projections)
}
if (!identical(query, NA)) {
SetParamMat(p, "query", to_matrix(query), TRUE)
}
if (!identical(reference, NA)) {
SetParamMat(p, "reference", to_matrix(reference), TRUE)
}
if (!identical(second_hash_size, NA)) {
SetParamInt(p, "second_hash_size", second_hash_size)
}
if (!identical(seed, NA)) {
SetParamInt(p, "seed", seed)
}
if (!identical(tables, NA)) {
SetParamInt(p, "tables", tables)
}
if (!identical(true_neighbors, NA)) {
SetParamUMat(p, "true_neighbors", to_matrix(true_neighbors))
}
if (!identical(verbose, FALSE)) {
SetParamBool(p, "verbose", verbose)
}
# Mark all output options as passed.
SetPassed(p, "distances")
SetPassed(p, "neighbors")
SetPassed(p, "output_model")
# Call the program.
lsh_call(p, t)
# Add ModelType as attribute to the model pointer, if needed.
output_model <- GetParamLSHSearchPtr(p, "output_model", inputModels)
attr(output_model, "type") <- "LSHSearch"
# Extract the results in order.
out <- list(
"distances" = GetParamMat(p, "distances"),
"neighbors" = GetParamUMat(p, "neighbors"),
"output_model" = output_model
)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.