R/RcppExports.R

Defines functions SortOccurence bm_25 sort_vector_with_names idf avg_doc_len normalise1d normalise2d sort_index sorted dotmat dot superCountMatrix superTokenizer superNgrams superSplit

Documented in bm_25 dot dotmat normalise1d normalise2d sort_index

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Internal binding: forwards `str` and `sep` (a single space unless overridden)
# to the compiled routine `_superml_superSplit` registered in superml.
superSplit <- function(str, sep = " ") {
  .Call("_superml_superSplit", str, sep, PACKAGE = "superml")
}

# Internal binding: forwards `str`, `ngram_range` and `sep` (a single space
# unless overridden) to the compiled routine `_superml_superNgrams`.
superNgrams <- function(str, ngram_range, sep = " ") {
  .Call("_superml_superNgrams", str, ngram_range, sep, PACKAGE = "superml")
}

# Internal binding: forwards `string` to the compiled routine
# `_superml_superTokenizer` registered in superml.
superTokenizer <- function(string) {
  .Call("_superml_superTokenizer", string, PACKAGE = "superml")
}

# Internal binding: forwards `sent` and `tokens` to the compiled routine
# `_superml_superCountMatrix` registered in superml.
superCountMatrix <- function(sent, tokens) {
  .Call("_superml_superCountMatrix", sent, tokens, PACKAGE = "superml")
}

#' @name dot
#' @title Dot product similarity in vectors
#' @description Computes the dot product between two given vectors.
#'
#' @param a numeric vector
#' @param b numeric vector
#' @param norm logical, whether to compute the normalised dot product,
#'   default = TRUE
#'
#' @return numeric vector containing the dot product score
#' @export
#'
#' @examples
#' a <- runif(5)
#' b <- runif(5)
#' s <- dot(a, b)
#'
dot <- function(a, b, norm = TRUE) {
  .Call("_superml_dot", a, b, norm, PACKAGE = "superml")
}

#' @name dotmat
#' @title Dot product similarity between a vector and matrix
#' @description Computes the dot product between a vector and a given matrix.
#' The returned vector holds one dot product similarity value for each row
#' in the matrix.
#'
#' @param a numeric vector
#' @param b numeric matrix
#' @param norm logical, whether to compute the normalised dot product,
#'   default = TRUE
#'
#' @return numeric vector containing dot product scores
#' @export
dotmat <- function(a, b, norm = TRUE) {
  .Call("_superml_dotmat", a, b, norm, PACKAGE = "superml")
}

# Internal binding: forwards `v` to the compiled routine `_superml_sorted`
# registered in superml.
sorted <- function(v) {
  .Call("_superml_sorted", v, PACKAGE = "superml")
}

#' @name sort_index
#' @title sort_index
#' @description For a given vector, returns the indexes of the sorted array
#' rather than the sorted array itself.
#'
#' @param vec numeric vector
#' @param ascending logical, order to return (ascending or descending),
#'   default = TRUE
#'
#' @return numeric vector containing sorted indexes
#' @export
#'
#' @examples
#' v <- c(10,3,1,4)
#' j <- sort_index(v)
#'
sort_index <- function(vec, ascending = TRUE) {
  .Call("_superml_sort_index", vec, ascending, PACKAGE = "superml")
}

#' @name normalise2d
#' @title normalise2d
#' @description Normalises a matrix towards unit p norm, either row wise or
#' column wise. By default, p = 2 is used, in which case the norm is calculated
#' as the square root of the sum of squares of the values.
#' To normalise row wise, use axis=0. To normalise column wise, use axis=1.
#'
#' @param mat numeric matrix
#' @param pnorm integer value, default value = 2
#' @param axis integer (0 or 1), row wise = 0, column wise = 1,
#'   default value = 1
#'
#' @return normalised numeric matrix
#' @export
#'
#' @examples
#' mat <- matrix(runif(12), 3, 4)
#'
#' ## normalise matrix row wise
#' r <- normalise2d(mat, axis=0)
#'
#' ## normalise matrix column wise
#' r <- normalise2d(mat, axis=1)
#'
normalise2d <- function(mat, pnorm = 2L, axis = 1L) {
  .Call("_superml_normalise2d", mat, pnorm, axis, PACKAGE = "superml")
}

#' @name normalise1d
#' @title normalise1d
#' @description Normalises a 1 dimensional vector towards unit p norm.
#' By default, p = 2 is used. For a given vector, e.g. c(1,2,3), the normalised
#' value is calculated as `x / |x|`, where `|x|` is the square root of the sum
#' of squares of the values in the given vector.
#'
#' @param vec vector containing integers or numeric values.
#' @param pnorm integer, default: 2
#'
#' @return a vector containing normalised values
#' @export
#'
#' @examples
#' val <- c(1,10,5,3,8)
#' norm_val <- normalise1d(val)
#'
normalise1d <- function(vec, pnorm = 2L) {
  .Call("_superml_normalise1d", vec, pnorm, PACKAGE = "superml")
}

# Internal binding: forwards `ss` to the compiled routine
# `_superml_avg_doc_len` registered in superml.
avg_doc_len <- function(ss) {
  .Call("_superml_avg_doc_len", ss, PACKAGE = "superml")
}

# Internal binding: forwards `q` and `corpus` to the compiled routine
# `_superml_idf` registered in superml.
idf <- function(q, corpus) {
  .Call("_superml_idf", q, corpus, PACKAGE = "superml")
}

# Internal binding: forwards `x` to the compiled routine
# `_superml_sort_vector_with_names` registered in superml.
sort_vector_with_names <- function(x) {
  .Call("_superml_sort_vector_with_names", x, PACKAGE = "superml")
}

#' @name bm_25
#' @title BM25 Matching
#' @description BM25 stands for Best Matching 25. It is widely used for
#' ranking documents and is often preferred over TF*IDF scores.
#' Given a new document, it is used to find similar documents in a corpus.
#' It is popularly used in information retrieval systems.
#' This implementation is based on C++ functions.
#'
#' @param document a string for which to find similar documents
#' @param corpus a vector of strings against which document is to be matched
#' @param top_n top n similar documents to find
#'
#' @return a vector containing similar documents and their scores
#' @export
#'
#' @examples
#' docs <- c("chimpanzees are found in jungle",
#'           "chimps are jungle animals",
#'           "Mercedes automobiles are best",
#'           "merc is made in germany",
#'           "chimps are intelligent animals")
#'
#' sentence <- "automobiles are"
#' s <- bm_25(document=sentence, corpus=docs, top_n=2)
#'
bm_25 <- function(document, corpus, top_n) {
  .Call("_superml_bm_25", document, corpus, top_n, PACKAGE = "superml")
}

# Internal binding: forwards `vectors` to the compiled routine
# `_superml_SortOccurence` registered in superml.
SortOccurence <- function(vectors) {
  .Call("_superml_SortOccurence", vectors, PACKAGE = "superml")
}

Try the superml package in your browser

Any scripts or data that you put into this service are public.

superml documentation built on Nov. 14, 2022, 9:05 a.m.