# tkmeans: Low Memory Use Trimmed K-Means

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#'@importFrom Rcpp sourceCpp
#'@useDynLib lowmemtkmeans
NULL

#'@title Calculates BIC for a given clustering.
#'@description
#'Computes the Bayesian information criterion (BIC) for a given clustering of
#'a data set.
#'@details
#'The BIC is calculated using the formula
#'BIC = -2 * log(L) + p * log(n), where:
#'\itemize{
#'  \item p is the number of free parameters, in this case m*k + k - 1
#'        (m columns of data, k clusters);
#'  \item n is the number of observations (rows of data);
#'  \item L is the likelihood for the given set of cluster centres.
#'}
#'
#'@param data a matrix (n x m). Rows are observations, columns are predictors.
#'@param centres matrix of cluster means (k x m), where k is the number of
#'  clusters.
#'@return BIC value
#'@examples
#'iris_mat <- as.matrix(iris[,1:4])
#'iris_centres2 <- tkmeans(iris_mat, 2 , 0.1, c(1,1,1,1), 1, 10, 0.001) # 2 clusters
#'iris_centres3 <- tkmeans(iris_mat, 3 , 0.1, c(1,1,1,1), 1, 10, 0.001) # 3 clusters
#'cluster_BIC(iris_mat, iris_centres2)
#'cluster_BIC(iris_mat, iris_centres3)
#'@export
cluster_BIC <- function(data, centres) {
  # Forward both matrices, in order, to the compiled routine registered by
  # the lowmemtkmeans shared library.
  .Call(
    "_lowmemtkmeans_cluster_BIC",
    data,
    centres,
    PACKAGE = "lowmemtkmeans"
  )
}

#'@title Trimmed k-means clustering
#'@description
#'Performs the trimmed k-means clustering algorithm [1] on a matrix of data.
#'Each row in the data is an observation, each column is a variable.
#'For optimal use, columns should be scaled to have the same means and
#'variances using \code{scale_mat_inplace}.
#'@details
#'\code{k} is the number of clusters. \code{alpha} is the proportion of data
#'that will be excluded in the clustering.
#'
#'The algorithm will halt if either the maximum number of iterations
#'(\code{iter}) is reached or the change between iterations drops below
#'\code{tol}.
#'
#'When \code{nstart} is greater than 1, the algorithm will run multiple times
#'and the result with the best BIC will be returned.
#'The centres are initialised by picking k observations.
#'
#'The function only returns the k cluster centres. To calculate the nearest
#'cluster centre for each observation use the function
#'\code{nearest_cluster}.
#'
#'@param M matrix (n x m). Rows are observations, columns are predictors.
#'@param k number of clusters
#'@param alpha proportion of data to be trimmed
#'@param weights weightings for variables (columns).
#'@param nstart number of restarts (default 1)
#'@param iter maximum number of iterations (default 10)
#'@param tol criteria for algorithm convergence (default 0.0001)
#'@param verbose If true will output more information on algorithm progress
#'  (default \code{FALSE}).
#'@return Returns a matrix of cluster means (k x m).
#'@references
#' [1] Garcia-Escudero, Luis A.; Gordaliza, Alfonso; Matran, Carlos; Mayo-Iscar, Agustin. A general trimming approach to robust cluster Analysis. Ann. Statist. 36 (2008), no. 3, 1324--1345.
#'@examples
#'iris_mat <- as.matrix(iris[,1:4])
#'scale_params <- scale_mat_inplace(iris_mat)
#'iris_cluster <- tkmeans(iris_mat, 2 , 0.1, c(1,1,1,1), 1, 10, 0.001) # 2 clusters
#'@export
tkmeans <- function(M, k, alpha, weights, nstart = 1L, iter = 10L,
                    tol = 0.0001, verbose = FALSE) {
  # Arguments are handed to the compiled routine positionally, in the same
  # order as this function's signature.
  .Call(
    "_lowmemtkmeans_tkmeans",
    M, k, alpha, weights,
    nstart, iter, tol, verbose,
    PACKAGE = "lowmemtkmeans"
  )
}

#'@title Rescales a matrix in place.
#'@description
#'Rescales a matrix so that each column has a mean of 0 and a standard
#'deviation of 1.
#'The original matrix is overwritten in place. The function returns the means
#'and standard deviations of each column used to rescale it.
#'@details
#'The key advantage of this method is that it can be applied to very large
#'matrices without having to make a second copy in memory, and the original
#'can still be restored using the saved information.
#'
#'@param M matrix of data (n x m)
#'@return Returns a matrix of size (2 x m). The first row contains the column
#'  means. The second row contains the column standard deviations.
#'  NOTE: The original matrix, M, is overwritten.
#'@examples
#'m <- matrix(rnorm(24, 1, 2), 4, 6)
#'scale_params <- scale_mat_inplace(m)
#'# original matrix restored
#'sweep(sweep(m, 2, scale_params[2, ], '*'), 2, scale_params[1, ], '+')
#'@export
scale_mat_inplace <- function(M) {
  # M is passed straight through to the compiled routine; per the
  # documentation above, that routine overwrites M.
  .Call(
    "_lowmemtkmeans_scale_mat_inplace",
    M,
    PACKAGE = "lowmemtkmeans"
  )
}

#'@title Allocates each row (observation) in data to the nearest cluster centre.
#'@description
#'For each observation the Euclidean distance to each of the cluster centres
#'is calculated, and the cluster with the smallest distance is returned for
#'that observation.
#'@param data a matrix (n x m) to be clustered
#'@param centres matrix of cluster means (k x m), where k is the number of
#'  clusters.
#'@return vector of cluster allocations, n values ranging from 1 to k.
#'@examples
#'iris_mat <- as.matrix(iris[,1:4])
#'centres <- tkmeans(iris_mat, 3 , 0.2, c(1,1,1,1), 1, 10, 0.001)
#'nearest_cluster(iris_mat, centres)
#'@export
nearest_cluster <- function(data, centres) {
  # Forward the data and centre matrices, in order, to the compiled routine.
  .Call(
    "_lowmemtkmeans_nearest_cluster",
    data,
    centres,
    PACKAGE = "lowmemtkmeans"
  )
}