Nothing
##' Classify using Mahalanobis distance
##'
##' Classifies using Mahalanobis distance
##'
##' The \code{model} argument contains the mean and inverse covariance matrix
##' (or standard deviation if the data is one-dimensional) for each class in
##' the training set as well as the class labels. This function calculates the
##' Mahalanobis distance of each row of \code{data} from each class mean and
##' assigns the label of the closest mean to that row. The result is a vector
##' of labels corresponding to the rows of \code{data}.
##'
##' The Mahalanobis distance between a data point and a class is the Euclidean
##' distance between the point and the class mean divided by the covariance
##' matrix for the class. This means that classes with large covariances will
##' attract data points from a larger area than those with small covariances.
##'
##' @param data A vector or matrix of data
##' @param train A Gaussian model generated by \code{train}.
##' @return A label vector with one element per row of \code{data}
##' @seealso train
##' @references O'Shaughnessy, D. Speech Communication (Addison-Wesley:
##' Reading, MA. 1987)
##' @keywords misc
##' @export mahal
"mahal" <- function(data, train)
{
# if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more
warning("mahal is deprecated, use classify with metric=\"mahal\"\n")
classify( data, train, metric="mahal" )
}
##' bayes lab
##'
##' see function
##'
##'
##' @keywords internal
##' @export bayes.lab
"bayes.lab" <- function(data, train)
{
#if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more
warning("bayes.lab is deprecated, use classify with metric=\"bayes\"\n")
classify( data, train, metric="bayes" )
}
##' bayes dist
##'
##' see function
##'
##'
##' @keywords internal
##' @export bayes.dist
"bayes.dist" <- function(data, train, labels=NULL)
{
#if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more
warning("bayes.dist is deprecated, use distance with metric=\"bayes\"\n")
distance( data, train, labels, metric="bayes")
}
##' Calculate mahalanobis distances
##'
##' Calculates mahalanobis distances
##'
##' The \code{train} function finds the centroids and covariance matrices for a
##' set of data and corresponding labels: one per unique label. This function
##' can be used to find the mahalanobis distance of every data point in a
##' dataset to each of the class centroids. The columns of the resulting
##' matrix are marked with the label of the centroid to which they refer. The
##' function \code{mahal} should be used if you want to find the closest
##' centroid to each data point.
##'
##' @param data A matrix of numerical data points.
##' @param labels A vector of labels..
##' @param train A gaussian model as returned by the \code{train} function.
##' @return A matrix of distances with one column for every class (label) in
##' the gaussian model.
##' @seealso train, mahal, bayes.lab, bayes.dist
##' @keywords misc
##' @export mahal.dist
"mahal.dist" <- function( data, train, labels=NULL )
{
#if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more
#cat("mahal.dist is deprecated, use distance with metric=\"mahal\"\n") # commented out because glabal vars don't exist any more
distance( data, train, labels, metric="mahal")
}
## generalise bayes.dist and mahal.dist
##' distance
##'
##' see function
##'
##'
##' @keywords internal
##' @export distance
"distance" <- function( data, train, labels=NULL, metric="bayes" )
{
## data is a set of data points
## train is the result of the train fn and contains
## $mean - the centroids
## $cov - the covariance matrices
## $invcov - the inverse covariance matrix
## $label - the corresponding labels
## labels - an optional set of labels corresponding to data
## metric - one of "bayes" or "mahal" for bayesian or mahalanobis distance
if(!is.matrix(data)) data <- cbind(data)
ncols <- length(train$label)
ndims <- ncol(data)
probs <- matrix(0, nrow = nrow(data), ncol = ncols)
for(lab in 1:ncols) {
tmp <- (lab - 1) * ndims + 1
tmp1 <- tmp + ndims - 1
cov <- train$cov[tmp:tmp1, ]
invcov <- train$invcov[tmp:tmp1, ]
if ( metric == "bayes" ) {
probs[, lab] <- bayesian.metric(data, train$means[lab, ], cov, invcov)
} else if ( metric == "euclidean" ) {
probs[, lab] <- euclidean.metric(data, train$means[lab, ])
} else if ( metric == "mahal" ) {
probs[, lab] <- mahalanobis.metric(data, train$means[lab, ], invcov)
}
}
dimnames(probs) <- list(labels, train$label)
probs
}
"euclidean.metric" <- function( data, mean )
{
return()
}
##' bayesian metric
##'
##' see function
##'
##'
##' @keywords internal
##' @export bayesian.metric
"bayesian.metric" <- function( data, mean, cov, invcov )
{
# calcuate the gaussian classification metric for multivariate data
# given mean vector and covariance matrix
det <- -log(as.numeric(prod(eigen(cov)$value)))
x.u <- t(data) - mean
pow <- t(x.u) %*% invcov
## this is really pow %*% x.u if x.u were just one point but it isn't
## it's many points so we can't just matrix multiply
return( det - apply(pow * t(x.u), 1, sum) )
}
##' mahalanobis metric
##'
##' see function
##'
##'
##' @keywords internal
##' @export mahalanobis.metric
"mahalanobis.metric" <- function(data, mean, invcov)
{
x.u <- t(data) - mean
pow <- t(x.u) %*% invcov
## this is really pow %*% x.u if x.u were just one point but it isn't
## it's many points so we can't just matrix multiply
return( apply(pow * t(x.u), 1, sum) )
}
##' classify
##'
##' classifies data
##'
##'
##' @param data data to classify
##' @param train training data
##' @param metric bayes or mahal
##' @return The classification matrix.
##' @author Jonathan Harrington
##' @keywords models
##' @examples
##'
##'
##' ## The function is currently defined as
##' function (data, train, metric = "bayes")
##' {
##' probs <- distance(data, train, metric = metric)
##' if (metric == "bayes") {
##' best <- apply(probs, 1, max)
##' }
##' else if (metric == "mahal") {
##' best <- apply(probs, 1, min)
##' }
##' result <- rep("", length(best))
##' for (lab in 1:length(train$label)) {
##' tmp <- probs[, lab] == best
##' result[tmp] <- train$label[lab]
##' }
##' result
##' }
##'
##' @export classify
"classify" <- function(data, train, metric="bayes")
{
probs <- distance(data, train, metric=metric )
## what's best depends on the metric, bayes is a prob. so max is best
## mahal is a distance so min is best
if( metric=="bayes" ) {
best <- apply(probs, 1, max)
} else if ( metric=="mahal" ) {
best <- apply(probs, 1, min)
}
result <- rep("", length(best))
for(lab in 1:length(train$label)) {
tmp <- probs[, lab] == best
result[tmp] <- train$label[lab]
}
result
}
##' bayesplot
##'
##' bayesplot
##'
##'
##' @keywords internal
##' @export bayesplot
bayesplot <- function(data, train, N = 10, ellipse = FALSE,
labs = NULL, xlab="", ylab="", colour = TRUE, ...)
{
## data is the original data, used for scaling
## train is the stuff you get from train()
rx <- range(data[, 1])
ry <- range(data[, 2])
## make a set of points covering the plane 0,1
points <- cbind(sort(rep(1:N/N, N)), rep(1:N/N, N))
## Now scale them to the data
points[, 1] <- points[, 1] * (rx[2] - rx[1]) + rx[1]
points[, 2] <- points[, 2] * (ry[2] - ry[1]) + ry[1]
## now classify each point
blabs <- classify(points, train, metric="bayes")
graphics::plot(points, type = "n", xlim = rx, ylim = ry, xlab=xlab, ylab=ylab)
ulabs <- unique(blabs)
k <- 1
colours <- mu.colour( ulabs, colour, FALSE )$colour
for(j in ulabs) {
temp <- muclass(blabs, j)
graphics::text(points[temp, ], blabs[temp], col = colours[k])
k <- k + 1
}
if(ellipse && !is.null(labs) ) {
oldpar = graphics::par(new = TRUE)
on.exit(graphics::par(oldpar))
eplot(data, labs, xlim = rx, ylim = ry, colour=colour, ...)
}
}
##' Train a Gaussian Model
##'
##' Trains a Gaussian Model
##'
##' This function is used to train a gaussian model on a data set. The result
##' can be passed to either the \code{mahal} or \code{bayes.lab} functions to
##' classify either the training set (\code{x}) or a test set with the same
##' number of dimensions. Train simply finds the mean and inverse covariance
##' matrix/standard deviation for the data corresponding to each unique label
##' in labs.
##'
##' @param x A data vector or matrix.
##' @param lab A vector of labels parallel to \code{x}. If missing, all data is
##' assumed to be from the same class.
##' @return A structure with the following components:
##'
##' \item{label}{ The unique labels in \code{lab}. } \item{means}{ The means
##' for each dimension per unique label. } \item{cov}{ The combined covariance
##' matrixes for each unique label. The matrixes are joined with \code{rbind}.
##' If the input data is one-dimensional, this is just the standard deviation
##' of the data. } \item{invcov}{ The combined inverse covariance matrixes for
##' each unique label. The matrixes are joined with \code{rbind}. If the input
##' data is one-dimensional, this is just the reciprocal of the standard
##' deviation of the data. }
##' @seealso mahal, bayes.lab, mahalplot, bayes.plot
##' @keywords misc
##' @export train
"train"<- function(x, lab=rep("x",nrow(x)))
{
mat <- NULL
if(is.matrix(x)){
summeanvals <- NULL
sumcovvals <- NULL
sumcovvals.inv <- NULL
for(j in unique(lab)) {
temp <- lab == j
# can only do this if there's more than one row
if( sum( temp ) == 1 ) {
## what to do??
## can't do anything sensible so barf and tell them why
stop( "\n\tData passed to train has only one entry for one of the labels.\n\tA gaussian model can't be generated for this data." )
}
values <- x[temp,]
meanvals <- apply(values, 2, mean)
covvals <- var(values, values)
covvals.inv <- solve(covvals)
summeanvals <- rbind(summeanvals, meanvals)
sumcovvals <- rbind(sumcovvals, covvals)
sumcovvals.inv <- rbind(sumcovvals.inv, covvals.inv)
}
mat$label <- unique(lab)
mat$means <- summeanvals
mat$cov <- sumcovvals
mat$invcov <- sumcovvals.inv
} else { # the one dimensional case
mat <- NULL
mat$means <- NULL
mat$cov <- NULL
for(j in unique(lab)) {
# cat("data for ", j, " \n" )
temp <- lab == j
mat$means <- c(mat$means, mean(x[temp]))
mat$cov <- c(mat$cov, sqrt(var(x[temp])))
}
mat$label <- unique(lab)
mat$invcov <- 1/mat$cov # in fact this won't be used in the 1d case
}
mat
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.