# classiKnn: k nearest neighbor classification for functional data.
#' @title Create a knn estimator for functional data classification.
#'
#' @description Creates an efficient k nearest neighbor estimator for functional data
#' classification. Currently supported distance measures are all \code{metrics}
#' implemented in \code{\link[proxy]{dist}}
#' and all semimetrics suggested in Fuchs et al. (2015).
#' Additionally, all (semi-)metrics can be used on an arbitrary order of derivation.
#'
#' @param classes [\code{factor(nrow(fdata))}]\cr
#' factor of length \code{nrow(fdata)} containing the classes of the observations.
#' @param fdata [\code{matrix}]\cr
#' matrix containing the functional observations as rows.
#' @param grid [\code{numeric(ncol(fdata))}]\cr
#' numeric vector of length \code{ncol(fdata)} containing the grid on which the functional observations were
#' evaluated.
#' @param knn [\code{integer(1)}]\cr
#' number of nearest neighbors to use in the k nearest neighbor algorithm.
#' @param metric [\code{character(1)}]\cr
#' character string specifying the (semi-)metric to be used.
#' For an overview of what is available see the
#' \code{method} argument in \code{\link{computeDistMat}}. For a full list
#' execute \code{\link{metricChoices}()}.
#' @param nderiv [\code{integer(1)}]\cr
#' order of derivation on which the metric shall be computed.
#' The default is 0L.
#' @param derived [\code{logical(1)}]\cr
#' Is the data given in \code{fdata} already derived? Default is set to \code{FALSE},
#' which will lead to numerical derivation if \code{nderiv >= 1L} by applying
#' \code{\link[fda]{deriv.fd}} on a \code{\link[fda]{Data2fd}} representation of
#' \code{fdata}.
#' @param deriv.method [\code{character(1)}]\cr
#' character string indicating which method should be used for derivation. Currently
#' implemented are \code{"base.diff"}, the default, and \code{"fda.deriv.fd"}.
#' \code{"base.diff"} uses the method \code{base::\link[base]{diff}} for equidistant measures
#' without missing values, which is faster than transforming the data into the
#' class \code{\link[fda]{fd}} and deriving this using \code{fda::\link[fda]{deriv.fd}}.
#' The second variant implies smoothing, which can be preferable for calculating
#' high order derivatives.
#' @param custom.metric [\code{function(x, y, ...)}]\cr
#' only used if \code{metric = "custom.metric"}.
#' A function of functional observations
#' \code{x} and \code{y} returning their distance.
#' The default is the L2 distance.
#' See how to implement your distance function in \code{\link[proxy]{dist}}.
#' @param ...
#' further arguments to and from other methods. Hand over additional arguments to
#' \code{\link{computeDistMat}}, usually additional arguments for the specified
#' (semi-)metric. Also, if \code{deriv.method == "fda.deriv.fd"} or
#' \code{fdata} is not observed on a regular grid, additional arguments to
#' \code{\link{fdataTransform}} can be specified which will be passed on to
#' \code{\link[fda]{Data2fd}}.
#'
#' @return \code{classiKnn} returns an object of class \code{"classiKnn"}.
#' This is a list containing at least the
#' following components:
#' \describe{
#' \item{\code{call}}{the original function call.}
#' \item{\code{classes}}{a factor of length nrow(fdata) coding the response of
#' the training data set.}
#' \item{\code{fdata}}{the raw functional data as a matrix with the individual
#' observations as rows.}
#' \item{\code{grid}}{numeric vector containing the grid on which \code{fdata}
#' is observed.}
#' \item{\code{proc.fdata}}{the preprocessed data (missing values interpolated,
#' derived and evenly spaced). This data is \code{this.fdataTransform(fdata)}.
#' See \code{this.fdataTransform} for more details.}
#' \item{\code{knn}}{integer coding the number of nearest neighbors used in the
#' k nearest neighbor classification algorithm.}
#' \item{\code{metric}}{character string coding the distance metric to be used
#' in \code{\link{computeDistMat}}.}
#' \item{\code{nderiv}}{integer giving the order of derivation that is applied
#' to fdata before computing the distances between the observations.}
#' \item{\code{this.fdataTransform}}{preprocessing function taking new data as
#' a matrix. It is used to transform \code{fdata} into \code{proc.fdata} and
#' is required to preprocess new data in order to predict it. This function
#' ensures that preprocessing (derivation, respacing and interpolation of
#' missing values) is done in the exact same way for the original
#' training data set and future (test) data sets.}
#' }
#'
#' @references
#' Fuchs, K., J. Gertheiss, and G. Tutz (2015):
#' Nearest neighbor ensembles for functional data with interpretable feature selection.
#' Chemometrics and Intelligent Laboratory Systems 146, 186 - 197.
#'
#' @examples
#' # Classification of the Phoneme data
#' data(Phoneme)
#' classes = Phoneme[,"target"]
#'
#' set.seed(123)
#' # Use 80% of data as training set and 20% as test set
#' train_inds = sample(1:nrow(Phoneme), size = 0.8 * nrow(Phoneme), replace = FALSE)
#' test_inds = (1:nrow(Phoneme))[!(1:nrow(Phoneme)) %in% train_inds]
#'
#' # create functional data as matrix with observations as rows
#' fdata = Phoneme[,!colnames(Phoneme) == "target"]
#'
#' # create k = 3 nearest neighbors classifier with L2 distance (default) of the
#' # first order derivative of the data
#' mod = classiKnn(classes = classes[train_inds], fdata = fdata[train_inds,],
#' nderiv = 1L, knn = 3L)
#'
#' # predict the model for the test set
#' pred = predict(mod, newdata = fdata[test_inds,], predict.type = "prob")
#'
#' \dontrun{
#' # Parallelize across 2 CPU's
#' library(parallelMap)
#' parallelStartSocket(cpus = 2L) # parallelStartMulticore(cpus = 2L) for Linux
#' predict(mod, newdata = fdata[test_inds,], predict.type = "prob", parallel = TRUE, batches = 2L)
#' parallelStop()
#' }
#'
#' @seealso \link{predict.classiKnn}
#' @export
classiKnn = function(classes, fdata, grid = 1:ncol(fdata), knn = 1L,
                     metric = "L2", nderiv = 0L, derived = FALSE,
                     deriv.method = "base.diff",
                     custom.metric = function(x, y, ...) {
                       return(sqrt(sum((x - y) ^ 2)))
                     },
                     ...) {
  # Check inputs; coerce a data.frame to a matrix first.
  # FIX: use is.data.frame() instead of class(fdata) == "data.frame" --
  # matrices have class c("matrix", "array") since R 4.0, so the old
  # length-2 comparison errors inside if() on R >= 4.2.
  if (is.data.frame(fdata))
    fdata = as.matrix(fdata)
  assertNumeric(fdata)
  assertClass(fdata, "matrix")
  if (is.numeric(classes))
    classes = factor(classes)
  assertFactor(classes, any.missing = FALSE, len = nrow(fdata))
  assertNumeric(grid, any.missing = FALSE, len = ncol(fdata))
  assertIntegerish(knn, lower = 1L, upper = nrow(fdata), len = 1)
  assertChoice(metric, choices = metricChoices())
  # nderiv must be a single non-negative integer (len check added for
  # consistency with the knn check above)
  assertIntegerish(nderiv, lower = 0L, len = 1L)
  assertFlag(derived)
  assertChoice(deriv.method, c("base.diff", "fda.deriv.fd"))

  # Check whether the grid is evenly spaced; if not, the data is respaced
  # by the preprocessing function created below.
  evenly.spaced = isTRUE(all.equal(grid, seq(grid[1], grid[length(grid)],
                                             length.out = length(grid)),
                                   check.attributes = FALSE))
  no.missing = !anyMissing(fdata)

  # Warn about the upcoming spline-based imputation of missing values.
  if (!no.missing) {
    warning("There are missing values in fdata. They will be filled using a spline representation!")
  }

  # Create a model specific preprocessing function: it derives, respaces
  # (to an equidistant grid) and imputes missing values. The same closure
  # is stored in the returned model and reused in predict() so training
  # and test data are preprocessed in exactly the same way.
  this.fdataTransform = fdataTransform(grid = grid,
                                       nderiv = nderiv, derived = derived,
                                       evenly.spaced = evenly.spaced,
                                       no.missing = no.missing,
                                       deriv.method = deriv.method, ...)
  proc.fdata = this.fdataTransform(fdata)

  # Drop the custom.metric function from the output if it is not used.
  if (metric != "custom.metric")
    custom.metric = character(0)

  ret = list(classes = classes,
             fdata = fdata,
             proc.fdata = proc.fdata,
             grid = grid,
             knn = knn,
             metric = metric,
             custom.metric = custom.metric,
             nderiv = nderiv,
             this.fdataTransform = this.fdataTransform,
             call = as.list(match.call(expand.dots = FALSE)))
  class(ret) = "classiKnn"
  return(ret)
}
#' predict a classiKnn object
#'
#' predict function for a classiKnn object.
#'
#' @param object [\code{classiKnn}]\cr
#' object of class classiKnn to get predictions from
#' @param newdata [\code{data.frame}]\cr
#' (optional) new data to predict from with observations as rows. Do not derive this data,
#' this will be done automatically if required by the model. If \code{NULL},
#' the training data is predicted, currently without using a leave-one-out prediction.
#' @param predict.type [\code{character(1)}]\cr
#' one of 'response' or 'prob', indicating the type of prediction. Choose
#' 'response' to return a vector of length \code{nrow(newdata)} containing the
#' most frequently predicted class among the nearest neighbors.
#' Choose 'prob' to return a matrix with \code{nrow(newdata)} rows containing
#' the probabilities for the classes as columns.
#' @param parallel [\code{logical(1)}]\cr
#' Should the prediction be parallelized?
#' Uses \code{\link[parallelMap]{parallelMap}} for
#' parallelization. See \code{...} for further arguments.
#' @param ... [\code{list}]\cr
#' additional arguments to \link[classiFunc]{computeDistMat}.
#'
#' @seealso classiKnn
#' @export
predict.classiKnn = function(object, newdata = NULL, predict.type = "response",
                             parallel = FALSE, ...) {
  # Input checking.
  # FIX: is.data.frame() avoids the length > 1 condition error that
  # class(newdata) == "data.frame" triggers for matrix input on R >= 4.2
  # (matrices have class c("matrix", "array")).
  if (!is.null(newdata)) {
    if (is.data.frame(newdata))
      newdata = as.matrix(newdata)
    assertClass(newdata, "matrix")
    # Apply exactly the same preprocessing as for the training data.
    newdata = object$this.fdataTransform(newdata)
  }
  assertChoice(predict.type, c("response", "prob"))

  # Compute the distance matrix between training and new observations.
  # Additional arguments from the original model call are handed over to
  # computeDistMat via object$call$...
  if (parallel) {
    distfun = "parallelComputeDistMat"
  } else {
    distfun = "computeDistMat"
  }
  dist.mat = do.call(distfun, c(list(x = object$proc.fdata, y = newdata,
                                     method = object$metric,
                                     custom.metric = object$custom.metric, ...),
                                object$call$...))

  # nn.mat[i, j]: rank of training observation i among the neighbors of
  # new observation j; ranks 1, ..., knn (i.e. rank < knn + 1) are the
  # k nearest neighbors.
  # NOTE(review): rank() defaults to ties.method = "average", so exact
  # distance ties can select fewer or more than knn neighbors -- confirm
  # whether this is intended.
  nn.mat = apply(dist.mat, 2, rank)

  if (predict.type == "response") {
    # Majority vote among the knn nearest neighbors.
    result = apply(nn.mat, 2, function(x) {
      names(which.max(table(object$classes[x < object$knn + 1])))
    })
    result = factor(result, levels = levels(object$classes))
  } else {
    # Class probabilities: relative class frequencies among the knn
    # nearest neighbors; table() on a factor keeps all class levels, so
    # the result has one column per class.
    result = t(apply(nn.mat, 2, function(x) {
      table(object$classes[x < object$knn + 1]) / object$knn
    }))
  }
  return(result)
}
#' @export
print.classiKnn = function(x, ...) {
  # Compact console summary of a fitted classiKnn model: number of
  # classes and observations, plus the key hyperparameters.
  class.levels = levels(x$classes)
  out = c(
    "",
    "\t classiKnn object ",
    "",
    "data: ",
    paste("", length(class.levels), "classes:",
          paste(class.levels, collapse = " "), ""),
    paste("", nrow(x$fdata), "observations of length", ncol(x$fdata), ""),
    "algorithm: ",
    paste(" metric =", x$metric, ""),
    paste(" knn = ", x$knn, ""),
    paste(" nderiv =", x$nderiv, ""),
    "",
    ""
  )
  cat(out, sep = "\n")
}
#' @export
summary.classiKnn = function(object, ...) {
  # No dedicated summary statistics are computed: delegate to the print
  # method so summary() and print() show the same information.
  print(object, ...)
}
# (Documentation-hosting boilerplate removed; it was not part of the
# package source and would not parse as R code.)