R/geometry-topology-and-density-of-manifolds.R

#' Nonlinearity of the 1-NN Classifier.
#'
#' \code{N4} computes the 1-NN error on a test set generated by linear
#' interpolation of points of the same class.
#'
#' @inheritParams F1
#' @param n Number of test points to generate from the input data set.
#' @return The 1-NN error computed on a test set generated by linear
#'  interpolation of points in the input data set.
#' @export

N4 <- function(x, y, n = 100){
  if (!requireNamespace("kknn", quietly = TRUE)){
    stop("Package kknn is required for this function. Please install it.",
         call. = FALSE)
  }
  if (!is.data.frame(x)) x <- as.data.frame(x)
  if (!is.factor(y)) y <- as.factor(y)

  generate_interpolations <- function(x, n){
    x <- as.matrix(x)
    x_size <- num_examples(x)
    x_features <- num_features(x)
    all_pairs <- combn(x_size, 2)
    n_pairs <- all_pairs[, sample(ncol(all_pairs), size = n), drop = FALSE]
    alphas <- runif(n)
    interpolations <- matrix(0, nrow = n, ncol = x_features)
    for (i in seq_len(n)){
      idx_a <- n_pairs[1, i]
      idx_b <- n_pairs[2, i]
      interpolations[i, ] <-
        alphas[i] * x[idx_a, ] + (1 - alphas[i]) * x[idx_b, ]
    }
    as.data.frame(interpolations)
  }

  x_groups <- split(x, y)
  data_size <- num_examples(x)

  test_set <- vector("list", length = length(x_groups))
  for (i in seq_along(x_groups)){
    group_size <- num_examples(x_groups[[i]])
    test_size <- round((group_size / data_size) * n)
    test_set[[i]] <- generate_interpolations(x_groups[[i]], n = test_size)
  }

  test_set_sizes <- vapply(test_set, num_examples, numeric(1))
  y_test <- rep(names(x_groups), times = test_set_sizes)

  test_set <- do.call(rbind, test_set)
  colnames(test_set) <- colnames(x)

  train_set <- cbind(x, class = y)
  preds_1nn <- kknn::kknn(class ~ .,
                          train = train_set,
                          test  = test_set,
                          k = 1,
                          distance = 2,
                          kernel = "rectangular")$fitted.values

  mean(preds_1nn != y_test)
}

#' Average Number of Points per Dimension (T2).
#'
#' \code{T2} computes the average number of points per dimension of the input
#' data set.
#'
#' @inheritParams F1
#' @return The average number of points per dimension of the input data set.
#' @export

T2 <- function(x){
  nrow(x) / ncol(x)
}
RomeroBarata/dcme documentation built on May 9, 2019, 2:24 p.m.