# R/rkNN.R

#' Majority-vote kNN classification on a random subset of features
#'
#' Classifies each test observation by a majority vote among the labels of
#' its `k` nearest training observations. When no distance matrix is
#' supplied, `mtry` feature columns are drawn at random (without
#' replacement) and the test-to-train distances are computed on those
#' columns only via `distMatrix()`.
#'
#' @param dat.train Training data with observations in rows and features in
#'   columns. Only used when `distances` is `NULL`.
#' @param y.train Vector of training labels; element `j` is the label of the
#'   training observation in column `j` of the distance matrix.
#' @param n.train Number of training observations (forwarded to
#'   `distMatrix()`).
#' @param dat.test Test data with the same feature columns as `dat.train`.
#'   Only used when `distances` is `NULL`.
#' @param n.test Number of test observations, i.e. the number of rows of the
#'   distance matrix that are classified.
#' @param k Number of nearest neighbours that vote. If `k` exceeds the number
#'   of columns of the distance matrix, all available neighbours are used.
#' @param mtry Number of features to sample. Required when `distances` is
#'   `NULL`; ignored otherwise.
#' @param distances Optional precomputed distance matrix with one row per
#'   test observation and one column per training observation. Smaller
#'   values are treated as "closer".
#' @param cores Forwarded to `distMatrix()` (presumably the number of
#'   parallel workers).
#' @param euclidean Forwarded to `distMatrix()` (presumably selects the
#'   Euclidean metric).
#' @return A list with elements `distances` (the distance matrix used),
#'   `prediction` (character vector of predicted labels, ties between
#'   equally frequent labels broken at random) and `confidence` (share of
#'   the neighbours' votes received by the winning label).
#' @export
rkNN <- function(dat.train, y.train, n.train,
                 dat.test, n.test,
                 k = 5, mtry = NULL, distances = NULL,
                 cores = 2, euclidean = FALSE) {

  if (is.null(distances)) {
    if (is.null(mtry)) {
      stop("`mtry` must be supplied when `distances` is NULL.", call. = FALSE)
    }

    n.features <- ncol(dat.train)
    ind_features <- sample.int(n.features, mtry)

    # drop = FALSE keeps the 2-d shape even when a single feature is drawn.
    new_train <- dat.train[, ind_features, drop = FALSE]
    new_test <- dat.test[, ind_features, drop = FALSE]

    # Distances from every test observation to every training observation.
    distances <- distMatrix(mat.train = new_train, itself = FALSE,
                            mat.test = new_test,
                            n.train = n.train, n.test = n.test,
                            cores = cores, euclidean = euclidean)
  }

  # Never ask for more neighbours than there are training observations.
  k_used <- min(k, ncol(distances))

  prediction <- character(n.test)
  confidence <- numeric(n.test)

  for (l in seq_len(n.test)) {
    # Ascending order: the smallest distances are the nearest neighbours.
    nearest <- order(distances[l, ])[seq_len(k_used)]
    votes <- table(y.train[nearest])

    top_count <- max(votes)
    winners <- names(votes)[votes == top_count]

    # Ties between equally frequent labels are broken at random.
    prediction[l] <- sample(winners, 1)
    confidence[l] <- top_count / k_used
  }

  list(distances = distances,
       prediction = prediction,
       confidence = confidence)
}
# thomaswiemann/SMLpractical documentation built on May 28, 2019, 12:23 p.m.