R/KNNRLD.R

Defines functions KNNRLD

Documented in KNNRLD

#' Standard K-Nearest Neighbor Regression for Longitudinal Data
#'
#' This function performs KNN regression for longitudinal data without
#' clustering. Each new observation is predicted by the unweighted
#' (arithmetic) mean of the longitudinal responses of its k nearest training
#' observations in the predictor space. Neighbors are located with
#' \code{Rfast::dista()} using that function's default distance.
#'
#' @param xnew A matrix or data frame of predictor values for prediction
#'   (test set), one row per observation. A plain vector whose length equals
#'   \code{ncol(x)} (with more than one predictor) is treated as a single
#'   observation.
#' @param y A matrix or data frame of longitudinal responses (training set),
#'   one row per training observation and one column per time point.
#' @param x A matrix or data frame of training predictor values with the same
#'   number of rows as \code{y}.
#' @param k Number of nearest neighbors to use. Can be a scalar or a vector of
#'   positive whole numbers; values are processed in increasing order. Values
#'   larger than \code{nrow(x)} trigger a warning and are dropped; if no
#'   requested value remains, \code{nrow(x)} is used instead.
#'
#' @return A named list (names of the form \code{"k = <value>"}) of matrices
#'   with predicted values, one matrix for each value of k that was used.
#'   Each matrix has dimensions nrow(xnew) x ncol(y).
#'
#' @examples
#' \donttest{
#' set.seed(123)
#' n <- 30
#' n_time <- 3
#' d <- 2
#' x <- matrix(runif(n * d), nrow = n)
#' y <- matrix(rnorm(n * n_time), nrow = n)
#' train_idx <- sample(1:n, 20)
#' test_idx <- setdiff(1:n, train_idx)
#' pred <- KNNRLD(
#'   xnew = x[test_idx, ],
#'   y = y[train_idx, ],
#'   x = x[train_idx, ],
#'   k = 3
#' )
#' head(pred[[1]])
#' }
#'
#' @importFrom Rfast dista colmeans
#'
#' @export
KNNRLD <- function(xnew, y, x, k = 5) {

  y <- as.matrix(y)
  x <- as.matrix(x)

  # A bare vector holding one value per predictor is a single test
  # observation; as.matrix() alone would turn it into a one-column matrix.
  if (is.null(dim(xnew)) && is.atomic(xnew) &&
      ncol(x) > 1L && length(xnew) == ncol(x)) {
    xnew <- matrix(xnew, nrow = 1L)
  } else {
    xnew <- as.matrix(xnew)
  }

  n_train <- nrow(x)
  n_test <- nrow(xnew)
  n_timepoints <- ncol(y)

  # Validate early so problems surface as clear errors instead of failures
  # (or silently wrong neighbors) further down.
  if (n_train < 1L) {
    stop("'x' must contain at least one training observation.", call. = FALSE)
  }
  if (nrow(y) != n_train) {
    stop("'y' and 'x' must have the same number of rows.", call. = FALSE)
  }
  if (ncol(xnew) != ncol(x)) {
    stop("'xnew' and 'x' must have the same number of columns.", call. = FALSE)
  }
  if (!is.numeric(k) || length(k) < 1L || anyNA(k) ||
      any(k < 1) || any(k != floor(k))) {
    stop("'k' must contain positive whole numbers.", call. = FALSE)
  }

  k_values <- sort(k)
  max_k <- max(k_values)

  if (max_k > n_train) {
    warning(paste("Maximum k (", max_k, ") exceeds number of training samples (",
                  n_train, "). Reducing to ", n_train))
    max_k <- n_train
    k_values <- k_values[k_values <= max_k]
    # Every requested k was too large: use all training samples rather than
    # continuing with an empty set of k values.
    if (length(k_values) == 0L) {
      k_values <- max_k
    }
  }

  predictions <- vector("list", length(k_values))
  names(predictions) <- paste("k =", k_values)

  # Nothing to predict: return correctly shaped empty matrices without
  # running a neighbor search on an empty test set.
  if (n_test == 0L) {
    for (j in seq_along(k_values)) {
      predictions[[j]] <- matrix(NA_real_, nrow = 0L, ncol = n_timepoints)
    }
    return(predictions)
  }

  # With trans = FALSE and index = TRUE, column i holds the training-row
  # indices of the max_k nearest neighbors of test observation i.
  neighbor_index <- Rfast::dista(xnew, x, trans = FALSE, k = max_k,
                                 index = TRUE)

  for (j in seq_along(k_values)) {
    current_k <- k_values[j]
    pred_matrix <- matrix(NA_real_, nrow = n_test, ncol = n_timepoints)

    for (i in seq_len(n_test)) {
      neighbor_indices <- neighbor_index[seq_len(current_k), i]
      # drop = FALSE keeps a matrix even when current_k is 1.
      pred_matrix[i, ] <- Rfast::colmeans(y[neighbor_indices, , drop = FALSE])
    }

    predictions[[j]] <- pred_matrix
  }

  predictions
}

Try the CKNNRLD package in your browser

Any scripts or data that you put into this service are public.

CKNNRLD documentation built on May 29, 2026, 1:06 a.m.