# Utils
source("R/criterion.R")
#' Parsing features
#' @description Parse \code{n_features} for forward and backward selection.
#' Handles two cases:
#' \itemize{
#' \item if \code{n_features} is an integer, ensure it lies on (0, total).
#' \item if \code{n_features} is a float, ensure it lies on (0, 1).
#' }
#' @param n_features A numeric value passed to forward or backward selection.
#' Can be a float or integer.
#'
#' @param total Total features in the data (equivalent to number of columns in X)
#'
#' @return Number of features to select. If initial \code{n_features} lies on (0, total_features),
#' it will be returned 'as is'. If \code{n_feature} is passed in as a proportion with range (0,1), it will
#' be converted to an integer representing proportion with respect to the total number of features.
#'
#' @keywords internal
parse_n_features <- function(n_features, total) {
if (n_features > 0 & n_features < 1) {
return(max(1, round(n_features * total)))
} else if (n_features > total) {
msg <- c("If a whole number, `n_features` must be on (0, ", total, ").")
stop(paste0(msg, collapse = ""))
} else {
return(n_features)
}
}
#' Fitter
#' @description Fits data to a linear regression model.
#'
#' @param X_input A 2D matrix of features
#'
#' @param y_input A 1D vector representing the response variable
#'
#' @return A fitted \code{lm()} model
#' @keywords internal
fitter <- function(X_input, y_input) {
X <- X_input
y <- y_input
df <- cbind(as.data.frame(X), as.data.frame(y))
# Fit and return model
m <- stats::lm(y ~ ., data = df)
return(m)
}
#' Fit and Score
#' @description Fits a linear regression model to the data and
#' scores the relative quality of the model using r-squared, aic, or bic.
#'
#' @param S A vector representing the list of selected features in `forward()` and `backward()`
#'
#' @param feature Feature to add or drop, expressed as an integer.
#'
#' @param algorithm Direction of feature selection. One of: 'forward', 'backward'.
#'
#' @param X_train Training data. Represented as a 2D matrix of (observations, features).
#'
#' @param y_train Target class for training data. Represented as a 1D vector of target classes for \code{X_train}.
#'
#' @param X_val Validation data. Represented as a 2D matrix of (observations, features).
#'
#' @param y_val Target class for validation data. Represented as a 1D vector of target classes for \code{X_val}.
#'
#' @param criterion Model selection criterion to measure relative model quality. Can be one of:
#' \itemize{
#' \item 'aic': use Akaike Information Criterion
#' \item 'bic': use Akaike Information Criterion
#' \item 'r-squared': use coefficient of determination
#' }
#'
#' @return Score of the model (as a float).
#' @keywords internal
fit_and_score <- function(S,
feature,
algorithm,
X_train,
y_train,
X_val,
y_val,
criterion) {
if (algorithm == "forward") {
features <- c(S, feature)
} else {
if (is.null(feature)) {
features <- S
} else {
features <- S[S != feature]
}
}
features_to_use <- seq(1, ncol(X_train)) %in% features
X_train_to_use <- X_train[, features_to_use]
X_val_to_use <- X_val[, features_to_use]
# Correct for delightful R behavior that results
# in matrices degenerating to vectors when
# only a single column is extracted.
if (length(features) == 1) {
X_train_to_use <- as.matrix(X_train_to_use)
X_val_to_use <- as.matrix(X_val_to_use)
}
fit <- fitter(X_input = X_train_to_use, y_input = y_train)
if (criterion == "r-squared") {
score <- r_squared(fit_model = fit,
X = X_val_to_use,
y = y_val)
} else if (criterion == "aic") {
score <- aic(model = fit,
X = X_val_to_use,
y = y_val)
} else if (criterion == "bic") {
score <- bic(model = fit,
X = X_val_to_use,
y = y_val)
}
return(score)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.