inst/doc/Extending.R

params <-
list(family = "red")

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  fig.width = 7,
  fig.height = 5
)
# Load packages used in the examples.
# Ensure the 'multivarious' package itself is loaded,
# e.g., via devtools::load_all() or library(multivarious)
library(tibble)
library(dplyr)
library(stats) # For cancor
library(glmnet) # For glmnet example
library(multivarious)
# Helper k-fold function (replace with package internal if available)
kfold_split <- function(n, k = 5) {
  idx <- sample(rep(1:k, length.out = n))
  lapply(1:k, function(j) list(train = which(idx != j),
                               test  = which(idx == j)))
}
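
# Quick sanity check of the helper (an illustrative sketch, base R only):
# the test folds should partition 1:n exactly, each index appearing once.
demo_folds <- kfold_split(10, k = 5)
stopifnot(identical(sort(unlist(lapply(demo_folds, `[[`, "test"))), 1:10))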

## ----data_cca-----------------------------------------------------------------
data(iris)
X <- as.matrix(iris[, 1:2])
Y <- as.matrix(iris[, 3:4])

# Show first few rows of combined data
head(cbind(X, Y))

## ----fit_cca_wrapper----------------------------------------------------------
fit_cca <- function(Xtrain, Ytrain, ncomp = 2, ...) {
  # Step 1: Define and fit preprocessors with training data
  # Use center()+z-score for both blocks (swap to center() only if preferred)
  preproc_x <- fit(colscale(center(), type = "z"), Xtrain)
  preproc_y <- fit(colscale(center(), type = "z"), Ytrain)

  # Step 2: Transform training data with the fitted preprocessors
  Xp <- transform(preproc_x, Xtrain)
  Yp <- transform(preproc_y, Ytrain)

  # Step 3: Run CCA on the preprocessed data (no additional centering here)
  cc <- stats::cancor(Xp, Yp, xcenter = FALSE, ycenter = FALSE)

  # Step 4: Store loadings and preprocessors in a cross_projector
  cp <- cross_projector(
    vx         = cc$xcoef[, 1:ncomp, drop = FALSE],
    vy         = cc$ycoef[, 1:ncomp, drop = FALSE],
    preproc_x  = preproc_x,
    preproc_y  = preproc_y,
    classes    = "cca_cross_projector"
  )
  attr(cp, "can_cor") <- cc$cor[1:ncomp]
  cp
}

# Fit the model
cp_cca <- fit_cca(X, Y)
print(cp_cca)
attr(cp_cca, "can_cor") # Show canonical correlations
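
# Cross-check (a sketch): canonical correlations are invariant to
# per-block centering and column scaling, so cancor() on the raw data
# should agree with the values stored by our wrapper.
all.equal(attr(cp_cca, "can_cor"), stats::cancor(X, Y)$cor[1:2])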

## ----transfer_cca-------------------------------------------------------------
Y_hat <- transfer(cp_cca, X, from = "X", to = "Y")
head(round(Y_hat, 2))

measure_reconstruction_error(Y, Y_hat, metrics = c("rmse", "r2"))
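
# The same mechanics work in the opposite direction (a sketch reusing the
# transfer() and measure_reconstruction_error() calls shown above):
X_hat <- transfer(cp_cca, Y, from = "Y", to = "X")
measure_reconstruction_error(X, X_hat, metrics = c("rmse", "r2"))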

## ----partial_project_cca------------------------------------------------------
new_x_partial <- X[, 1, drop = FALSE]

scores_partial <- partial_project(cp_cca, new_x_partial,
                                  colind = 1,
                                  source = "X")
head(round(scores_partial, 3))
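
# For reference, compare with scores from the full X block (a sketch;
# this assumes project() for a cross_projector accepts a `source`
# argument, mirroring partial_project() above).
scores_full <- project(cp_cca, X, source = "X")
round(cor(scores_partial[, 1], scores_full[, 1]), 3)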

## ----cv_cca, eval=FALSE-------------------------------------------------------
# set.seed(1)
# folds <- kfold_split(nrow(X), k = 5)
# 
# cv_fit <- function(train_data, ncomp) {
#   fit_cca(train_data$X, train_data$Y, ncomp = ncomp)
# }
# 
# cv_measure <- function(model, test_data) {
#   measure_interblock_transfer_error(
#     test_data$X, test_data$Y, model,
#     metrics = c("x2y.rmse", "y2x.rmse")
#   )
# }
# 
# cv_res <- cv_generic(
#   data = list(X = X, Y = Y),
#   folds = folds,
#   .fit_fun = cv_fit,
#   .measure_fun = cv_measure,
#   fit_args = list(ncomp = 2)
# )
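# 
# # Averaging the per-fold metrics yields a single CV estimate (a sketch;
# # this assumes cv_res collects one named vector of metrics per fold):
# colMeans(do.call(rbind, cv_res))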

## ----perm_test_cca, eval=FALSE------------------------------------------------
# perm_res <- perm_test(
#   cp_cca, X, Y = Y,
#   nperm = 199,
#   alternative = "less"
# )
# print(perm_res)

## ----setup_glmnet-------------------------------------------------------------
# Generate sample data
set.seed(123)
n_obs <- 100
n_pred <- 50
X_glm <- matrix(rnorm(n_obs * n_pred), n_obs, n_pred)
# True coefficients (sparse)
true_beta <- matrix(0, n_pred, 1)
true_beta[1:10, 1] <- runif(10, -1, 1)
# Response variable with noise
y_glm <- X_glm %*% true_beta + rnorm(n_obs, sd = 0.5)

# Fit the LASSO path (alpha = 1).
# In practice you would use cv.glmnet() to choose lambda; a fixed value
# is used here for simplicity.
glm_fit <- glmnet::glmnet(X_glm, y_glm, alpha = 1)

# Choose a lambda about 80% of the way along the (decreasing) path
chosen_lambda <- glm_fit$lambda[floor(length(glm_fit$lambda) * 0.8)]

# Get coefficients for this lambda
beta_hat <- coef(glm_fit, s = chosen_lambda)
print(paste("Number of non-zero coefficients:", sum(beta_hat != 0)))

# Extract coefficients, excluding the intercept
v_glm <- beta_hat[-1, 1, drop = FALSE] # Drop intercept, ensure it's a matrix
dim(v_glm) # Should be n_pred x 1
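
# Which predictors survived the penalty? With this simulation the
# non-zero entries should concentrate in the first 10 columns.
which(v_glm[, 1] != 0)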

## ----wrap_glmnet--------------------------------------------------------------
# Define and fit the preprocessor with training data
preproc_glm <- fit(colscale(center(), type = "z"), X_glm)

# Create the projector
proj_glm <- projector(
  v = v_glm,
  preproc = preproc_glm,
  classes = "glmnet_projector"
)

print(proj_glm)
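
# Basic introspection (a sketch; ncomp() and shape() are multivarious
# generics for projector objects, assumed to apply here):
ncomp(proj_glm)  # number of components, i.e. columns of v
shape(proj_glm)  # input and output dimensions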

## ----project_glmnet-----------------------------------------------------------
# Generate some new test data
X_glm_test <- matrix(rnorm(20 * n_pred), 20, n_pred)

# Project the test data
# project() handles applying the centering/scaling from preproc_glm
lasso_scores <- project(proj_glm, X_glm_test)

head(round(lasso_scores, 3))
dim(lasso_scores) # Should be n_test x 1 (since v_glm has 1 column)

# Compare with direct calculation using transform
# Apply the same preprocessing used within project()
X_glm_test_processed <- transform(preproc_glm, X_glm_test)
# Calculate scores directly using the processed data and coefficients
direct_scores <- X_glm_test_processed %*% v_glm
head(round(direct_scores, 3))

# Check they are close
all.equal(c(lasso_scores), c(direct_scores))
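
# partial_project() works for plain projectors too (a sketch, assuming
# the colind argument behaves as in the CCA example above): score test
# data observed only on the first 10 predictors.
partial_scores <- partial_project(proj_glm, X_glm_test[, 1:10], colind = 1:10)
head(round(partial_scores, 3))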
