Nothing
params <-
list(family = "red")
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.height = 5
)
# Load necessary packages for the examples
# Ensure 'multiblock' package itself is loaded for testing vignettes,
# e.g., via devtools::load_all() or library(multiblock)
library(tibble)
library(dplyr)
library(stats) # For cancor
library(glmnet) # For glmnet example
library(multivarious)
# Helper k-fold function (replace with package internal if available)
kfold_split <- function(n, k = 5) {
idx <- sample(rep(1:k, length.out = n))
lapply(1:k, function(j) list(train = which(idx != j),
test = which(idx == j)))
}
## ----data_cca-----------------------------------------------------------------
data(iris)
X <- as.matrix(iris[, 1:2])
Y <- as.matrix(iris[, 3:4])
# Show first few rows of combined data
head(cbind(X, Y))
## ----fit_cca_wrapper----------------------------------------------------------
fit_cca <- function(Xtrain, Ytrain, ncomp = 2, ...) {
# Step 1: Define and fit preprocessors with training data
# Use center()+z-score for both blocks (swap to center() only if preferred)
preproc_x <- fit(colscale(center(), type = "z"), Xtrain)
preproc_y <- fit(colscale(center(), type = "z"), Ytrain)
# Step 2: Transform training data with the fitted preprocessors
Xp <- transform(preproc_x, Xtrain)
Yp <- transform(preproc_y, Ytrain)
# Step 3: Run CCA on the preprocessed data (no additional centering here)
cc <- stats::cancor(Xp, Yp, xcenter = FALSE, ycenter = FALSE)
# Step 4: Store loadings and preprocessors in a cross_projector
cp <- cross_projector(
vx = cc$xcoef[, 1:ncomp, drop = FALSE],
vy = cc$ycoef[, 1:ncomp, drop = FALSE],
preproc_x = preproc_x,
preproc_y = preproc_y,
classes = "cca_cross_projector"
)
attr(cp, "can_cor") <- cc$cor[1:ncomp]
cp
}
# Fit the model
cp_cca <- fit_cca(X, Y)
print(cp_cca)
attr(cp_cca, "can_cor") # Show canonical correlations
## ----transfer_cca-------------------------------------------------------------
Y_hat <- transfer(cp_cca, X, from = "X", to = "Y")
head(round(Y_hat, 2))
measure_reconstruction_error(Y, Y_hat, metrics = c("rmse", "r2"))
## ----partial_project_cca------------------------------------------------------
new_x_partial <- X[, 1, drop = FALSE]
scores_partial <- partial_project(cp_cca, new_x_partial,
colind = 1,
source = "X")
head(round(scores_partial, 3))
## ----cv_cca, eval=FALSE-------------------------------------------------------
# set.seed(1)
# folds <- kfold_split(nrow(X), k = 5)
#
# cv_fit <- function(train_data, ncomp) {
# fit_cca(train_data$X, train_data$Y, ncomp = ncomp)
# }
#
# cv_measure <- function(model, test_data) {
# measure_interblock_transfer_error(
# test_data$X, test_data$Y, model,
# metrics = c("x2y.rmse", "y2x.rmse")
# )
# }
#
# cv_res <- cv_generic(
# data = list(X = X, Y = Y),
# folds = folds,
# .fit_fun = cv_fit,
# .measure_fun = cv_measure,
# fit_args = list(ncomp = 2)
# )
## ----perm_test_cca, eval=FALSE------------------------------------------------
# perm_res <- perm_test(
# cp_cca, X, Y = Y,
# nperm = 199,
# alternative = "less"
# )
# print(perm_res)
## ----setup_glmnet-------------------------------------------------------------
# Generate sample data
set.seed(123)
n_obs <- 100
n_pred <- 50
X_glm <- matrix(rnorm(n_obs * n_pred), n_obs, n_pred)
# True coefficients (sparse)
true_beta <- matrix(0, n_pred, 1)
true_beta[1:10, 1] <- runif(10, -1, 1)
# Response variable with noise
y_glm <- X_glm %*% true_beta + rnorm(n_obs, sd = 0.5)
# Fit glmnet (LASSO, alpha=1)
# Typically use cv.glmnet to find lambda, but using a fixed one for simplicity
glm_fit <- glmnet::glmnet(X_glm, y_glm, alpha = 1) # alpha=1 is LASSO
# Choose a lambda (e.g., one near the end of the path)
chosen_lambda <- glm_fit$lambda[length(glm_fit$lambda) * 0.8]
# Get coefficients for this lambda
beta_hat <- coef(glm_fit, s = chosen_lambda)
print(paste("Number of non-zero coefficients:", sum(beta_hat != 0)))
# Extract coefficients, excluding the intercept
v_glm <- beta_hat[-1, 1, drop = FALSE] # Drop intercept, ensure it's a matrix
dim(v_glm) # Should be n_pred x 1
## ----wrap_glmnet--------------------------------------------------------------
# Define preprocessor
# Define and fit preprocessor with training data
preproc_glm <- fit(colscale(center(), type = "z"), X_glm)
# Create the projector
proj_glm <- projector(
v = v_glm,
preproc = preproc_glm,
classes = "glmnet_projector"
)
print(proj_glm)
## ----project_glmnet-----------------------------------------------------------
# Generate some new test data
X_glm_test <- matrix(rnorm(20 * n_pred), 20, n_pred)
# Project the test data
# project() handles applying the centering/scaling from preproc_glm
lasso_scores <- project(proj_glm, X_glm_test)
head(round(lasso_scores, 3))
dim(lasso_scores) # Should be n_test x 1 (since v_glm has 1 column)
# Compare with direct calculation using transform
# Apply the same preprocessing used within project()
X_glm_test_processed <- transform(preproc_glm, X_glm_test)
# Calculate scores directly using the processed data and coefficients
direct_scores <- X_glm_test_processed %*% v_glm
head(round(direct_scores, 3))
# Check they are close
all.equal(c(lasso_scores), c(direct_scores))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.