Nothing
params <-
list(family = "red")
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 6, fig.height = 4)
library(multivarious)
library(ggplot2)
## ----motivation_example-------------------------------------------------------
set.seed(123)
n_samples <- 100
n_features <- 50
# Create background data (e.g., healthy controls)
# Main variation is in features 1-10
background <- matrix(rnorm(n_samples * n_features), n_samples, n_features)
background[, 1:10] <- background[, 1:10] * 3 # Strong common variation
# Create foreground data (e.g., patients)
# Has the same common variation PLUS disease-specific signal in features 20-25
foreground <- background[1:60, ] # Start with same structure
foreground[, 20:25] <- foreground[, 20:25] + matrix(rnorm(60 * 6, sd = 2), 60, 6)
# Standard PCA on combined data
all_data <- rbind(background, foreground)
regular_pca <- pca(all_data, ncomp = 2)
# Contrastive PCA
cpca_result <- cPCAplus(X_f = foreground, X_b = background, ncomp = 2)
# Compare what each method finds
loadings_df <- rbind(
data.frame(
feature = factor(1:30),
value = abs(regular_pca$v[1:30, 1]),
method = "Standard PCA"
),
data.frame(
feature = factor(1:30),
value = abs(cpca_result$v[1:30, 1]),
method = "Contrastive PCA"
)
)
ggplot(loadings_df, aes(x = feature, y = value)) +
geom_col(fill = "#1f78b4") +
facet_wrap(~method, nrow = 1) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
x = "Feature",
y = "|Loading|",
title = "Top loading coefficients for PC1"
)
## ----basic_usage--------------------------------------------------------------
# Basic usage
cpca_fit <- cPCAplus(
X_f = foreground, # Your group of interest (foreground)
X_b = background, # Your reference group (background)
ncomp = 5 # Number of components to extract
)
# The result is a bi_projector object with familiar methods
print(cpca_fit)
# Project new data
new_samples <- matrix(rnorm(10 * n_features), 10, n_features)
new_scores <- project(cpca_fit, new_samples)
# Reconstruct using top components
reconstructed <- reconstruct(cpca_fit, comp = 1:2)
## ----understanding_output-----------------------------------------------------
# Which features contribute most to the first contrastive component?
top_features <- order(abs(cpca_fit$v[, 1]), decreasing = TRUE)[1:10]
print(paste("Top contributing features:", paste(top_features, collapse = ", ")))
# How much more variable is each component in foreground vs background?
print(paste("Variance ratios:", paste(round(cpca_fit$values[1:3], 2), collapse = ", ")))
## ----biomedical_example, eval=FALSE-------------------------------------------
# # Identify disease-specific patterns
# tumor_cpca <- cPCAplus(
# X_f = tumor_samples,
# X_b = healthy_tissue,
# ncomp = 10
# )
## ----technical_example, eval=FALSE--------------------------------------------
# # Use technical replicates as background to find biological signal
# bio_cpca <- cPCAplus(
# X_f = biological_samples,
# X_b = technical_replicates,
# ncomp = 5
# )
## ----time_example, eval=FALSE-------------------------------------------------
# # Find patterns specific to treatment timepoint
# treatment_cpca <- cPCAplus(
# X_f = after_treatment,
# X_b = before_treatment,
# ncomp = 5
# )
## ----high_dim-----------------------------------------------------------------
# Create high-dimensional example
n_f <- 50; n_b <- 80; p <- 1000
X_background_hd <- matrix(rnorm(n_b * p), n_b, p)
X_foreground_hd <- X_background_hd[1:n_f, ] +
matrix(c(rnorm(n_f * 20, sd = 2), rep(0, n_f * (p-20))), n_f, p)
# Use sample-space strategy for efficiency
cpca_hd <- cPCAplus(X_f = X_foreground_hd, X_b = X_background_hd,
ncomp = 5, strategy = "sample")
## ----regularization-----------------------------------------------------------
# Small background sample size can lead to instability
small_background <- matrix(rnorm(20 * 100), 20, 100)
small_foreground <- matrix(rnorm(30 * 100), 30, 100)
# Add regularization
cpca_regularized <- cPCAplus(
X_f = small_foreground,
X_b = small_background,
ncomp = 5,
lambda = 0.1 # Regularization parameter for background covariance
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.