knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.align = "center", out.width = "90%", dpi = 90, message = FALSE, warning = FALSE ) options(width = 100)
The missoNet package implements a powerful framework for multitask learning with missing responses, simultaneously estimating:
Regression coefficients ($\mathbf{B}$): Relationships between predictors and multiple responses
Conditional network ($\Theta$): Dependencies among responses after accounting for predictors
The conditional Gaussian model is: $$ \mathbf{Y} = \mathbf{1}\mu^T + \mathbf{X}\mathbf{B} + \mathbf{E}, \quad \mathbf{E} \sim \mathrm{MVN}(0, \Theta^{-1}) $$ where:
$\mathbf{Y} \in \mathbb{R}^{n \times q}$: Response matrix (may contain missing values)
$\mathbf{X} \in \mathbb{R}^{n \times p}$: Predictor matrix (complete)
$\mathbf{B} \in \mathbb{R}^{p \times q}$: Coefficient matrix
$\Theta \in \mathbb{R}^{q \times q}$: Precision matrix (inverse covariance)
$\mu \in \mathbb{R}^q$: Intercept vector
For theoretical details, see Zeng et al. (2025).
# Install from CRAN (when available) install.packages("missoNet") # Or install development version from GitHub devtools::install_github("yixiao-zeng/missoNet")
# Load the package library(missoNet)
The package includes a flexible data generator for testing:
# Generate synthetic data sim <- generateData( n = 200, # Sample size p = 50, # Number of predictors q = 10, # Number of responses rho = 0.1, # Missing rate (10%) missing.type = "MCAR" # Missing completely at random ) # Examine the data structure str(sim, max.level = 1)
# Check dimensions cat("Predictors (X):", dim(sim$X), "\n") cat("Complete responses (Y):", dim(sim$Y), "\n") cat("Observed responses (Z):", dim(sim$Z), "\n") cat("Missing rate:", sprintf("%.1f%%", mean(is.na(sim$Z)) * 100), "\n")
# Fit missoNet with automatic parameter selection fit <- missoNet( X = sim$X, Y = sim$Z, # Use observed responses with missing values GoF = "BIC" # Goodness-of-fit criterion ) # Extract optimal estimates Beta.hat <- fit$est.min$Beta Theta.hat <- fit$est.min$Theta mu.hat <- fit$est.min$mu
# Model summary cat("Selected lambda.beta:", fit$est.min$lambda.beta, "\n") cat("Selected lambda.theta:", fit$est.min$lambda.theta, "\n") cat("Active predictors:", sum(rowSums(abs(Beta.hat)) > 1e-8), "/", nrow(Beta.hat), "\n") cat("Network edges:", sum(abs(Theta.hat[upper.tri(Theta.hat)]) > 1e-8), "/", ncol(Theta.hat) * (ncol(Theta.hat)-1) / 2, "\n")
# Visualize the regularization path plot(fit, type = "heatmap")
# Visualize the regularization path in a scatter plot plot(fit, type = "scatter")
# Split data for demonstration train_idx <- 1:150 test_idx <- 151:200 # Refit on training data fit_train <- missoNet( X = sim$X[train_idx, ], Y = sim$Z[train_idx, ], GoF = "BIC", verbose = 0 # Suppress output ) # Predict on test data Y_pred <- predict(fit_train, newx = sim$X[test_idx, ]) # Evaluate predictions (using complete data for comparison) mse <- mean((Y_pred - sim$Y[test_idx, ])^2) cat("Test set MSE:", round(mse, 4), "\n")
missoNet handles three types of missing data:
# Generate data with different missing mechanisms n <- 300; p <- 30; q <- 8; rho <- 0.15 sim_mcar <- generateData(n, p, q, rho, missing.type = "MCAR") sim_mar <- generateData(n, p, q, rho, missing.type = "MAR") sim_mnar <- generateData(n, p, q, rho, missing.type = "MNAR") # Visualize missing patterns par(mfrow = c(1, 3), mar = c(4, 4, 3, 1)) # MCAR pattern image(1:q, 1:n, t(is.na(sim_mcar$Z)), col = c("white", "darkred"), xlab = "Response", ylab = "Observation", main = "MCAR: Random Pattern") # MAR pattern image(1:q, 1:n, t(is.na(sim_mar$Z)), col = c("white", "darkred"), xlab = "Response", ylab = "Observation", main = "MAR: Depends on X") # MNAR pattern image(1:q, 1:n, t(is.na(sim_mnar$Z)), col = c("white", "darkred"), xlab = "Response", ylab = "Observation", main = "MNAR: Depends on Y")
# Fit with different criteria criteria <- c("AIC", "BIC", "eBIC") results <- list() for (crit in criteria) { results[[crit]] <- missoNet( X = sim$X, Y = sim$Z, GoF = crit, verbose = 0 ) } # Compare selected models comparison <- data.frame( Criterion = criteria, Lambda.Beta = sapply(results, function(x) x$est.min$lambda.beta), Lambda.Theta = sapply(results, function(x) x$est.min$lambda.theta), Active.Predictors = sapply(results, function(x) sum(rowSums(abs(x$est.min$Beta)) > 1e-8)), Network.Edges = sapply(results, function(x) sum(abs(x$est.min$Theta[upper.tri(x$est.min$Theta)]) > 1e-8)), GoF.Value = sapply(results, function(x) x$est.min$gof) ) print(comparison, digits = 4)
# Define custom regularization paths lambda.beta <- 10^seq(0, -2, length.out = 15) lambda.theta <- 10^seq(0, -2, length.out = 15) # Fit with custom grid fit_custom <- missoNet( X = sim$X, Y = sim$Z, lambda.beta = lambda.beta, lambda.theta = lambda.theta, verbose = 0 )
# Grid coverage summary cat(" Beta range: [", sprintf("%.4f", min(fit_custom$param_set$gof.grid.beta)), ", ", sprintf("%.4f", max(fit_custom$param_set$gof.grid.beta)), "]\n", sep = "") cat(" Theta range: [", sprintf("%.4f", min(fit_custom$param_set$gof.grid.theta)), ", ", sprintf("%.4f", max(fit_custom$param_set$gof.grid.theta)), "]\n", sep = "") cat(" Total models evaluated:", length(fit_custom$param_set$gof), "\n")
# Create data with variable missing rates across responses n <- 300; p <- 30; q <- 8; rho <- 0.15 rho_vec <- seq(0.05, 0.30, length.out = q) sim_var <- generateData( n = 300, p = 30, q = 8, rho = rho_vec, # Different missing rate for each response missing.type = "MAR" ) # Examine missing patterns miss_summary <- data.frame( Response = paste0("Y", 1:q), Target = rho_vec, Actual = colMeans(is.na(sim_var$Z)) ) print(miss_summary, digits = 3) # Fit model accounting for variable missingness fit_var <- missoNet( X = sim_var$X, Y = sim_var$Z, adaptive.search = TRUE, # Fast adaptive search verbose = 0 ) # Visualize plot(fit_var)
# Use penalty factors to incorporate prior information p <- ncol(sim$X) q <- ncol(sim$Z) # Example: We know predictors 1-10 are important beta.pen.factor <- matrix(1, p, q) beta.pen.factor[1:10, ] <- 0.1 # Lighter penalty for known important predictors # Example: We expect certain response pairs to be connected theta.pen.factor <- matrix(1, q, q) theta.pen.factor[1, 2] <- theta.pen.factor[2, 1] <- 0.1 theta.pen.factor[3, 4] <- theta.pen.factor[4, 3] <- 0.1 # Fit with prior information fit_prior <- missoNet( X = sim$X, Y = sim$Z, beta.pen.factor = beta.pen.factor, theta.pen.factor = theta.pen.factor )
# Standardization is recommended (default: TRUE) # for numerical stability and comparable penalties fit_std <- missoNet(X = sim$X, Y = sim$Z, standardize = TRUE, standardize.response = TRUE) # Without standardization (for pre-scaled data) fit_no_std <- missoNet(X = scale(sim$X), Y = scale(sim$Z), standardize = FALSE, standardize.response = FALSE)
# Adjust convergence settings based on problem difficulty and time constraints fit_tight <- missoNet( X = sim$X, Y = sim$Z, beta.tol = 1e-6, # Tighter tolerance theta.tol = 1e-6, beta.max.iter = 10000, # More iterations allowed theta.max.iter = 10000 ) # For quick exploration, use looser settings fit_quick <- missoNet( X = sim$X, Y = sim$Z, beta.tol = 1e-3, # Looser tolerance theta.tol = 1e-3, beta.max.iter = 1000, # Fewer iterations theta.max.iter = 1000, adaptive.search = TRUE # Fast adaptive search )
| Key features of missoNet: | ✓ Handles missing responses naturally through unbiased estimating equations | ✓ Joint estimation of regression and network structures | ✓ Flexible regularization with separate penalties for coefficients and network | ✓ Multiple selection criteria (AIC/BIC/eBIC, cross-validation) | ✓ Efficient algorithms with warm starts and adaptive search | ✓ Comprehensive visualization tools | For advanced features including cross-validation, see the companion vignettes.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.