Getting Started with missoNet

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  fig.align = "center",
  out.width = "90%",
  dpi = 90,
  message = FALSE,
  warning = FALSE
)
options(width = 100)

Introduction

The missoNet package implements a powerful framework for multitask learning with missing responses, simultaneously estimating:

The Model

The conditional Gaussian model is: $$ \mathbf{Y} = \mathbf{1}\mu^T + \mathbf{X}\mathbf{B} + \mathbf{E}, \quad \mathbf{E} \sim \mathrm{MVN}(0, \Theta^{-1}) $$ where:

For theoretical details, see Zeng et al. (2025).

Installation

# Install from CRAN (when available)
install.packages("missoNet")

# Or install development version from GitHub
devtools::install_github("yixiao-zeng/missoNet")
# Load the package
library(missoNet)

Quick Start

Generate Example Data

The package includes a flexible data generator for testing:

# Generate synthetic data
sim <- generateData(
  n = 200,                # Sample size
  p = 50,                 # Number of predictors
  q = 10,                 # Number of responses
  rho = 0.1,              # Missing rate (10%)
  missing.type = "MCAR"   # Missing completely at random
)

# Examine the data structure
str(sim, max.level = 1)
# Check dimensions
cat("Predictors (X):", dim(sim$X), "\n")
cat("Complete responses (Y):", dim(sim$Y), "\n")
cat("Observed responses (Z):", dim(sim$Z), "\n")
cat("Missing rate:", sprintf("%.1f%%", mean(is.na(sim$Z)) * 100), "\n")

Basic Model Fitting

# Fit missoNet with automatic parameter selection
fit <- missoNet(
  X = sim$X,
  Y = sim$Z,  # Use observed responses with missing values
  GoF = "BIC" # Goodness-of-fit criterion
)

# Extract optimal estimates
Beta.hat <- fit$est.min$Beta
Theta.hat <- fit$est.min$Theta
mu.hat <- fit$est.min$mu
# Model summary
cat("Selected lambda.beta:", fit$est.min$lambda.beta, "\n")
cat("Selected lambda.theta:", fit$est.min$lambda.theta, "\n")
cat("Active predictors:", sum(rowSums(abs(Beta.hat)) > 1e-8), "/", nrow(Beta.hat), "\n")
cat("Network edges:", sum(abs(Theta.hat[upper.tri(Theta.hat)]) > 1e-8), 
    "/", ncol(Theta.hat) * (ncol(Theta.hat)-1) / 2, "\n")

Visualization

# Visualize the regularization path
plot(fit, type = "heatmap")
# Visualize the regularization path in a scatter plot
plot(fit, type = "scatter")

Making Predictions

# Split data for demonstration
train_idx <- 1:150
test_idx <- 151:200

# Refit on training data
fit_train <- missoNet(
  X = sim$X[train_idx, ],
  Y = sim$Z[train_idx, ],
  GoF = "BIC",
  verbose = 0  # Suppress output
)

# Predict on test data
Y_pred <- predict(fit_train, newx = sim$X[test_idx, ])

# Evaluate predictions (using complete data for comparison)
mse <- mean((Y_pred - sim$Y[test_idx, ])^2)
cat("Test set MSE:", round(mse, 4), "\n")

Understanding Missing Data Mechanisms

missoNet handles three types of missing data:

# Generate data with different missing mechanisms
n <- 300; p <- 30; q <- 8; rho <- 0.15

sim_mcar <- generateData(n, p, q, rho, missing.type = "MCAR")
sim_mar <- generateData(n, p, q, rho, missing.type = "MAR")
sim_mnar <- generateData(n, p, q, rho, missing.type = "MNAR")

# Visualize missing patterns
par(mfrow = c(1, 3), mar = c(4, 4, 3, 1))

# MCAR pattern
image(1:q, 1:n, t(is.na(sim_mcar$Z)), 
      col = c("white", "darkred"),
      xlab = "Response", ylab = "Observation",
      main = "MCAR: Random Pattern")

# MAR pattern
image(1:q, 1:n, t(is.na(sim_mar$Z)), 
      col = c("white", "darkred"),
      xlab = "Response", ylab = "Observation",
      main = "MAR: Depends on X")

# MNAR pattern
image(1:q, 1:n, t(is.na(sim_mnar$Z)), 
      col = c("white", "darkred"),
      xlab = "Response", ylab = "Observation",
      main = "MNAR: Depends on Y")

Model Selection Strategies

Comparing Selection Criteria

# Fit with different criteria
criteria <- c("AIC", "BIC", "eBIC")
results <- list()

for (crit in criteria) {
  results[[crit]] <- missoNet(
    X = sim$X,
    Y = sim$Z,
    GoF = crit,
    verbose = 0
  )
}

# Compare selected models
comparison <- data.frame(
  Criterion = criteria,
  Lambda.Beta = sapply(results, function(x) x$est.min$lambda.beta),
  Lambda.Theta = sapply(results, function(x) x$est.min$lambda.theta),
  Active.Predictors = sapply(results, function(x) 
    sum(rowSums(abs(x$est.min$Beta)) > 1e-8)),
  Network.Edges = sapply(results, function(x) 
    sum(abs(x$est.min$Theta[upper.tri(x$est.min$Theta)]) > 1e-8)),
  GoF.Value = sapply(results, function(x) x$est.min$gof)
)

print(comparison, digits = 4)

Custom Lambda Grids

# Define custom regularization paths
lambda.beta <- 10^seq(0, -2, length.out = 15)
lambda.theta <- 10^seq(0, -2, length.out = 15)

# Fit with custom grid
fit_custom <- missoNet(
  X = sim$X,
  Y = sim$Z,
  lambda.beta = lambda.beta,
  lambda.theta = lambda.theta,
  verbose = 0
)
# Grid coverage summary
cat("  Beta range: [", 
    sprintf("%.4f", min(fit_custom$param_set$gof.grid.beta)), ", ",
    sprintf("%.4f", max(fit_custom$param_set$gof.grid.beta)), "]\n", sep = "")
cat("  Theta range: [", 
    sprintf("%.4f", min(fit_custom$param_set$gof.grid.theta)), ", ",
    sprintf("%.4f", max(fit_custom$param_set$gof.grid.theta)), "]\n", sep = "")
cat("  Total models evaluated:", length(fit_custom$param_set$gof), "\n")

Working with Real Data Patterns

Handling Variable Missing Rates

# Create data with variable missing rates across responses
n <- 300; p <- 30; q <- 8; rho <- 0.15
rho_vec <- seq(0.05, 0.30, length.out = q)

sim_var <- generateData(
  n = 300,
  p = 30,
  q = 8,
  rho = rho_vec,  # Different missing rate for each response
  missing.type = "MAR"
)

# Examine missing patterns
miss_summary <- data.frame(
  Response = paste0("Y", 1:q),
  Target = rho_vec,
  Actual = colMeans(is.na(sim_var$Z))
)

print(miss_summary, digits = 3)

# Fit model accounting for variable missingness
fit_var <- missoNet(
  X = sim_var$X,
  Y = sim_var$Z,
  adaptive.search = TRUE, # Fast adaptive search
  verbose = 0
)

# Visualize
plot(fit_var)

Incorporating Prior Knowledge

# Use penalty factors to incorporate prior information
p <- ncol(sim$X)
q <- ncol(sim$Z)

# Example: We know predictors 1-10 are important
beta.pen.factor <- matrix(1, p, q)
beta.pen.factor[1:10, ] <- 0.1  # Lighter penalty for known important predictors

# Example: We expect certain response pairs to be connected
theta.pen.factor <- matrix(1, q, q)
theta.pen.factor[1, 2] <- theta.pen.factor[2, 1] <- 0.1
theta.pen.factor[3, 4] <- theta.pen.factor[4, 3] <- 0.1

# Fit with prior information
fit_prior <- missoNet(
  X = sim$X,
  Y = sim$Z,
  beta.pen.factor = beta.pen.factor,
  theta.pen.factor = theta.pen.factor
)

Tips for Optimal Performance

Standardization

# Standardization is recommended (default: TRUE)
# for numerical stability and comparable penalties
fit_std <- missoNet(X = sim$X, Y = sim$Z, 
                    standardize = TRUE, 
                    standardize.response = TRUE)

# Without standardization (for pre-scaled data)
fit_no_std <- missoNet(X = scale(sim$X), Y = scale(sim$Z), 
                       standardize = FALSE,
                       standardize.response = FALSE)

Convergence Settings

# Adjust convergence settings based on problem difficulty and time constraints
fit_tight <- missoNet(
  X = sim$X,
  Y = sim$Z,
  beta.tol = 1e-6,      # Tighter tolerance
  theta.tol = 1e-6,
  beta.max.iter = 10000,  # More iterations allowed
  theta.max.iter = 10000
)

# For quick exploration, use looser settings
fit_quick <- missoNet(
  X = sim$X,
  Y = sim$Z,
  beta.tol = 1e-3,      # Looser tolerance
  theta.tol = 1e-3,
  beta.max.iter = 1000,   # Fewer iterations
  theta.max.iter = 1000,
  adaptive.search = TRUE # Fast adaptive search
)

Summary

| Key features of missoNet: | ✓ Handles missing responses naturally through unbiased estimating equations | ✓ Joint estimation of regression and network structures | ✓ Flexible regularization with separate penalties for coefficients and network | ✓ Multiple selection criteria (AIC/BIC/eBIC, cross-validation) | ✓ Efficient algorithms with warm starts and adaptive search | ✓ Comprehensive visualization tools | For advanced features including cross-validation, see the companion vignettes.



Try the missoNet package in your browser

Any scripts or data that you put into this service are public.

missoNet documentation built on Sept. 9, 2025, 5:55 p.m.