inst/doc/missoNet-introduction.R

## ----setup, include = FALSE-----------------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  fig.align = "center",
  out.width = "90%",
  dpi = 90,
  message = FALSE,
  warning = FALSE
)
options(width = 100)

## ----eval = FALSE---------------------------------------------------------------------------------
# # Install from CRAN (when available)
# install.packages("missoNet")
# 
# # Or install development version from GitHub
# devtools::install_github("yixiao-zeng/missoNet")

## -------------------------------------------------------------------------------------------------
# Load the package
library(missoNet)

## -------------------------------------------------------------------------------------------------
# Generate synthetic data
sim <- generateData(
  n = 200,                # Sample size
  p = 50,                 # Number of predictors
  q = 10,                 # Number of responses
  rho = 0.1,              # Missing rate (10%)
  missing.type = "MCAR"   # Missing completely at random
)

# Examine the data structure
str(sim, max.level = 1)

## ----echo = TRUE, include = TRUE------------------------------------------------------------------
# Check dimensions
cat("Predictors (X):", dim(sim$X), "\n")
cat("Complete responses (Y):", dim(sim$Y), "\n")
cat("Observed responses (Z):", dim(sim$Z), "\n")
cat("Missing rate:", sprintf("%.1f%%", mean(is.na(sim$Z)) * 100), "\n")

## -------------------------------------------------------------------------------------------------
# Fit missoNet with automatic parameter selection
fit <- missoNet(
  X = sim$X,
  Y = sim$Z,  # Use observed responses with missing values
  GoF = "BIC" # Goodness-of-fit criterion
)

# Extract optimal estimates
Beta.hat <- fit$est.min$Beta
Theta.hat <- fit$est.min$Theta
mu.hat <- fit$est.min$mu

## ----echo = TRUE, include = TRUE------------------------------------------------------------------
# Model summary
cat("Selected lambda.beta:", fit$est.min$lambda.beta, "\n")
cat("Selected lambda.theta:", fit$est.min$lambda.theta, "\n")
cat("Active predictors:", sum(rowSums(abs(Beta.hat)) > 1e-8), "/", nrow(Beta.hat), "\n")
cat("Network edges:", sum(abs(Theta.hat[upper.tri(Theta.hat)]) > 1e-8), 
    "/", ncol(Theta.hat) * (ncol(Theta.hat)-1) / 2, "\n")

## -------------------------------------------------------------------------------------------------
# Visualize the regularization path
plot(fit, type = "heatmap")

## -------------------------------------------------------------------------------------------------
# Visualize the regularization path in a scatter plot
plot(fit, type = "scatter")

## -------------------------------------------------------------------------------------------------
# Split data for demonstration
train_idx <- 1:150
test_idx <- 151:200

# Refit on training data
fit_train <- missoNet(
  X = sim$X[train_idx, ],
  Y = sim$Z[train_idx, ],
  GoF = "BIC",
  verbose = 0  # Suppress output
)

# Predict on test data
Y_pred <- predict(fit_train, newx = sim$X[test_idx, ])

# Evaluate predictions (using complete data for comparison)
mse <- mean((Y_pred - sim$Y[test_idx, ])^2)
cat("Test set MSE:", round(mse, 4), "\n")

## -------------------------------------------------------------------------------------------------
# Generate data with different missing mechanisms
n <- 300; p <- 30; q <- 8; rho <- 0.15

sim_mcar <- generateData(n, p, q, rho, missing.type = "MCAR")
sim_mar <- generateData(n, p, q, rho, missing.type = "MAR")
sim_mnar <- generateData(n, p, q, rho, missing.type = "MNAR")

# Visualize missing patterns
par(mfrow = c(1, 3), mar = c(4, 4, 3, 1))

# MCAR pattern
image(1:q, 1:n, t(is.na(sim_mcar$Z)), 
      col = c("white", "darkred"),
      xlab = "Response", ylab = "Observation",
      main = "MCAR: Random Pattern")

# MAR pattern
image(1:q, 1:n, t(is.na(sim_mar$Z)), 
      col = c("white", "darkred"),
      xlab = "Response", ylab = "Observation",
      main = "MAR: Depends on X")

# MNAR pattern
image(1:q, 1:n, t(is.na(sim_mnar$Z)), 
      col = c("white", "darkred"),
      xlab = "Response", ylab = "Observation",
      main = "MNAR: Depends on Y")

## -------------------------------------------------------------------------------------------------
# Fit with different criteria
criteria <- c("AIC", "BIC", "eBIC")
results <- list()

for (crit in criteria) {
  results[[crit]] <- missoNet(
    X = sim$X,
    Y = sim$Z,
    GoF = crit,
    verbose = 0
  )
}

# Compare selected models
comparison <- data.frame(
  Criterion = criteria,
  Lambda.Beta = sapply(results, function(x) x$est.min$lambda.beta),
  Lambda.Theta = sapply(results, function(x) x$est.min$lambda.theta),
  Active.Predictors = sapply(results, function(x) 
    sum(rowSums(abs(x$est.min$Beta)) > 1e-8)),
  Network.Edges = sapply(results, function(x) 
    sum(abs(x$est.min$Theta[upper.tri(x$est.min$Theta)]) > 1e-8)),
  GoF.Value = sapply(results, function(x) x$est.min$gof)
)

print(comparison, digits = 4)

## -------------------------------------------------------------------------------------------------
# Define custom regularization paths
lambda.beta <- 10^seq(0, -2, length.out = 15)
lambda.theta <- 10^seq(0, -2, length.out = 15)

# Fit with custom grid
fit_custom <- missoNet(
  X = sim$X,
  Y = sim$Z,
  lambda.beta = lambda.beta,
  lambda.theta = lambda.theta,
  verbose = 0
)

## ----echo = TRUE, include = TRUE------------------------------------------------------------------
# Grid coverage summary
cat("  Beta range: [", 
    sprintf("%.4f", min(fit_custom$param_set$gof.grid.beta)), ", ",
    sprintf("%.4f", max(fit_custom$param_set$gof.grid.beta)), "]\n", sep = "")
cat("  Theta range: [", 
    sprintf("%.4f", min(fit_custom$param_set$gof.grid.theta)), ", ",
    sprintf("%.4f", max(fit_custom$param_set$gof.grid.theta)), "]\n", sep = "")
cat("  Total models evaluated:", length(fit_custom$param_set$gof), "\n")

## -------------------------------------------------------------------------------------------------
# Create data with variable missing rates across responses
n <- 300; p <- 30; q <- 8; rho <- 0.15
rho_vec <- seq(0.05, 0.30, length.out = q)

sim_var <- generateData(
  n = 300,
  p = 30,
  q = 8,
  rho = rho_vec,  # Different missing rate for each response
  missing.type = "MAR"
)

# Examine missing patterns
miss_summary <- data.frame(
  Response = paste0("Y", 1:q),
  Target = rho_vec,
  Actual = colMeans(is.na(sim_var$Z))
)

print(miss_summary, digits = 3)

# Fit model accounting for variable missingness
fit_var <- missoNet(
  X = sim_var$X,
  Y = sim_var$Z,
  adaptive.search = TRUE, # Fast adaptive search
  verbose = 0
)

# Visualize
plot(fit_var)

## ----eval = FALSE---------------------------------------------------------------------------------
# # Use penalty factors to incorporate prior information
# p <- ncol(sim$X)
# q <- ncol(sim$Z)
# 
# # Example: We know predictors 1-10 are important
# beta.pen.factor <- matrix(1, p, q)
# beta.pen.factor[1:10, ] <- 0.1  # Lighter penalty for known important predictors
# 
# # Example: We expect certain response pairs to be connected
# theta.pen.factor <- matrix(1, q, q)
# theta.pen.factor[1, 2] <- theta.pen.factor[2, 1] <- 0.1
# theta.pen.factor[3, 4] <- theta.pen.factor[4, 3] <- 0.1
# 
# # Fit with prior information
# fit_prior <- missoNet(
#   X = sim$X,
#   Y = sim$Z,
#   beta.pen.factor = beta.pen.factor,
#   theta.pen.factor = theta.pen.factor
# )

## ----eval = FALSE---------------------------------------------------------------------------------
# # Standardization is recommended (default: TRUE)
# # for numerical stability and comparable penalties
# fit_std <- missoNet(X = sim$X, Y = sim$Z,
#                     standardize = TRUE,
#                     standardize.response = TRUE)
# 
# # Without standardization (for pre-scaled data)
# fit_no_std <- missoNet(X = scale(sim$X), Y = scale(sim$Z),
#                        standardize = FALSE,
#                        standardize.response = FALSE)
# 

## ----eval = FALSE---------------------------------------------------------------------------------
# # Adjust convergence settings based on problem difficulty and time constraints
# fit_tight <- missoNet(
#   X = sim$X,
#   Y = sim$Z,
#   beta.tol = 1e-6,      # Tighter tolerance
#   theta.tol = 1e-6,
#   beta.max.iter = 10000,  # More iterations allowed
#   theta.max.iter = 10000
# )
# 
# # For quick exploration, use looser settings
# fit_quick <- missoNet(
#   X = sim$X,
#   Y = sim$Z,
#   beta.tol = 1e-3,      # Looser tolerance
#   theta.tol = 1e-3,
#   beta.max.iter = 1000,   # Fewer iterations
#   theta.max.iter = 1000,
#   adaptive.search = TRUE # Fast adaptive search
# )

Try the missoNet package in your browser

Any scripts or data that you put into this service are public.

missoNet documentation built on Sept. 9, 2025, 5:55 p.m.