R/helpers.R

Defines functions suggest_missoNet_params robust_scale robust_sd robust_mean floor_prob

# Probability floor to avoid division by 0
floor_prob <- function(v, floor_val = 1e-6) {
  pmax(v, floor_val)
}

# Robust mean
robust_mean <- function(x, na.rm = TRUE) {
  if (all(is.na(x))) return(0)
  # Use trimmed mean for robustness
  mean(x, trim = 0.05, na.rm = na.rm)
}

# Robust SD
robust_sd <- function(x, na.rm = TRUE) {
  if (all(is.na(x))) return(1)
  x <- x[!is.na(x)]
  if (length(x) < 2) return(1)
  
  # Use MAD for robustness
  mad_val <- mad(x, constant = 1.4826)
  if (mad_val > .Machine$double.eps * 100) {
    return(mad_val)
  }
  
  # Fallback to IQR-based estimate
  iqr_val <- IQR(x) / 1.349
  if (iqr_val > .Machine$double.eps * 100) {
    return(iqr_val)
  }
  
  # Last resort: standard SD
  sd_val <- sd(x)
  return(max(sd_val, .Machine$double.eps * 100))
}

# Robust scaling
robust_scale <- function(M, center, scale) {
  if (!is.matrix(M)) M <- as.matrix(M)
  stopifnot(length(center) == ncol(M), length(scale) == ncol(M))
  # Ensure scale is never zero or invalid
  scale[!is.finite(scale) | abs(scale) <= .Machine$double.eps * 100] <- 1
  M <- sweep(M, 2, center, FUN = "-", check.margin = FALSE)
  M <- sweep(M, 2, scale, FUN = "/", check.margin = FALSE)
  return(M)
}

suggest_missoNet_params <- function(X, Y) {
  n <- nrow(X); p <- ncol(X); q <- ncol(Y)
  
  # Calculate data characteristics
  miss_rate <- mean(colMeans(is.na(Y)))
  n_eff <- n * (1 - miss_rate)
  
  # Condition number proxy (using sample)
  sample_size <- min(100, n)
  sample_idx <- sample(n, sample_size)
  cond_proxy <- kappa(X[sample_idx, ], exact = FALSE)
  
  suggestions <- list()
  
  # Standardization
  suggestions$standardize <- TRUE
  suggestions$standardize.response <- TRUE
  
  # Penalize diagonal
  suggestions$penalize.diagonal <- (n_eff < max(p, q) * 2) || (cond_proxy > 100)
  
  # Fast mode
  suggestions$fast <- (p * q > 1000)
  
  # Grid sizes
  if (p * q > 10000) {
    suggestions$n.lamB <- 30
    suggestions$n.lamTh <- 30
  } else if (p * q > 1000) {
    suggestions$n.lamB <- 40
    suggestions$n.lamTh <- 40
  } else {
    suggestions$n.lamB <- 50
    suggestions$n.lamTh <- 50
  }
  
  # Convergence parameters
  if (p * q > 10000) {
    suggestions$Beta.thr <- 1e-3
    suggestions$Theta.thr <- 1e-3
  } else {
    suggestions$Beta.thr <- 1e-4
    suggestions$Theta.thr <- 1e-4
  }
  
  # GoF criterion
  if (n_eff > p * 10 && n_eff > q * 10) {
    suggestions$GoF <- "AIC"  # Less penalty when lots of data
  } else if (n_eff < p || n_eff < q) {
    suggestions$GoF <- "eBIC"  # More penalty when limited data
  } else {
    suggestions$GoF <- "BIC"  # Standard choice
  }
  
  # Print suggestions
  cat("Suggested parameters based on data characteristics:\n")
  cat(sprintf("  Data: n=%d, p=%d, q=%d, miss=%.1f%%\n", n, p, q, miss_rate * 100))
  cat(sprintf("  Effective sample size: %.1f\n", n_eff))
  cat("\nRecommendations:\n")
  for (param in names(suggestions)) {
    cat(sprintf("  %s = %s\n", param, 
                ifelse(is.logical(suggestions[[param]]), 
                       ifelse(suggestions[[param]], "TRUE", "FALSE"),
                       as.character(suggestions[[param]]))))
  }
  
  return(suggestions)
}

Try the missoNet package in your browser

Any scripts or data that you put into this service are public.

missoNet documentation built on Sept. 9, 2025, 5:55 p.m.