Nothing
# Probability floor to avoid division by 0
floor_prob <- function(v, floor_val = 1e-6) {
pmax(v, floor_val)
}
# Robust mean
robust_mean <- function(x, na.rm = TRUE) {
if (all(is.na(x))) return(0)
# Use trimmed mean for robustness
mean(x, trim = 0.05, na.rm = na.rm)
}
# Robust SD
robust_sd <- function(x, na.rm = TRUE) {
if (all(is.na(x))) return(1)
x <- x[!is.na(x)]
if (length(x) < 2) return(1)
# Use MAD for robustness
mad_val <- mad(x, constant = 1.4826)
if (mad_val > .Machine$double.eps * 100) {
return(mad_val)
}
# Fallback to IQR-based estimate
iqr_val <- IQR(x) / 1.349
if (iqr_val > .Machine$double.eps * 100) {
return(iqr_val)
}
# Last resort: standard SD
sd_val <- sd(x)
return(max(sd_val, .Machine$double.eps * 100))
}
# Robust scaling
robust_scale <- function(M, center, scale) {
if (!is.matrix(M)) M <- as.matrix(M)
stopifnot(length(center) == ncol(M), length(scale) == ncol(M))
# Ensure scale is never zero or invalid
scale[!is.finite(scale) | abs(scale) <= .Machine$double.eps * 100] <- 1
M <- sweep(M, 2, center, FUN = "-", check.margin = FALSE)
M <- sweep(M, 2, scale, FUN = "/", check.margin = FALSE)
return(M)
}
suggest_missoNet_params <- function(X, Y) {
n <- nrow(X); p <- ncol(X); q <- ncol(Y)
# Calculate data characteristics
miss_rate <- mean(colMeans(is.na(Y)))
n_eff <- n * (1 - miss_rate)
# Condition number proxy (using sample)
sample_size <- min(100, n)
sample_idx <- sample(n, sample_size)
cond_proxy <- kappa(X[sample_idx, ], exact = FALSE)
suggestions <- list()
# Standardization
suggestions$standardize <- TRUE
suggestions$standardize.response <- TRUE
# Penalize diagonal
suggestions$penalize.diagonal <- (n_eff < max(p, q) * 2) || (cond_proxy > 100)
# Fast mode
suggestions$fast <- (p * q > 1000)
# Grid sizes
if (p * q > 10000) {
suggestions$n.lamB <- 30
suggestions$n.lamTh <- 30
} else if (p * q > 1000) {
suggestions$n.lamB <- 40
suggestions$n.lamTh <- 40
} else {
suggestions$n.lamB <- 50
suggestions$n.lamTh <- 50
}
# Convergence parameters
if (p * q > 10000) {
suggestions$Beta.thr <- 1e-3
suggestions$Theta.thr <- 1e-3
} else {
suggestions$Beta.thr <- 1e-4
suggestions$Theta.thr <- 1e-4
}
# GoF criterion
if (n_eff > p * 10 && n_eff > q * 10) {
suggestions$GoF <- "AIC" # Less penalty when lots of data
} else if (n_eff < p || n_eff < q) {
suggestions$GoF <- "eBIC" # More penalty when limited data
} else {
suggestions$GoF <- "BIC" # Standard choice
}
# Print suggestions
cat("Suggested parameters based on data characteristics:\n")
cat(sprintf(" Data: n=%d, p=%d, q=%d, miss=%.1f%%\n", n, p, q, miss_rate * 100))
cat(sprintf(" Effective sample size: %.1f\n", n_eff))
cat("\nRecommendations:\n")
for (param in names(suggestions)) {
cat(sprintf(" %s = %s\n", param,
ifelse(is.logical(suggestions[[param]]),
ifelse(suggestions[[param]], "TRUE", "FALSE"),
as.character(suggestions[[param]]))))
}
return(suggestions)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.