In stephenslab/mvebnm: Ultimate Deconvolution for Multivariate Normal Means

Description:

This file contains examples of how cross-validation works in udr. It chooses the best number of components for unconstrained and rank1 covariance matrices.

knitr::opts_chunk$set(comment = "#",
                      results = "hold",
                      collapse = TRUE,
                      fig.align = "center",
                      warning = FALSE,
                      message = FALSE)

library(udr)

set.seed(1)
n <- 400
V <- rbind(c(0.8,0.2),
           c(0.2,1.5))
U <- list(none   = rbind(c(0,0),
                         c(0,0)),
          shared = rbind(c(1.0,0.9),
                         c(0.9,1.0)),
          only1  = rbind(c(1,0),
                         c(0,0)),
          only2  = rbind(c(0,0),
                         c(0,1)))
w <- c(0.8,0.1,0.075,0.025)
X <- simulate_ud_data(n,w,U,V)

Example 1: k_unconstrained is a list and k_rank1 is a number.

control = list(unconstrained.update = "ted", rank1.update = "ted", n0 = 0, lambda = 0, tol = 1e-2, maxiter = 20)

# Perform cross-validation
res1 = ud_fit_cv(X, V, nfold = 2, k_unconstrained = c(1,2,5,6,8), 
                 control = control, verbose = FALSE)
res1$scenario
res1$avg_logliks

# Get best parameter settings
best_fit_cv = udr:::get_best_fit_cv(res1)
best_fit_cv$test.loglik
best_fit_cv$n_unconstrained
best_fit_cv$n_rank1

# Refit based on best parameter settings
best_fit = udr:::get_best_fit(X, V, best_fit_cv, control = control, verbose = TRUE)

Example 2: k_unconstrained a number and k_rank1 is a vector.

control = list(unconstrained.update = "ted", rank1.update = "ted", n0 = 0, lambda = 0, tol = 1e-2, maxiter = 20)

res2 = ud_fit_cv(X, V, nfold = 2, k_unconstrained = 2, k_rank1 = c(1,2,5), control = control, verbose = FALSE)
res2$scenario
res2$avg_logliks


best_fit_cv = udr:::get_best_fit_cv(res2)
best_fit_cv$test.loglik
best_fit_cv$n_unconstrained
best_fit_cv$n_rank1

best_fit = udr:::get_best_fit(X, V, best_fit_cv, control = control, verbose = TRUE)

Example 3: k_unconstrained and k_rank1 both vectors

control = list(unconstrained.update = "ted", rank1.update = "ted", n0 = 0, lambda = 0, tol = 1e-2, maxiter = 20)

res3 = ud_fit_cv(X, V, nfold = 2, k_unconstrained = c(2,4,5), k_rank1 = c(1,2,5), 
                 control = control, verbose = FALSE)
res3$scenario
res3$avg_logliks

best_fit_cv = get_best_fit_cv(res3)
best_fit_cv$test.loglik
best_fit_cv$n_unconstrained
best_fit_cv$n_rank1

best_fit = udr:::get_best_fit(X, V, best_fit_cv, control = control, verbose = TRUE)