knitr::opts_chunk$set(echo = TRUE)
\newcommand{\bX}{\mathbf{X}} \newcommand{\bZ}{\mathbf{Z}} \newcommand{\bx}{\mathbf{x}} \newcommand{\bz}{\mathbf{z}} \newcommand{\bigf}{\mathbf{f}} \newcommand{\bo}{\mathbf{0}}
We generate the high rank continuous data matrix $\bX\in \mathbb{R}^{500\times200}$ from a low rank Gaussian copula as in the experiments of https://arxiv.org/pdf/2006.10829.pdf and randomly mask $40\%$ observation as test set.
p = 200 n = 500 rank = 10 sigma = 0.1 set.seed(410) W = matrix(rnorm(p*rank), nrow = p) W = t(apply(W, 1, function(x){x/sqrt(sum(x^2)) * sqrt((1-sigma))})) Z = matrix(rnorm(n*rank), ncol = rank) %*% t(W) + matrix(rnorm(n*p, sd = sqrt(sigma)), ncol = p) # cubic transformation X = Z^3 # mask 40% of the original observation X_obs = X loc = sample(1:prod(n*p), size = floor(prod(n*p)*0.4)) X_obs[loc] = NA
Simply specify the rank to the function call.
library(gcimputeR) est = impute_mixedgc_ppca(X_obs, rank = 10) # around 8 secs print("Normalized root mean squared error (NRMSE) is: ") print(sqrt(mean((est$Ximp[loc] - X[loc])^2))/sqrt(mean(X[loc]^2)))
ct = ct_impute(X_obs, est, 0.95) print("The empirical coverage is: ") print(mean(X[loc] >= ct$lower[loc] & X[loc] <= ct$upper[loc])) print("The mean confidence interval length is: ") mean(ct$upper[loc] - ct$lower[loc])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.