knitr::opts_chunk$set(echo = TRUE)

R Markdown

Imputing missing data

Here we will do a quick comparison with softImpute. First the rank 1 case.

library("flashr")
set.seed(1)
n = 100
p = 500
k = 1
LL = matrix(rnorm(n*k),nrow=n)
FF = matrix(rnorm(p*k),nrow=p)
Y = LL %*% t(FF) + rnorm(n*p)

Ymiss = Y
Ymiss[sample(1:length(Y),length(Y)/2)] = NA
dmiss = flash_set_data(Ymiss)
f = flash_add_factors_from_data(dmiss,K=10)
f = flash_backfit(dmiss,f)

fg = flash_add_greedy(dmiss,10)
fgb = flash_backfit(dmiss,fg)

s = softImpute::softImpute(Ymiss,rank.max=10,type="svd")
index = which(is.na(Ymiss))
s_imp = (s$u %*% diag(s$d) %*% t(s$v))[index]
f_imp = flash_get_fitted_values(f)[index]
fg_imp = flash_get_fitted_values(fg)[index]
fgb_imp = flash_get_fitted_values(fgb)[index]

opt = (LL %*% t(FF))[index]
mean((s_imp-opt)^2)
mean((f_imp-opt)^2)
mean((fg_imp-opt)^2)
mean((fgb_imp-opt)^2)
mean(opt^2)

flash_get_objective(dmiss,f)
flash_get_objective(dmiss,fg)
flash_get_objective(dmiss,fgb)

Imputing missing data

Here we will do a quick comparison with softImpute.

Now a rank 7 case.

set.seed(1)
n = 100
p = 500
k = 7
LL = matrix(rnorm(n*k),nrow=n)
FF = matrix(rnorm(p*k),nrow=p)
Y = LL %*% t(FF) + rnorm(n*p)

Ymiss = Y
Ymiss[sample(1:length(Y),length(Y)/2)] = NA
dmiss = flash_set_data(Ymiss)
f = flash_add_factors_from_data(dmiss,K=10)
f = flash_backfit(dmiss,f)

fg = flash_add_greedy(dmiss,10)
fgb = flash_backfit(dmiss,fg)

s = softImpute::softImpute(Ymiss,rank.max=10,type="svd")
index = which(is.na(Ymiss))
s_imp = (s$u %*% diag(s$d) %*% t(s$v))[index]
f_imp = flash_get_fitted_values(f)[index]
fg_imp = flash_get_fitted_values(fg)[index]
fgb_imp = flash_get_fitted_values(fgb)[index]

opt = (LL %*% t(FF))[index]
mean((s_imp-opt)^2)
mean((f_imp-opt)^2)
mean((fg_imp-opt)^2)
mean((fgb_imp-opt)^2)
mean(opt^2)


stephenslab/flashr2 documentation built on Feb. 6, 2024, 5:21 a.m.